sinapsis-huggingface 0.2.10__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/PKG-INFO +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface.egg-info/PKG-INFO +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py +37 -6
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py +30 -3
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py +29 -10
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py +37 -4
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py +43 -8
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py +41 -6
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py +5 -3
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py +26 -5
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/pyproject.toml +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/LICENSE +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/README.md +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface.egg-info/SOURCES.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface.egg-info/dependency_links.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface.egg-info/requires.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface.egg-info/top_level.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/hugging_face_embedding_extractor.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_audio.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/grounding_dino_keys.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino_classification.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/text_to_sentences.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.11}/setup.cfg +0 -0
--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-

 from abc import ABC, abstractmethod
-from typing import
+from typing import Literal

 import numpy as np
 import torch
 from diffusers import DiffusionPipeline
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
 from sinapsis_core.template_base import Template
 from sinapsis_core.template_base.base_models import (
@@ -20,6 +20,37 @@ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
 from sinapsis_huggingface_diffusers.helpers.tags import Tags


+class BaseGenerationParams(BaseModel):
+    """Defines the core parameters for a diffusers generation pipeline.
+
+    This model is flexible and allows any other parameters (e.g., `strength`)
+    to be passed, which will be forwarded to the underlying pipeline.
+
+    Attributes:
+        prompt (str | list[str] | None): The text prompt(s) to guide image generation.
+        height (int | None): The height in pixels of the generated image.
+        width (int | None): The width in pixels of the generated image.
+        negative_prompt (str | list[str] | None): Prompt(s) to guide the model away
+            from generating certain things.
+        num_inference_steps (int | None): The number of denoising steps. More steps
+            typically result in higher quality but are slower. Defaults to 50.
+        guidance_scale (float | None): Controls how much the prompt influences the
+            output. Higher values mean stronger adherence. Defaults to 7.5.
+        num_images_per_prompt (int | None): The number of images to generate per
+            prompt. Defaults to 1.
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    prompt: str | list[str] | None = None
+    height: int | None = None
+    width: int | None = None
+    negative_prompt: str | list[str] | None = None
+    num_inference_steps: int | None = 50
+    guidance_scale: float | None = 7.5
+    num_images_per_prompt: int | None = 1
+
+
 class BaseDiffusersAttributes(TemplateAttributes):
     """Configuration attributes for setting up a diffusion pipeline and generating images.

@@ -33,7 +64,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
         device (Literal["cuda", "cpu"]): Device for computations, either "cpu" or "cuda".
         torch_dtype (Literal["float16", "float32"]): Data type for PyTorch tensors.
         enable_model_cpu_offload (bool): If True, enables CPU offloading to reduce GPU memory usage.
-        generation_params (
+        generation_params (BaseGenerationParams): Parameters for image generation (e.g., prompt, guidance_scale).
         seed (int | list[int] | None): Random seed(s) for reproducibility.
         overwrite_images (bool): Whether to overwrite the existing images in the container.
             Defaults to False.
@@ -44,7 +75,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
     device: Literal["cuda", "cpu"]
     torch_dtype: Literal["float16", "float32"] = "float16"
     enable_model_cpu_offload: bool = False
-    generation_params:
+    generation_params: BaseGenerationParams = Field(default_factory=BaseGenerationParams)
     seed: int | list[int] | None = None
     overwrite_images: bool = False

@@ -79,7 +110,7 @@ class BaseDiffusers(Template, ABC):

         self.pipeline = self._make_pipeline()
         self.pipeline.set_progress_bar_config(disable=True)
-        self.num_images_per_prompt = self.attributes.generation_params.
+        self.num_images_per_prompt = self.attributes.generation_params.num_images_per_prompt
         self.generator = self._make_generator()

         if self.attributes.enable_model_cpu_offload:
@@ -163,7 +194,7 @@ class BaseDiffusers(Template, ABC):
             inputs = {}
         output = self.pipeline(
             **inputs,
-            **self.attributes.generation_params,
+            **self.attributes.generation_params.model_dump(exclude_none=True),
             generator=self.generator,
             output_type="np",
         )

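The change above swaps a free-form mapping for a typed pydantic model while keeping it open-ended. Below is a minimal standalone sketch (a stand-in class written for illustration, not imported from the package) of how `extra="allow"` plus `model_dump(exclude_none=True)` behaves before the kwargs are splatted into the diffusers pipeline call:

```python
from pydantic import BaseModel, ConfigDict


class GenerationParamsSketch(BaseModel):
    """Stand-in mirroring the BaseGenerationParams fields shown in the diff."""

    model_config = ConfigDict(extra="allow")  # unknown keys are accepted, not rejected

    prompt: str | list[str] | None = None
    height: int | None = None
    width: int | None = None
    num_inference_steps: int | None = 50
    guidance_scale: float | None = 7.5
    num_images_per_prompt: int | None = 1


# An extra key such as `strength` is kept and forwarded untouched.
params = GenerationParamsSketch(prompt="a red bicycle", strength=0.6)

# exclude_none drops unset optional fields (height, width here), so only
# meaningful keys reach the underlying pipeline.
print(params.model_dump(exclude_none=True))
# keys present: prompt, num_inference_steps, guidance_scale,
# num_images_per_prompt, strength
```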
--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py
@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
-
 from diffusers import I2VGenXLPipeline
+from pydantic import Field
 from sinapsis_core.template_base.base_models import TemplateAttributeType

 from sinapsis_huggingface_diffusers.helpers.tags import Tags
+from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusersAttributes, BaseGenerationParams
 from sinapsis_huggingface_diffusers.templates.image_to_image_diffusers import (
     ImageToImageDiffusers,
 )
@@ -12,6 +13,32 @@ ImageToVideoGenXLDiffusersUIProperties = ImageToImageDiffusers.UIProperties
 ImageToVideoGenXLDiffusersUIProperties.tags.extend([Tags.VIDEO, Tags.IMAGE_TO_VIDEO])


+class ImageToVideoGenerationParams(BaseGenerationParams):
+    """Defines the specific parameters for image-to-video generation pipelines.
+
+    Attributes:
+        target_fps (int | None): The target frames per second for the generated video.
+        num_frames (int | None): The total number of frames to generate in the video. Defaults to 16.
+        num_videos_per_prompt (int | None): The number of different videos to generate
+            from the same input image and prompt.
+    """
+
+    target_fps: int | None = None
+    num_frames: int | None = 16
+    num_videos_per_prompt: int | None = None
+
+
+class ImageToVideoGenXLDiffusersAttributes(BaseDiffusersAttributes):
+    """Defines the complete set of attributes for the ImageToVideoGenXLDiffusers template.
+
+    Attributes:
+        generation_params (ImageToVideoGenerationParams): Task-specific parameters for
+            video generation, such as `num_frames` and `target_fps`.
+    """
+
+    generation_params: ImageToVideoGenerationParams = Field(default_factory=ImageToVideoGenerationParams)
+
+
 class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
     """This class implements a specific template for image-to-video generation using Hugging Face's
     diffusers. The `ImageToVideoGenXLDiffusers` class inherits from the `ImageToImageDiffusers` template
@@ -42,12 +69,12 @@ class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):

     """

+    AttributesBaseModel = ImageToVideoGenXLDiffusersAttributes
     UIProperties = ImageToVideoGenXLDiffusersUIProperties
-    DEFAULT_NUM_FRAMES = 16

     def __init__(self, attributes: TemplateAttributeType) -> None:
         super().__init__(attributes)
-        self.num_duplicates = self.attributes.generation_params.
+        self.num_duplicates = self.attributes.generation_params.num_frames
         self.requires_pil = True
         self.output_attribute = "frames"

--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py
@@ -327,6 +327,6 @@ class InpaintingDiffusers(ImageToImageDiffusers):
         if old_packet.annotations:
             new_packet.annotations = old_packet.annotations
             for ann in new_packet.annotations:
-                ann.label_str = str(self.attributes.generation_params.
+                ann.label_str = str(self.attributes.generation_params.prompt)

         return new_packets, old_packets

--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py
@@ -6,7 +6,7 @@ from abc import abstractmethod
 from typing import Any, Literal

 import torch
-from pydantic import Field
+from pydantic import BaseModel, ConfigDict, Field
 from sinapsis_core.data_containers.data_packet import DataContainer
 from sinapsis_core.template_base import Template
 from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
@@ -17,6 +17,29 @@ from transformers.pipelines import Pipeline
 from sinapsis_huggingface_transformers.helpers.tags import Tags


+class BaseInferenceKwargs(BaseModel):
+    """A flexible container for keyword arguments passed during inference.
+
+    Attributes:
+        generate_kwargs (dict[str, Any] | None): A dictionary of advanced parameters passed directly to the
+            model's `generate` method for fine-tuning the pipeline generation.
+    """
+
+    generate_kwargs: dict[str, Any] | None = None
+    model_config = ConfigDict(extra="allow")
+
+
+class PipelineKwargs(BaseModel):
+    """A flexible container for keyword arguments used to create the pipeline.
+
+    This model allows any extra parameters to be passed during pipeline instantiation.
+    """
+
+    device: Literal["cuda", "cpu"]
+    torch_dtype: Literal["float16", "float32", "auto"] = "float16"
+    model_config = ConfigDict(extra="allow")
+
+
 class TransformersBaseAttributes(TemplateAttributes):
     """Attributes for configuring the TransformersPipelineTemplate.

@@ -31,20 +54,18 @@ class TransformersBaseAttributes(TemplateAttributes):
         seed (int | None): Random seed for reproducibility. If provided, this seed will ensure
             consistent results for pipelines that involve randomness. If not provided, a random seed
             will be generated internally.
-        pipeline_kwargs (
+        pipeline_kwargs (PipelineKwargs): Keyword arguments passed during the instantiation of the
             Hugging Face pipeline.
-        inference_kwargs (
+        inference_kwargs (BaseInferenceKwargs): Keyword arguments passed during the task execution or
             inference phase. These allow dynamic customization of the task, such as `max_length`
             and `min_length` for summarization, or `max_new_tokens` for image-to-text.
     """

     model_path: str
     model_cache_dir: str = str(SINAPSIS_CACHE_DIR)
-    device: Literal["cuda", "cpu"]
-    torch_dtype: Literal["float16", "float32"] = "float16"
     seed: int | None = None
-    pipeline_kwargs:
-    inference_kwargs:
+    pipeline_kwargs: PipelineKwargs = Field(default_factory=PipelineKwargs)
+    inference_kwargs: BaseInferenceKwargs = Field(default_factory=BaseInferenceKwargs)


 class TransformersBase(Template):
@@ -123,9 +144,7 @@ class TransformersBase(Template):
         return pipeline(
             task=self.task,
             model=self.attributes.model_path,
-
-            torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
-            **self.attributes.pipeline_kwargs,
+            **self.attributes.pipeline_kwargs.model_dump(),
             **kwargs,
         )

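As with the diffusers base template, this release replaces untyped `pipeline_kwargs`/`inference_kwargs` with pydantic models that are dumped back to plain dicts before being splatted into the Hugging Face calls. A small sketch of that flow, using stand-in classes written here for illustration (not the package's own imports):

```python
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict


class PipelineKwargsSketch(BaseModel):
    """Stand-in for PipelineKwargs: device/dtype are validated, extras pass through."""

    model_config = ConfigDict(extra="allow")
    device: Literal["cuda", "cpu"]
    torch_dtype: Literal["float16", "float32", "auto"] = "float16"


class InferenceKwargsSketch(BaseModel):
    """Stand-in for BaseInferenceKwargs."""

    model_config = ConfigDict(extra="allow")
    generate_kwargs: dict[str, Any] | None = None


# device/torch_dtype are type-checked; an extra key (here trust_remote_code,
# used purely as an example of a pass-through kwarg) is kept as-is.
pipe_kwargs = PipelineKwargsSketch(device="cpu", trust_remote_code=True)
print(pipe_kwargs.model_dump())
# {'device': 'cpu', 'torch_dtype': 'float16', 'trust_remote_code': True}

# Only non-None inference kwargs survive the dump and reach the pipeline call.
infer_kwargs = InferenceKwargsSketch(generate_kwargs={"max_new_tokens": 64})
print(infer_kwargs.model_dump(exclude_none=True))
# {'generate_kwargs': {'max_new_tokens': 64}}
```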
--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py
@@ -2,17 +2,47 @@

 import numpy as np
 from PIL import Image
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
 from sinapsis_core.template_base.base_models import OutputTypes

 from sinapsis_huggingface_transformers.helpers.tags import Tags
-from sinapsis_huggingface_transformers.templates.base_transformers import
+from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
+    TransformersBase,
+    TransformersBaseAttributes,
+)

 ImageToTextTransformersUIProperties = TransformersBase.UIProperties
 ImageToTextTransformersUIProperties.output_type = OutputTypes.TEXT
 ImageToTextTransformersUIProperties.tags.extend([Tags.IMAGE, Tags.TEXT, Tags.IMAGE_TO_TEXT])


+class ImageToTextInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the image-to-text pipeline.
+
+    Attributes:
+        max_new_tokens (int | None): The maximum number of tokens to generate in the description.
+        timeout (float | None): The maximum time in seconds to wait for fetching images from the web.
+    """
+
+    max_new_tokens: int | None = None
+    timeout: float | None = None
+
+
+class ImageToTextTransformersAttributes(TransformersBaseAttributes):
+    """Defines the complete set of attributes for the ImageToTextTransformers template.
+
+    Inherits general transformer settings from TransformersBaseAttributes.
+
+    Attributes:
+        inference_kwargs (ImageToTextInferenceKwargs): Task-specific parameters for the image-to-text pipeline,
+            such as `max_new_tokens`.
+    """
+
+    inference_kwargs: ImageToTextInferenceKwargs = Field(default_factory=ImageToTextInferenceKwargs)
+
+
 class ImageToTextTransformers(TransformersBase):
     """ImageToTextTransformers template to generate text from an image.

@@ -37,6 +67,7 @@ class ImageToTextTransformers(TransformersBase):

     """

+    AttributesBaseModel = ImageToTextTransformersAttributes
     GENERATED_TEXT_KEY = "generated_text"
     UIProperties = ImageToTextTransformersUIProperties

@@ -78,7 +109,9 @@ class ImageToTextTransformers(TransformersBase):
         """
         for image_packet in container.images:
             image = self._convert_to_pil(image_packet.content)
-
-
-
+            results = self.pipeline(image, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+            if results:
+                text_description = results[0].get(self.GENERATED_TEXT_KEY)
+                if text_description:
+                    container.texts.append(TextPacket(content=text_description))
         return container

--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py
@@ -1,11 +1,17 @@
 # -*- coding: utf-8 -*-
+from typing import Literal

 import numpy as np
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
 from sinapsis_core.template_base.base_models import OutputTypes

 from sinapsis_huggingface_transformers.helpers.tags import Tags
-from sinapsis_huggingface_transformers.templates.base_transformers import
+from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
+    TransformersBase,
+    TransformersBaseAttributes,
+)

 SpeechToTextTransformersUIProperties = TransformersBase.UIProperties
 SpeechToTextTransformersUIProperties.output_type = OutputTypes.TEXT
@@ -14,6 +20,30 @@ SpeechToTextTransformersUIProperties.tags.extend(
 )


+class SpeechToTextInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the automatic-speech-recognition pipeline.
+
+    Attributes:
+        return_timestamps (Literal["char", "word"] | bool | None): If set, controls the granularity of
+            timestamps returned with the transcribed text. Can be "char", "word", or True for segments.
+    """
+
+    return_timestamps: Literal["char", "word"] | bool | None = None
+
+
+class SpeechToTextTransformersAttributes(TransformersBaseAttributes):
+    """Defines the set of attributes for the SpeechToTextTransformers template.
+
+    Inherits general transformer settings from TransformersBaseAttributes.
+
+    Attributes:
+        inference_kwargs (SpeechToTextInferenceKwargs): Task-specific parameters for the speech-to-text pipeline,
+            such as `return_timestamps`.
+    """
+
+    inference_kwargs: SpeechToTextInferenceKwargs = Field(default_factory=SpeechToTextInferenceKwargs)
+
+
 class SpeechToTextTransformers(TransformersBase):
     """Template to perform speech-to-text actions
     using the HuggingFace module through the 'transformers' architecture.
@@ -40,6 +70,7 @@ class SpeechToTextTransformers(TransformersBase):

     """

+    AttributesBaseModel = SpeechToTextTransformersAttributes
     TEXT_KEY = "text"
     UIProperties = SpeechToTextTransformersUIProperties

@@ -65,11 +96,15 @@ class SpeechToTextTransformers(TransformersBase):
         for audio_packet in container.audios:
             audio = audio_packet.content
             audio = audio.astype(np.float32)
-
-
-
-
-
-
-
+            results = self.pipeline(audio, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+            if results:
+                transcribed_text = results.get(self.TEXT_KEY)
+                if transcribed_text:
+                    self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
+                    container.texts.append(
+                        TextPacket(
+                            content=transcribed_text,
+                            source=audio_packet.source,
+                        )
+                    )
         return container

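A brief sketch of the practical effect for speech-to-text: `return_timestamps` defaults to `None`, so `model_dump(exclude_none=True)` forwards no extra kwargs unless the attribute is actually set. The class below is a stand-in written for illustration, not the package import:

```python
from typing import Literal

from pydantic import BaseModel, ConfigDict


class SpeechToTextKwargsSketch(BaseModel):
    """Stand-in mirroring SpeechToTextInferenceKwargs from the diff."""

    model_config = ConfigDict(extra="allow")
    return_timestamps: Literal["char", "word"] | bool | None = None


print(SpeechToTextKwargsSketch().model_dump(exclude_none=True))
# {}  -> the ASR pipeline is called with no extra kwargs

print(SpeechToTextKwargsSketch(return_timestamps="word").model_dump(exclude_none=True))
# {'return_timestamps': 'word'}  -> word-level timestamps requested
```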
--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py
@@ -1,16 +1,48 @@
 # -*- coding: utf-8 -*-

+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer
 from sinapsis_core.template_base.base_models import OutputTypes

 from sinapsis_huggingface_transformers.helpers.tags import Tags
-from sinapsis_huggingface_transformers.templates.base_transformers import
+from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
+    TransformersBase,
+    TransformersBaseAttributes,
+)

 SummarizationTransformersUIProperties = TransformersBase.UIProperties
 SummarizationTransformersUIProperties.output_type = OutputTypes.TEXT
 SummarizationTransformersUIProperties.tags.extend([Tags.SUMMARIZATION, Tags.TEXT])


+class SummarizationInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the summarization pipeline.
+
+    Attributes:
+        return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+        return_tensors (bool | None): Whether or not to include the tensors of predictions.
+        clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+    """
+
+    return_text: bool | None = True
+    return_tensors: bool | None = False
+    clean_up_tokenization_spaces: bool | None = False
+
+
+class SummarizationTransformersAttributes(TransformersBaseAttributes):
+    """Defines the complete set of attributes for the SummarizationTransformers template.
+
+    Inherits general transformer settings from TransformersBaseAttributes.
+
+    Attributes:
+        inference_kwargs: Task-specific parameters for the summarization pipeline,
+            such as `clean_up_tokenization_spaces`.
+    """
+
+    inference_kwargs: SummarizationInferenceKwargs = Field(default_factory=SummarizationInferenceKwargs)
+
+
 class SummarizationTransformers(TransformersBase):
     """Template for text summarization using a Hugging Face Transformers pipeline.

@@ -39,6 +71,7 @@ class SummarizationTransformers(TransformersBase):

     """

+    AttributesBaseModel = SummarizationTransformersAttributes
     SUMMARY_TEXT_KEY = "summary_text"
     UIProperties = SummarizationTransformersUIProperties

@@ -63,9 +96,11 @@ class SummarizationTransformers(TransformersBase):
             DataContainer: DataContainer including the summarized text.
         """
         for text_packet in container.texts:
-
-            self.
-
-
-
+            results = self.pipeline(
+                text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+            )
+            if results:
+                summarized_text = results[0].get(self.SUMMARY_TEXT_KEY)
+                if summarized_text:
+                    text_packet.content = summarized_text
         return container

--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-
 import numpy as np
 import torch
 from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, TextPacket
@@ -65,6 +64,7 @@ class TextToSpeechTransformers(TransformersBase):
     """

     AttributesBaseModel = TextToSpeechAttributes
+    SAMPLE_RATE_KEY = "sampling_rate"
     UIProperties = TextToSpeechTransformersUIProperties

     def initialize(self) -> None:
@@ -94,7 +94,7 @@ class TextToSpeechTransformers(TransformersBase):
             else {}
         )
         output = self.pipeline("Fetching sampling rate.", forward_params=forward_params)
-        sample_rate = output.get(
+        sample_rate = output.get(self.SAMPLE_RATE_KEY, 16000)

         return sample_rate

@@ -132,7 +132,9 @@ class TextToSpeechTransformers(TransformersBase):
             else {}
         )
         for chunk in sentences:
-            output = self.pipeline(
+            output = self.pipeline(
+                chunk, forward_params=forward_params, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+            )
             total_audio.append(output["audio"][0] if output["audio"].ndim == 2 else output["audio"])
         if total_audio:
             total_audio = np.concatenate(total_audio)

--- sinapsis_huggingface-0.2.10/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py
+++ sinapsis_huggingface-0.2.11/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py
@@ -1,11 +1,12 @@
 # -*- coding: utf-8 -*-

-
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer
 from sinapsis_core.template_base.base_models import OutputTypes

 from sinapsis_huggingface_transformers.helpers.tags import Tags
 from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
     TransformersBase,
     TransformersBaseAttributes,
 )
@@ -15,14 +16,31 @@ TranslationTransformersUIProperties.output_type = OutputTypes.TEXT
 TranslationTransformersUIProperties.tags.extend([Tags.LANGUAGE, Tags.TRANSLATION])


+class TranslationInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the translation pipeline.
+
+    Attributes:
+        return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+        return_tensors (bool | None): Whether or not to include the tensors of predictions.
+        clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+    """
+
+    return_text: bool | None = True
+    return_tensors: bool | None = False
+    clean_up_tokenization_spaces: bool | None = False
+
+
 class TranslationTransformersAttributes(TransformersBaseAttributes):
     """Attributes for the transformers pipeline translation task.

     Attributes:
+        inference_kwargs: Task-specific parameters for the translation pipeline,
+            such as `clean_up_tokenization_spaces`.
         source_language (str): The language code of the source language (e.g., "en" for English).
         target_language (str): The language code of the target language (e.g., "fr" for French).
     """

+    inference_kwargs: TranslationInferenceKwargs = Field(default_factory=TranslationInferenceKwargs)
     source_language: str
     target_language: str

@@ -79,8 +97,11 @@ class TranslationTransformers(TransformersBase):
             DataContainer: DataContainer including the translated text.
         """
         for text_packet in container.texts:
-
-            self.
-
-
+            results = self.pipeline(
+                text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+            )
+            if results:
+                translated_text = results[0].get(self.TRANSLATION_TEXT_KEY)
+                if translated_text:
+                    text_packet.content = translated_text
         return container
