sinapsis-huggingface 0.2.10__tar.gz → 0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/PKG-INFO +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface.egg-info/PKG-INFO +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py +77 -14
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py +12 -11
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py +39 -7
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py +8 -4
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino.py +31 -11
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py +29 -10
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py +37 -4
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py +43 -8
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py +41 -6
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py +5 -3
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py +26 -5
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/pyproject.toml +1 -1
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/LICENSE +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/README.md +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface.egg-info/SOURCES.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface.egg-info/dependency_links.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface.egg-info/requires.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface.egg-info/top_level.txt +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/hugging_face_embedding_extractor.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_audio.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/grounding_dino_keys.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino_classification.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/text_to_sentences.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py +0 -0
- {sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/setup.cfg +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
|
|
3
|
+
import gc
|
|
3
4
|
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import
|
|
5
|
+
from typing import Literal
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import torch
|
|
8
9
|
from diffusers import DiffusionPipeline
|
|
9
|
-
from pydantic import BaseModel, ConfigDict
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
10
11
|
from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
|
|
11
12
|
from sinapsis_core.template_base import Template
|
|
12
13
|
from sinapsis_core.template_base.base_models import (
|
|
@@ -20,6 +21,37 @@ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
|
|
|
20
21
|
from sinapsis_huggingface_diffusers.helpers.tags import Tags
|
|
21
22
|
|
|
22
23
|
|
|
24
|
+
class BaseGenerationParams(BaseModel):
|
|
25
|
+
"""Defines the core parameters for a diffusers generation pipeline.
|
|
26
|
+
|
|
27
|
+
This model is flexible and allows any other parameters (e.g., `strength`)
|
|
28
|
+
to be passed, which will be forwarded to the underlying pipeline.
|
|
29
|
+
|
|
30
|
+
Attributes:
|
|
31
|
+
prompt (str | list[str] | None): The text prompt(s) to guide image generation.
|
|
32
|
+
height (int | None): The height in pixels of the generated image.
|
|
33
|
+
width (int | None): The width in pixels of the generated image.
|
|
34
|
+
negative_prompt (str | list[str] | None): Prompt(s) to guide the model away
|
|
35
|
+
from generating certain things.
|
|
36
|
+
num_inference_steps (int | None): The number of denoising steps. More steps
|
|
37
|
+
typically result in higher quality but are slower. Defaults to 50.
|
|
38
|
+
guidance_scale (float | None): Controls how much the prompt influences the
|
|
39
|
+
output. Higher values mean stronger adherence. Defaults to 7.5.
|
|
40
|
+
num_images_per_prompt (int | None): The number of images to generate per
|
|
41
|
+
prompt. Defaults to 1.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
model_config = ConfigDict(extra="allow")
|
|
45
|
+
|
|
46
|
+
prompt: str | list[str] | None = None
|
|
47
|
+
height: int | None = None
|
|
48
|
+
width: int | None = None
|
|
49
|
+
negative_prompt: str | list[str] | None = None
|
|
50
|
+
num_inference_steps: int | None = 50
|
|
51
|
+
guidance_scale: float | None = 7.5
|
|
52
|
+
num_images_per_prompt: int | None = 1
|
|
53
|
+
|
|
54
|
+
|
|
23
55
|
class BaseDiffusersAttributes(TemplateAttributes):
|
|
24
56
|
"""Configuration attributes for setting up a diffusion pipeline and generating images.
|
|
25
57
|
|
|
@@ -33,7 +65,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
|
|
|
33
65
|
device (Literal["cuda", "cpu"]): Device for computations, either "cpu" or "cuda".
|
|
34
66
|
torch_dtype (Literal["float16", "float32"]): Data type for PyTorch tensors.
|
|
35
67
|
enable_model_cpu_offload (bool): If True, enables CPU offloading to reduce GPU memory usage.
|
|
36
|
-
generation_params (
|
|
68
|
+
generation_params (BaseGenerationParams): Parameters for image generation (e.g., prompt, guidance_scale).
|
|
37
69
|
seed (int | list[int] | None): Random seed(s) for reproducibility.
|
|
38
70
|
overwrite_images (bool): Whether to overwrite the existing images in the container.
|
|
39
71
|
Defaults to False.
|
|
@@ -44,7 +76,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
|
|
|
44
76
|
device: Literal["cuda", "cpu"]
|
|
45
77
|
torch_dtype: Literal["float16", "float32"] = "float16"
|
|
46
78
|
enable_model_cpu_offload: bool = False
|
|
47
|
-
generation_params:
|
|
79
|
+
generation_params: BaseGenerationParams = Field(default_factory=BaseGenerationParams)
|
|
48
80
|
seed: int | list[int] | None = None
|
|
49
81
|
overwrite_images: bool = False
|
|
50
82
|
|
|
@@ -76,10 +108,17 @@ class BaseDiffusers(Template, ABC):
|
|
|
76
108
|
|
|
77
109
|
def __init__(self, attributes: TemplateAttributeType) -> None:
|
|
78
110
|
super().__init__(attributes)
|
|
111
|
+
self.initialize()
|
|
79
112
|
|
|
113
|
+
def initialize(self) -> None:
|
|
114
|
+
"""Initializes the template's common state for creation or reset.
|
|
115
|
+
|
|
116
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
117
|
+
a consistent state. Can be overriden by subclasses for specific behaviour.
|
|
118
|
+
"""
|
|
80
119
|
self.pipeline = self._make_pipeline()
|
|
81
120
|
self.pipeline.set_progress_bar_config(disable=True)
|
|
82
|
-
self.num_images_per_prompt = self.attributes.generation_params.
|
|
121
|
+
self.num_images_per_prompt = self.attributes.generation_params.num_images_per_prompt
|
|
83
122
|
self.generator = self._make_generator()
|
|
84
123
|
|
|
85
124
|
if self.attributes.enable_model_cpu_offload:
|
|
@@ -124,7 +163,7 @@ class BaseDiffusers(Template, ABC):
|
|
|
124
163
|
pipeline_class = self._pipeline_class()
|
|
125
164
|
return pipeline_class.from_pretrained(
|
|
126
165
|
self.attributes.model_path,
|
|
127
|
-
|
|
166
|
+
dtype=self.TORCH_DTYPE.get(self.attributes.torch_dtype),
|
|
128
167
|
cache_dir=self.attributes.model_cache_dir,
|
|
129
168
|
).to(self.attributes.device)
|
|
130
169
|
|
|
@@ -163,13 +202,15 @@ class BaseDiffusers(Template, ABC):
|
|
|
163
202
|
inputs = {}
|
|
164
203
|
output = self.pipeline(
|
|
165
204
|
**inputs,
|
|
166
|
-
**self.attributes.generation_params,
|
|
205
|
+
**self.attributes.generation_params.model_dump(exclude_none=True),
|
|
167
206
|
generator=self.generator,
|
|
168
207
|
output_type="np",
|
|
169
208
|
)
|
|
170
209
|
generated_images = output.images if output_attribute == "images" else output.frames[0]
|
|
210
|
+
images_as_uint8 = [(image * 255).clip(0, 255).astype(np.uint8) for image in generated_images]
|
|
211
|
+
del output
|
|
171
212
|
|
|
172
|
-
return
|
|
213
|
+
return images_as_uint8
|
|
173
214
|
|
|
174
215
|
def _update_images_in_container(self, container: DataContainer, new_packets: list[ImagePacket]) -> None:
|
|
175
216
|
"""Updates the container with new image packets based on the `overwrite_images` attribute.
|
|
@@ -187,17 +228,39 @@ class BaseDiffusers(Template, ABC):
|
|
|
187
228
|
else:
|
|
188
229
|
container.images.extend(new_packets)
|
|
189
230
|
|
|
190
|
-
|
|
231
|
+
@staticmethod
|
|
232
|
+
def clear_memory() -> None:
|
|
191
233
|
"""Clears memory to free up resources.
|
|
192
234
|
|
|
193
235
|
This method performs garbage collection and clears GPU memory (if applicable) to prevent memory leaks
|
|
194
236
|
and ensure efficient resource usage.
|
|
195
237
|
"""
|
|
196
|
-
|
|
197
|
-
if
|
|
238
|
+
gc.collect()
|
|
239
|
+
if torch.cuda.is_available():
|
|
198
240
|
torch.cuda.empty_cache()
|
|
199
|
-
torch.cuda.ipc_collect()
|
|
200
241
|
|
|
201
242
|
def reset_state(self, template_name: str | None = None) -> None:
|
|
202
|
-
|
|
203
|
-
|
|
243
|
+
"""Releases the pipeline and processor from memory and re-instantiates the template.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
template_name (str | None, optional): The name of the template instance being reset. Defaults to None.
|
|
247
|
+
"""
|
|
248
|
+
_ = template_name
|
|
249
|
+
|
|
250
|
+
if hasattr(self, "pipeline") and self.pipeline is not None:
|
|
251
|
+
components = self.pipeline.components
|
|
252
|
+
for component in components.values():
|
|
253
|
+
if isinstance(component, torch.nn.Module):
|
|
254
|
+
component.to("cpu")
|
|
255
|
+
|
|
256
|
+
del self.pipeline
|
|
257
|
+
|
|
258
|
+
if hasattr(self, "processor"):
|
|
259
|
+
del self.processor
|
|
260
|
+
|
|
261
|
+
if hasattr(self, "generator"):
|
|
262
|
+
del self.generator
|
|
263
|
+
|
|
264
|
+
self.clear_memory()
|
|
265
|
+
self.initialize()
|
|
266
|
+
self.logger.info(f"Reset template instance `{self.instance_name}`")
|
|
@@ -8,7 +8,6 @@ import numpy as np
|
|
|
8
8
|
from diffusers import AutoPipelineForImage2Image
|
|
9
9
|
from PIL import Image
|
|
10
10
|
from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
|
|
11
|
-
from sinapsis_core.template_base.base_models import TemplateAttributeType
|
|
12
11
|
|
|
13
12
|
from sinapsis_huggingface_diffusers.helpers.tags import Tags
|
|
14
13
|
from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusers
|
|
@@ -52,9 +51,13 @@ class ImageToImageDiffusers(BaseDiffusers):
|
|
|
52
51
|
|
|
53
52
|
UIProperties = ImageToImageDiffusersUIProperties
|
|
54
53
|
|
|
55
|
-
def
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
def initialize(self) -> None:
|
|
55
|
+
"""Initializes the template's common state for creation or reset.
|
|
56
|
+
|
|
57
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
58
|
+
a consistent state. Can be overriden by subclasses for specific behaviour.
|
|
59
|
+
"""
|
|
60
|
+
super().initialize()
|
|
58
61
|
self.output_attribute: Literal["images", "frames"] = "images"
|
|
59
62
|
self.num_duplicates = self.num_images_per_prompt
|
|
60
63
|
|
|
@@ -67,7 +70,8 @@ class ImageToImageDiffusers(BaseDiffusers):
|
|
|
67
70
|
"""
|
|
68
71
|
return AutoPipelineForImage2Image
|
|
69
72
|
|
|
70
|
-
|
|
73
|
+
@staticmethod
|
|
74
|
+
def _convert_image_format(image_packet: ImagePacket) -> Image.Image:
|
|
71
75
|
"""Converts the input image into the appropriate format for the pipeline.
|
|
72
76
|
|
|
73
77
|
The format depends on the `requires_pil` attribute:
|
|
@@ -78,12 +82,9 @@ class ImageToImageDiffusers(BaseDiffusers):
|
|
|
78
82
|
image_packet (ImagePacket): The input image packet.
|
|
79
83
|
|
|
80
84
|
Returns:
|
|
81
|
-
|
|
82
|
-
PIL Image.
|
|
85
|
+
Image.Image: The converted image as a PIL Image.
|
|
83
86
|
"""
|
|
84
|
-
|
|
85
|
-
return Image.fromarray(image_packet.content)
|
|
86
|
-
return image_packet.content / 255.0
|
|
87
|
+
return Image.fromarray(image_packet.content)
|
|
87
88
|
|
|
88
89
|
def preprocess_inputs(self, image_packet: ImagePacket) -> dict[str, np.ndarray | list[np.ndarray]]:
|
|
89
90
|
"""Prepares the input image for the image-to-image pipeline.
|
|
@@ -157,6 +158,6 @@ class ImageToImageDiffusers(BaseDiffusers):
|
|
|
157
158
|
new_packets = [ImagePacket(content=image) for image in all_generated_images]
|
|
158
159
|
processed_packets, _ = self.post_processing_packets(new_packets, old_packets)
|
|
159
160
|
self._update_images_in_container(container, processed_packets)
|
|
160
|
-
self.
|
|
161
|
+
self.clear_memory()
|
|
161
162
|
|
|
162
163
|
return container
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
+
from typing import Literal
|
|
2
3
|
|
|
3
4
|
from diffusers import I2VGenXLPipeline
|
|
4
|
-
from
|
|
5
|
+
from pydantic import Field
|
|
5
6
|
|
|
6
7
|
from sinapsis_huggingface_diffusers.helpers.tags import Tags
|
|
8
|
+
from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusersAttributes, BaseGenerationParams
|
|
7
9
|
from sinapsis_huggingface_diffusers.templates.image_to_image_diffusers import (
|
|
8
10
|
ImageToImageDiffusers,
|
|
9
11
|
)
|
|
@@ -12,6 +14,32 @@ ImageToVideoGenXLDiffusersUIProperties = ImageToImageDiffusers.UIProperties
|
|
|
12
14
|
ImageToVideoGenXLDiffusersUIProperties.tags.extend([Tags.VIDEO, Tags.IMAGE_TO_VIDEO])
|
|
13
15
|
|
|
14
16
|
|
|
17
|
+
class ImageToVideoGenerationParams(BaseGenerationParams):
|
|
18
|
+
"""Defines the specific parameters for image-to-video generation pipelines.
|
|
19
|
+
|
|
20
|
+
Attributes:
|
|
21
|
+
target_fps (int | None): The target frames per second for the generated video.
|
|
22
|
+
num_frames (int | None): The total number of frames to generate in the video. Defaults to 16.
|
|
23
|
+
num_videos_per_prompt (int | None): The number of different videos to generate
|
|
24
|
+
from the same input image and prompt.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
target_fps: int | None = None
|
|
28
|
+
num_frames: int | None = 16
|
|
29
|
+
num_videos_per_prompt: int | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ImageToVideoGenXLDiffusersAttributes(BaseDiffusersAttributes):
|
|
33
|
+
"""Defines the complete set of attributes for the ImageToVideoGenXLDiffusers template.
|
|
34
|
+
|
|
35
|
+
Attributes:
|
|
36
|
+
generation_params (ImageToVideoGenerationParams): Task-specific parameters for
|
|
37
|
+
video generation, such as `num_frames` and `target_fps`.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
generation_params: ImageToVideoGenerationParams = Field(default_factory=ImageToVideoGenerationParams)
|
|
41
|
+
|
|
42
|
+
|
|
15
43
|
class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
|
|
16
44
|
"""This class implements a specific template for image-to-video generation using Hugging Face's
|
|
17
45
|
diffusers. The `ImageToVideoGenXLDiffusers` class inherits from the `ImageToImageDiffusers` template
|
|
@@ -42,14 +70,18 @@ class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
|
|
|
42
70
|
|
|
43
71
|
"""
|
|
44
72
|
|
|
73
|
+
AttributesBaseModel = ImageToVideoGenXLDiffusersAttributes
|
|
45
74
|
UIProperties = ImageToVideoGenXLDiffusersUIProperties
|
|
46
|
-
DEFAULT_NUM_FRAMES = 16
|
|
47
75
|
|
|
48
|
-
def
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
76
|
+
def initialize(self) -> None:
|
|
77
|
+
"""Initializes the template's common state for creation or reset.
|
|
78
|
+
|
|
79
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
80
|
+
a consistent state. Can be overriden by subclasses for specific behaviour.
|
|
81
|
+
"""
|
|
82
|
+
super().initialize()
|
|
83
|
+
self.output_attribute: Literal["images", "frames"] = "frames"
|
|
84
|
+
self.num_duplicates = self.attributes.generation_params.num_frames
|
|
53
85
|
|
|
54
86
|
@staticmethod
|
|
55
87
|
def _pipeline_class() -> I2VGenXLPipeline:
|
|
@@ -7,7 +7,6 @@ import cv2
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
from diffusers import AutoPipelineForInpainting
|
|
9
9
|
from sinapsis_core.data_containers.data_packet import ImageAnnotations, ImagePacket
|
|
10
|
-
from sinapsis_core.template_base.base_models import TemplateAttributeType
|
|
11
10
|
|
|
12
11
|
from sinapsis_huggingface_diffusers.helpers.tags import Tags
|
|
13
12
|
from sinapsis_huggingface_diffusers.templates.base_diffusers import (
|
|
@@ -75,8 +74,13 @@ class InpaintingDiffusers(ImageToImageDiffusers):
|
|
|
75
74
|
UIProperties = InpaintingDiffusersUIProperties
|
|
76
75
|
AttributesBaseModel = InpaintingDiffusersAttributes
|
|
77
76
|
|
|
78
|
-
def
|
|
79
|
-
|
|
77
|
+
def initialize(self) -> None:
|
|
78
|
+
"""Initializes the template's common state for creation or reset.
|
|
79
|
+
|
|
80
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
81
|
+
a consistent state. Can be overriden by subclasses for specific behaviour.
|
|
82
|
+
"""
|
|
83
|
+
super().initialize()
|
|
80
84
|
if self.attributes.preserve_outside_content and self.attributes.dilation_radius is None:
|
|
81
85
|
raise ValueError("Need to specify a dilation_radius if preserve_outside_content=True")
|
|
82
86
|
|
|
@@ -327,6 +331,6 @@ class InpaintingDiffusers(ImageToImageDiffusers):
|
|
|
327
331
|
if old_packet.annotations:
|
|
328
332
|
new_packet.annotations = old_packet.annotations
|
|
329
333
|
for ann in new_packet.annotations:
|
|
330
|
-
ann.label_str = str(self.attributes.generation_params.
|
|
334
|
+
ann.label_str = str(self.attributes.generation_params.prompt)
|
|
331
335
|
|
|
332
336
|
return new_packets, old_packets
|
|
@@ -80,6 +80,6 @@ class TextToImageDiffusers(BaseDiffusers):
|
|
|
80
80
|
image_packets = [ImagePacket(content=image) for image in generated_images]
|
|
81
81
|
self._set_packet_sources(image_packets)
|
|
82
82
|
self._update_images_in_container(container, image_packets)
|
|
83
|
-
self.
|
|
83
|
+
self.clear_memory()
|
|
84
84
|
|
|
85
85
|
return container
|
|
@@ -111,11 +111,18 @@ class GroundingDINO(Template):
|
|
|
111
111
|
attributes (dict[str, Any]): Dictionary containing configuration parameters.
|
|
112
112
|
"""
|
|
113
113
|
super().__init__(attributes)
|
|
114
|
-
self.
|
|
114
|
+
self.initialize()
|
|
115
|
+
|
|
116
|
+
def initialize(self) -> None:
|
|
117
|
+
"""Initializes the template's common state for creation or reset.
|
|
118
|
+
|
|
119
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
120
|
+
a consistent state. Can be overriden by subclasses for specific behaviour.
|
|
121
|
+
"""
|
|
115
122
|
self.processor = AutoProcessor.from_pretrained(
|
|
116
123
|
self.attributes.model_path, cache_dir=self.attributes.model_cache_dir
|
|
117
124
|
)
|
|
118
|
-
self.model = self._set_model().to(self.device)
|
|
125
|
+
self.model = self._set_model().to(self.attributes.device)
|
|
119
126
|
self.max_tokens = self.processor.tokenizer.model_max_length
|
|
120
127
|
self.text_input = self.validate_and_format_text_input(self.attributes.text_input)
|
|
121
128
|
|
|
@@ -171,7 +178,7 @@ class GroundingDINO(Template):
|
|
|
171
178
|
images=image_packet.content,
|
|
172
179
|
text=self.text_input,
|
|
173
180
|
return_tensors="pt",
|
|
174
|
-
).to(self.device)
|
|
181
|
+
).to(self.attributes.device)
|
|
175
182
|
|
|
176
183
|
with torch.no_grad():
|
|
177
184
|
outputs = self.model(**inputs)
|
|
@@ -350,19 +357,32 @@ class GroundingDINO(Template):
|
|
|
350
357
|
|
|
351
358
|
return container
|
|
352
359
|
|
|
353
|
-
|
|
360
|
+
@staticmethod
|
|
361
|
+
def clear_memory() -> None:
|
|
354
362
|
"""Clears memory to free up resources.
|
|
355
363
|
|
|
356
364
|
This method performs garbage collection and clears GPU memory (if applicable) to prevent memory leaks
|
|
357
365
|
and ensure efficient resource usage.
|
|
358
366
|
"""
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
gc.collect()
|
|
362
|
-
if self.attributes.device == "cuda":
|
|
367
|
+
gc.collect()
|
|
368
|
+
if torch.cuda.is_available():
|
|
363
369
|
torch.cuda.empty_cache()
|
|
364
|
-
torch.cuda.ipc_collect()
|
|
365
370
|
|
|
366
371
|
def reset_state(self, template_name: str | None = None) -> None:
|
|
367
|
-
|
|
368
|
-
|
|
372
|
+
"""Releases the pipeline and processor from memory and re-instantiates the template.
|
|
373
|
+
|
|
374
|
+
Args:
|
|
375
|
+
template_name (str | None, optional): The name of the template instance being reset. Defaults to None.
|
|
376
|
+
"""
|
|
377
|
+
_ = template_name
|
|
378
|
+
|
|
379
|
+
if hasattr(self, "model") and self.model is not None:
|
|
380
|
+
self.model.to("cpu")
|
|
381
|
+
del self.model
|
|
382
|
+
|
|
383
|
+
if hasattr(self, "processor"):
|
|
384
|
+
del self.processor
|
|
385
|
+
|
|
386
|
+
self.clear_memory()
|
|
387
|
+
self.initialize()
|
|
388
|
+
self.logger.info(f"Reset template instance `{self.instance_name}`")
|
|
@@ -6,7 +6,7 @@ from abc import abstractmethod
|
|
|
6
6
|
from typing import Any, Literal
|
|
7
7
|
|
|
8
8
|
import torch
|
|
9
|
-
from pydantic import Field
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
10
10
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
11
11
|
from sinapsis_core.template_base import Template
|
|
12
12
|
from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
|
|
@@ -17,6 +17,29 @@ from transformers.pipelines import Pipeline
|
|
|
17
17
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
class BaseInferenceKwargs(BaseModel):
|
|
21
|
+
"""A flexible container for keyword arguments passed during inference.
|
|
22
|
+
|
|
23
|
+
Attributes:
|
|
24
|
+
generate_kwargs (dict[str, Any] | None): A dictionary of advanced parameters passed directly to the
|
|
25
|
+
model's `generate` method for fine-tuning the pipeline generation.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
generate_kwargs: dict[str, Any] | None = None
|
|
29
|
+
model_config = ConfigDict(extra="allow")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class PipelineKwargs(BaseModel):
|
|
33
|
+
"""A flexible container for keyword arguments used to create the pipeline.
|
|
34
|
+
|
|
35
|
+
This model allows any extra parameters to be passed during pipeline instantiation.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
device: Literal["cuda", "cpu"]
|
|
39
|
+
torch_dtype: Literal["float16", "float32", "auto"] = "float16"
|
|
40
|
+
model_config = ConfigDict(extra="allow")
|
|
41
|
+
|
|
42
|
+
|
|
20
43
|
class TransformersBaseAttributes(TemplateAttributes):
|
|
21
44
|
"""Attributes for configuring the TransformersPipelineTemplate.
|
|
22
45
|
|
|
@@ -31,20 +54,18 @@ class TransformersBaseAttributes(TemplateAttributes):
|
|
|
31
54
|
seed (int | None): Random seed for reproducibility. If provided, this seed will ensure
|
|
32
55
|
consistent results for pipelines that involve randomness. If not provided, a random seed
|
|
33
56
|
will be generated internally.
|
|
34
|
-
pipeline_kwargs (
|
|
57
|
+
pipeline_kwargs (PipelineKwargs): Keyword arguments passed during the instantiation of the
|
|
35
58
|
Hugging Face pipeline.
|
|
36
|
-
inference_kwargs (
|
|
59
|
+
inference_kwargs (BaseInferenceKwargs): Keyword arguments passed during the task execution or
|
|
37
60
|
inference phase. These allow dynamic customization of the task, such as `max_length`
|
|
38
61
|
and `min_length` for summarization, or `max_new_tokens` for image-to-text.
|
|
39
62
|
"""
|
|
40
63
|
|
|
41
64
|
model_path: str
|
|
42
65
|
model_cache_dir: str = str(SINAPSIS_CACHE_DIR)
|
|
43
|
-
device: Literal["cuda", "cpu"]
|
|
44
|
-
torch_dtype: Literal["float16", "float32"] = "float16"
|
|
45
66
|
seed: int | None = None
|
|
46
|
-
pipeline_kwargs:
|
|
47
|
-
inference_kwargs:
|
|
67
|
+
pipeline_kwargs: PipelineKwargs = Field(default_factory=PipelineKwargs)
|
|
68
|
+
inference_kwargs: BaseInferenceKwargs = Field(default_factory=BaseInferenceKwargs)
|
|
48
69
|
|
|
49
70
|
|
|
50
71
|
class TransformersBase(Template):
|
|
@@ -123,9 +144,7 @@ class TransformersBase(Template):
|
|
|
123
144
|
return pipeline(
|
|
124
145
|
task=self.task,
|
|
125
146
|
model=self.attributes.model_path,
|
|
126
|
-
|
|
127
|
-
torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
|
|
128
|
-
**self.attributes.pipeline_kwargs,
|
|
147
|
+
**self.attributes.pipeline_kwargs.model_dump(),
|
|
129
148
|
**kwargs,
|
|
130
149
|
)
|
|
131
150
|
|
|
@@ -2,17 +2,47 @@
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from PIL import Image
|
|
5
|
+
from pydantic import Field
|
|
5
6
|
from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
|
|
6
7
|
from sinapsis_core.template_base.base_models import OutputTypes
|
|
7
8
|
|
|
8
9
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
9
|
-
from sinapsis_huggingface_transformers.templates.base_transformers import
|
|
10
|
+
from sinapsis_huggingface_transformers.templates.base_transformers import (
|
|
11
|
+
BaseInferenceKwargs,
|
|
12
|
+
TransformersBase,
|
|
13
|
+
TransformersBaseAttributes,
|
|
14
|
+
)
|
|
10
15
|
|
|
11
16
|
ImageToTextTransformersUIProperties = TransformersBase.UIProperties
|
|
12
17
|
ImageToTextTransformersUIProperties.output_type = OutputTypes.TEXT
|
|
13
18
|
ImageToTextTransformersUIProperties.tags.extend([Tags.IMAGE, Tags.TEXT, Tags.IMAGE_TO_TEXT])
|
|
14
19
|
|
|
15
20
|
|
|
21
|
+
class ImageToTextInferenceKwargs(BaseInferenceKwargs):
|
|
22
|
+
"""Specific keyword arguments for the image-to-text pipeline.
|
|
23
|
+
|
|
24
|
+
Attributes:
|
|
25
|
+
max_new_tokens (int | None): The maximum number of tokens to generate in the description.
|
|
26
|
+
timeout (float | None): The maximum time in seconds to wait for fetching images from the web.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
max_new_tokens: int | None = None
|
|
30
|
+
timeout: float | None = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ImageToTextTransformersAttributes(TransformersBaseAttributes):
|
|
34
|
+
"""Defines the complete set of attributes for the ImageToTextTransformers template.
|
|
35
|
+
|
|
36
|
+
Inherits general transformer settings from TransformersBaseAttributes.
|
|
37
|
+
|
|
38
|
+
Attributes:
|
|
39
|
+
inference_kwargs (ImageToTextInferenceKwargs): Task-specific parameters for the image-to-text pipeline,
|
|
40
|
+
such as `max_new_tokens`.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
inference_kwargs: ImageToTextInferenceKwargs = Field(default_factory=ImageToTextInferenceKwargs)
|
|
44
|
+
|
|
45
|
+
|
|
16
46
|
class ImageToTextTransformers(TransformersBase):
|
|
17
47
|
"""ImageToTextTransformers template to generate text from an image.
|
|
18
48
|
|
|
@@ -37,6 +67,7 @@ class ImageToTextTransformers(TransformersBase):
|
|
|
37
67
|
|
|
38
68
|
"""
|
|
39
69
|
|
|
70
|
+
AttributesBaseModel = ImageToTextTransformersAttributes
|
|
40
71
|
GENERATED_TEXT_KEY = "generated_text"
|
|
41
72
|
UIProperties = ImageToTextTransformersUIProperties
|
|
42
73
|
|
|
@@ -78,7 +109,9 @@ class ImageToTextTransformers(TransformersBase):
|
|
|
78
109
|
"""
|
|
79
110
|
for image_packet in container.images:
|
|
80
111
|
image = self._convert_to_pil(image_packet.content)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
112
|
+
results = self.pipeline(image, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
|
|
113
|
+
if results:
|
|
114
|
+
text_description = results[0].get(self.GENERATED_TEXT_KEY)
|
|
115
|
+
if text_description:
|
|
116
|
+
container.texts.append(TextPacket(content=text_description))
|
|
84
117
|
return container
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
+
from typing import Literal
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
5
|
+
from pydantic import Field
|
|
4
6
|
from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
|
|
5
7
|
from sinapsis_core.template_base.base_models import OutputTypes
|
|
6
8
|
|
|
7
9
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
8
|
-
from sinapsis_huggingface_transformers.templates.base_transformers import
|
|
10
|
+
from sinapsis_huggingface_transformers.templates.base_transformers import (
|
|
11
|
+
BaseInferenceKwargs,
|
|
12
|
+
TransformersBase,
|
|
13
|
+
TransformersBaseAttributes,
|
|
14
|
+
)
|
|
9
15
|
|
|
10
16
|
SpeechToTextTransformersUIProperties = TransformersBase.UIProperties
|
|
11
17
|
SpeechToTextTransformersUIProperties.output_type = OutputTypes.TEXT
|
|
@@ -14,6 +20,30 @@ SpeechToTextTransformersUIProperties.tags.extend(
|
|
|
14
20
|
)
|
|
15
21
|
|
|
16
22
|
|
|
23
|
+
class SpeechToTextInferenceKwargs(BaseInferenceKwargs):
|
|
24
|
+
"""Specific keyword arguments for the automatic-speech-recognition pipeline.
|
|
25
|
+
|
|
26
|
+
Attributes:
|
|
27
|
+
return_timestamps (Literal["char", "word"] | bool | None): If set, controls the granularity of
|
|
28
|
+
timestamps returned with the transcribed text. Can be "char", "word", or True for segments.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
return_timestamps: Literal["char", "word"] | bool | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class SpeechToTextTransformersAttributes(TransformersBaseAttributes):
|
|
35
|
+
"""Defines the set of attributes for the SpeechToTextTransformers template.
|
|
36
|
+
|
|
37
|
+
Inherits general transformer settings from TransformersBaseAttributes.
|
|
38
|
+
|
|
39
|
+
Attributes:
|
|
40
|
+
inference_kwargs (SpeechToTextInferenceKwargs): Task-specific parameters for the speech-to-text pipeline,
|
|
41
|
+
such as `return_timestamps`.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
inference_kwargs: SpeechToTextInferenceKwargs = Field(default_factory=SpeechToTextInferenceKwargs)
|
|
45
|
+
|
|
46
|
+
|
|
17
47
|
class SpeechToTextTransformers(TransformersBase):
|
|
18
48
|
"""Template to perform speech-to-text actions
|
|
19
49
|
using the HuggingFace module through the 'transformers' architecture.
|
|
@@ -40,6 +70,7 @@ class SpeechToTextTransformers(TransformersBase):
|
|
|
40
70
|
|
|
41
71
|
"""
|
|
42
72
|
|
|
73
|
+
AttributesBaseModel = SpeechToTextTransformersAttributes
|
|
43
74
|
TEXT_KEY = "text"
|
|
44
75
|
UIProperties = SpeechToTextTransformersUIProperties
|
|
45
76
|
|
|
@@ -65,11 +96,15 @@ class SpeechToTextTransformers(TransformersBase):
|
|
|
65
96
|
for audio_packet in container.audios:
|
|
66
97
|
audio = audio_packet.content
|
|
67
98
|
audio = audio.astype(np.float32)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
99
|
+
results = self.pipeline(audio, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
|
|
100
|
+
if results:
|
|
101
|
+
transcribed_text = results.get(self.TEXT_KEY)
|
|
102
|
+
if transcribed_text:
|
|
103
|
+
self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
|
|
104
|
+
container.texts.append(
|
|
105
|
+
TextPacket(
|
|
106
|
+
content=transcribed_text,
|
|
107
|
+
source=audio_packet.source,
|
|
108
|
+
)
|
|
109
|
+
)
|
|
75
110
|
return container
|
|
@@ -1,16 +1,48 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
|
|
3
|
+
from pydantic import Field
|
|
3
4
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
4
5
|
from sinapsis_core.template_base.base_models import OutputTypes
|
|
5
6
|
|
|
6
7
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
7
|
-
from sinapsis_huggingface_transformers.templates.base_transformers import
|
|
8
|
+
from sinapsis_huggingface_transformers.templates.base_transformers import (
|
|
9
|
+
BaseInferenceKwargs,
|
|
10
|
+
TransformersBase,
|
|
11
|
+
TransformersBaseAttributes,
|
|
12
|
+
)
|
|
8
13
|
|
|
9
14
|
SummarizationTransformersUIProperties = TransformersBase.UIProperties
|
|
10
15
|
SummarizationTransformersUIProperties.output_type = OutputTypes.TEXT
|
|
11
16
|
SummarizationTransformersUIProperties.tags.extend([Tags.SUMMARIZATION, Tags.TEXT])
|
|
12
17
|
|
|
13
18
|
|
|
19
|
+
class SummarizationInferenceKwargs(BaseInferenceKwargs):
|
|
20
|
+
"""Specific keyword arguments for the summarization pipeline.
|
|
21
|
+
|
|
22
|
+
Attributes:
|
|
23
|
+
return_text (bool | None): Whether or not to include the decoded texts in the outputs.
|
|
24
|
+
return_tensors (bool | None): Whether or not to include the tensors of predictions.
|
|
25
|
+
clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
return_text: bool | None = True
|
|
29
|
+
return_tensors: bool | None = False
|
|
30
|
+
clean_up_tokenization_spaces: bool | None = False
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SummarizationTransformersAttributes(TransformersBaseAttributes):
|
|
34
|
+
"""Defines the complete set of attributes for the SummarizationTransformers template.
|
|
35
|
+
|
|
36
|
+
Inherits general transformer settings from TransformersBaseAttributes.
|
|
37
|
+
|
|
38
|
+
Attributes:
|
|
39
|
+
inference_kwargs: Task-specific parameters for the summarization pipeline,
|
|
40
|
+
such as `clean_up_tokenization_spaces`.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
inference_kwargs: SummarizationInferenceKwargs = Field(default_factory=SummarizationInferenceKwargs)
|
|
44
|
+
|
|
45
|
+
|
|
14
46
|
class SummarizationTransformers(TransformersBase):
|
|
15
47
|
"""Template for text summarization using a Hugging Face Transformers pipeline.
|
|
16
48
|
|
|
@@ -39,6 +71,7 @@ class SummarizationTransformers(TransformersBase):
|
|
|
39
71
|
|
|
40
72
|
"""
|
|
41
73
|
|
|
74
|
+
AttributesBaseModel = SummarizationTransformersAttributes
|
|
42
75
|
SUMMARY_TEXT_KEY = "summary_text"
|
|
43
76
|
UIProperties = SummarizationTransformersUIProperties
|
|
44
77
|
|
|
@@ -63,9 +96,11 @@ class SummarizationTransformers(TransformersBase):
|
|
|
63
96
|
DataContainer: DataContainer including the summarized text.
|
|
64
97
|
"""
|
|
65
98
|
for text_packet in container.texts:
|
|
66
|
-
|
|
67
|
-
self.
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
99
|
+
results = self.pipeline(
|
|
100
|
+
text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
|
|
101
|
+
)
|
|
102
|
+
if results:
|
|
103
|
+
summarized_text = results[0].get(self.SUMMARY_TEXT_KEY)
|
|
104
|
+
if summarized_text:
|
|
105
|
+
text_packet.content = summarized_text
|
|
71
106
|
return container
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
2
|
import numpy as np
|
|
4
3
|
import torch
|
|
5
4
|
from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, TextPacket
|
|
@@ -65,6 +64,7 @@ class TextToSpeechTransformers(TransformersBase):
|
|
|
65
64
|
"""
|
|
66
65
|
|
|
67
66
|
AttributesBaseModel = TextToSpeechAttributes
|
|
67
|
+
SAMPLE_RATE_KEY = "sampling_rate"
|
|
68
68
|
UIProperties = TextToSpeechTransformersUIProperties
|
|
69
69
|
|
|
70
70
|
def initialize(self) -> None:
|
|
@@ -94,7 +94,7 @@ class TextToSpeechTransformers(TransformersBase):
|
|
|
94
94
|
else {}
|
|
95
95
|
)
|
|
96
96
|
output = self.pipeline("Fetching sampling rate.", forward_params=forward_params)
|
|
97
|
-
sample_rate = output.get(
|
|
97
|
+
sample_rate = output.get(self.SAMPLE_RATE_KEY, 16000)
|
|
98
98
|
|
|
99
99
|
return sample_rate
|
|
100
100
|
|
|
@@ -132,7 +132,9 @@ class TextToSpeechTransformers(TransformersBase):
|
|
|
132
132
|
else {}
|
|
133
133
|
)
|
|
134
134
|
for chunk in sentences:
|
|
135
|
-
output = self.pipeline(
|
|
135
|
+
output = self.pipeline(
|
|
136
|
+
chunk, forward_params=forward_params, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
|
|
137
|
+
)
|
|
136
138
|
total_audio.append(output["audio"][0] if output["audio"].ndim == 2 else output["audio"])
|
|
137
139
|
if total_audio:
|
|
138
140
|
total_audio = np.concatenate(total_audio)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
from pydantic import Field
|
|
4
4
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
5
5
|
from sinapsis_core.template_base.base_models import OutputTypes
|
|
6
6
|
|
|
7
7
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
8
8
|
from sinapsis_huggingface_transformers.templates.base_transformers import (
|
|
9
|
+
BaseInferenceKwargs,
|
|
9
10
|
TransformersBase,
|
|
10
11
|
TransformersBaseAttributes,
|
|
11
12
|
)
|
|
@@ -15,14 +16,31 @@ TranslationTransformersUIProperties.output_type = OutputTypes.TEXT
|
|
|
15
16
|
TranslationTransformersUIProperties.tags.extend([Tags.LANGUAGE, Tags.TRANSLATION])
|
|
16
17
|
|
|
17
18
|
|
|
19
|
+
class TranslationInferenceKwargs(BaseInferenceKwargs):
|
|
20
|
+
"""Specific keyword arguments for the translation pipeline.
|
|
21
|
+
|
|
22
|
+
Attributes:
|
|
23
|
+
return_text (bool | None): Whether or not to include the decoded texts in the outputs.
|
|
24
|
+
return_tensors (bool | None): Whether or not to include the tensors of predictions.
|
|
25
|
+
clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
return_text: bool | None = True
|
|
29
|
+
return_tensors: bool | None = False
|
|
30
|
+
clean_up_tokenization_spaces: bool | None = False
|
|
31
|
+
|
|
32
|
+
|
|
18
33
|
class TranslationTransformersAttributes(TransformersBaseAttributes):
|
|
19
34
|
"""Attributes for the transformers pipeline translation task.
|
|
20
35
|
|
|
21
36
|
Attributes:
|
|
37
|
+
inference_kwargs: Task-specific parameters for the translation pipeline,
|
|
38
|
+
such as `clean_up_tokenization_spaces`.
|
|
22
39
|
source_language (str): The language code of the source language (e.g., "en" for English).
|
|
23
40
|
target_language (str): The language code of the target language (e.g., "fr" for French).
|
|
24
41
|
"""
|
|
25
42
|
|
|
43
|
+
inference_kwargs: TranslationInferenceKwargs = Field(default_factory=TranslationInferenceKwargs)
|
|
26
44
|
source_language: str
|
|
27
45
|
target_language: str
|
|
28
46
|
|
|
@@ -79,8 +97,11 @@ class TranslationTransformers(TransformersBase):
|
|
|
79
97
|
DataContainer: DataContainer including the translated text.
|
|
80
98
|
"""
|
|
81
99
|
for text_packet in container.texts:
|
|
82
|
-
|
|
83
|
-
self.
|
|
84
|
-
|
|
85
|
-
|
|
100
|
+
results = self.pipeline(
|
|
101
|
+
text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
|
|
102
|
+
)
|
|
103
|
+
if results:
|
|
104
|
+
translated_text = results[0].get(self.TRANSLATION_TEXT_KEY)
|
|
105
|
+
if translated_text:
|
|
106
|
+
text_packet.content = translated_text
|
|
86
107
|
return container
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|