sinapsis-huggingface 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/PKG-INFO +1 -1
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface.egg-info/PKG-INFO +1 -1
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface.egg-info/SOURCES.txt +0 -1
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py +1 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_audio.py +5 -5
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino.py +4 -2
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py +30 -3
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py +9 -5
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py +29 -3
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py +6 -3
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py +10 -4
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py +9 -5
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py +16 -13
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py +9 -5
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py +9 -5
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/pyproject.toml +1 -1
- sinapsis_huggingface-0.2.7/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_dataset.py +0 -93
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/LICENSE +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/README.md +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface.egg-info/dependency_links.txt +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface.egg-info/requires.txt +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface.egg-info/top_level.txt +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/hugging_face_embedding_extractor.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/grounding_dino_keys.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino_classification.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/tags.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/text_to_sentences.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py +0 -0
- {sinapsis_huggingface-0.2.7 → sinapsis_huggingface-0.2.9}/setup.cfg +0 -0
|
@@ -21,7 +21,6 @@ packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/hel
|
|
|
21
21
|
packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/__init__.py
|
|
22
22
|
packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/hugging_face_embedding_extractor.py
|
|
23
23
|
packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_audio.py
|
|
24
|
-
packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_dataset.py
|
|
25
24
|
packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/__init__.py
|
|
26
25
|
packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/__init__.py
|
|
27
26
|
packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/grounding_dino_keys.py
|
|
@@ -193,6 +193,7 @@ class BaseDiffusers(Template, ABC):
|
|
|
193
193
|
This method performs garbage collection and clears GPU memory (if applicable) to prevent memory leaks
|
|
194
194
|
and ensure efficient resource usage.
|
|
195
195
|
"""
|
|
196
|
+
del self.generator
|
|
196
197
|
if self.attributes.device == "cuda":
|
|
197
198
|
torch.cuda.empty_cache()
|
|
198
199
|
torch.cuda.ipc_collect()
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import io
|
|
4
4
|
from typing import Literal
|
|
5
5
|
|
|
6
|
+
import numpy as np
|
|
6
7
|
import soundfile as sf
|
|
7
8
|
import torch
|
|
8
9
|
import torch.nn.functional as F
|
|
@@ -89,7 +90,7 @@ class SpeakerEmbeddingFromAudio(Template):
|
|
|
89
90
|
return EncoderClassifier.from_hparams(source=self.attributes.model_path, savedir=self.attributes.data_cache_dir)
|
|
90
91
|
|
|
91
92
|
@staticmethod
|
|
92
|
-
def _postprocess_speaker_embedding(speaker_embedding: torch.Tensor) ->
|
|
93
|
+
def _postprocess_speaker_embedding(speaker_embedding: torch.Tensor) -> np.ndarray:
|
|
93
94
|
"""Normalize the speaker embedding tensor and convert it into a NumPy array.
|
|
94
95
|
|
|
95
96
|
Args:
|
|
@@ -99,7 +100,7 @@ class SpeakerEmbeddingFromAudio(Template):
|
|
|
99
100
|
np.ndarray: The normalized embedding as a NumPy array with singleton dimensions squeezed out.
|
|
100
101
|
"""
|
|
101
102
|
speaker_embedding = F.normalize(speaker_embedding, dim=2)
|
|
102
|
-
speaker_embedding_list:
|
|
103
|
+
speaker_embedding_list: np.ndarray = speaker_embedding.detach().numpy().squeeze()
|
|
103
104
|
return speaker_embedding_list
|
|
104
105
|
|
|
105
106
|
@staticmethod
|
|
@@ -158,14 +159,13 @@ class SpeakerEmbeddingFromAudio(Template):
|
|
|
158
159
|
|
|
159
160
|
if len(container.audios) == 1:
|
|
160
161
|
for packet in packets:
|
|
161
|
-
packet.embedding = embeddings[0]
|
|
162
|
+
packet.embedding = [embeddings[0]]
|
|
162
163
|
elif len(container.audios) == len(packets):
|
|
163
164
|
for packet, embedding in zip(packets, embeddings):
|
|
164
|
-
packet.embedding = embedding
|
|
165
|
+
packet.embedding = [embedding]
|
|
165
166
|
else:
|
|
166
167
|
raise ValueError(
|
|
167
168
|
"Mismatch between the number of audio packets and target packets. "
|
|
168
169
|
"Ensure either a single audio or matching numbers of audios and target packets."
|
|
169
170
|
)
|
|
170
|
-
|
|
171
171
|
return container
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
import gc
|
|
4
3
|
from typing import Any, Literal
|
|
5
4
|
|
|
6
5
|
import torch
|
|
@@ -357,6 +356,9 @@ class GroundingDINO(Template):
|
|
|
357
356
|
This method performs garbage collection and clears GPU memory (if applicable) to prevent memory leaks
|
|
358
357
|
and ensure efficient resource usage.
|
|
359
358
|
"""
|
|
359
|
+
for child in self.model.children():
|
|
360
|
+
child.cpu()
|
|
361
|
+
gc.collect()
|
|
360
362
|
if self.attributes.device == "cuda":
|
|
361
363
|
torch.cuda.empty_cache()
|
|
362
364
|
torch.cuda.ipc_collect()
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
|
|
3
|
+
import gc
|
|
3
4
|
import random
|
|
4
5
|
from abc import abstractmethod
|
|
5
6
|
from typing import Any, Literal
|
|
@@ -64,6 +65,14 @@ class TransformersBase(Template):
|
|
|
64
65
|
super().__init__(attributes)
|
|
65
66
|
self._TORCH_DTYPE = {"float16": torch.float16, "float32": torch.float32}
|
|
66
67
|
self.task: str | None = None
|
|
68
|
+
self.initialize()
|
|
69
|
+
|
|
70
|
+
def initialize(self) -> None:
|
|
71
|
+
"""Initializes the template's common state for creation or reset.
|
|
72
|
+
|
|
73
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
74
|
+
a consistent state. Can be overridden by subclasses for specific behaviour.
|
|
75
|
+
"""
|
|
67
76
|
self._set_seed()
|
|
68
77
|
|
|
69
78
|
def setup_pipeline(self) -> None:
|
|
@@ -147,7 +156,25 @@ class TransformersBase(Template):
|
|
|
147
156
|
return transformed_data_container
|
|
148
157
|
|
|
149
158
|
def reset_state(self, template_name: str | None = None) -> None:
|
|
150
|
-
|
|
159
|
+
"""Releases the pipeline and processor from memory and re-instantiates the template.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
template_name (str | None, optional): The name of the template instance being reset. Defaults to None.
|
|
163
|
+
"""
|
|
164
|
+
_ = template_name
|
|
165
|
+
|
|
166
|
+
if hasattr(self, "pipeline") and self.pipeline is not None:
|
|
167
|
+
if self.pipeline.model is not None:
|
|
168
|
+
self.pipeline.model.to("cpu")
|
|
169
|
+
del self.pipeline
|
|
170
|
+
|
|
171
|
+
if hasattr(self, "processor"):
|
|
172
|
+
del self.processor
|
|
173
|
+
|
|
174
|
+
gc.collect()
|
|
175
|
+
|
|
176
|
+
if torch.cuda.is_available():
|
|
151
177
|
torch.cuda.empty_cache()
|
|
152
|
-
|
|
153
|
-
|
|
178
|
+
|
|
179
|
+
self.initialize()
|
|
180
|
+
self.logger.info(f"Reset template instance `{self.instance_name}`")
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from PIL import Image
|
|
5
5
|
from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
|
|
6
|
-
from sinapsis_core.template_base.base_models import OutputTypes
|
|
6
|
+
from sinapsis_core.template_base.base_models import OutputTypes
|
|
7
7
|
|
|
8
8
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
9
9
|
from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
|
|
@@ -14,8 +14,7 @@ ImageToTextTransformersUIProperties.tags.extend([Tags.IMAGE, Tags.TEXT, Tags.IMA
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class ImageToTextTransformers(TransformersBase):
|
|
17
|
-
"""
|
|
18
|
-
ImageToTextTransformers template to generate text from an image.
|
|
17
|
+
"""ImageToTextTransformers template to generate text from an image.
|
|
19
18
|
|
|
20
19
|
This template uses a Hugging Face Transformers pipeline to generate textual descriptions
|
|
21
20
|
from input images.
|
|
@@ -41,8 +40,13 @@ class ImageToTextTransformers(TransformersBase):
|
|
|
41
40
|
GENERATED_TEXT_KEY = "generated_text"
|
|
42
41
|
UIProperties = ImageToTextTransformersUIProperties
|
|
43
42
|
|
|
44
|
-
def
|
|
45
|
-
|
|
43
|
+
def initialize(self) -> None:
|
|
44
|
+
"""Initializes the template's common state for creation or reset.
|
|
45
|
+
|
|
46
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
47
|
+
a consistent state.
|
|
48
|
+
"""
|
|
49
|
+
super().initialize()
|
|
46
50
|
self.task = "image-to-text"
|
|
47
51
|
self.setup_pipeline()
|
|
48
52
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
+
import gc
|
|
2
3
|
from abc import abstractmethod
|
|
3
4
|
from typing import Any, ClassVar, Literal
|
|
4
5
|
|
|
@@ -53,6 +54,14 @@ class PaliGemmaBase(Template):
|
|
|
53
54
|
|
|
54
55
|
def __init__(self, attributes: TemplateAttributeType) -> None:
|
|
55
56
|
super().__init__(attributes)
|
|
57
|
+
self.initialize()
|
|
58
|
+
|
|
59
|
+
def initialize(self) -> None:
|
|
60
|
+
"""Initializes the template's common state for creation or reset.
|
|
61
|
+
|
|
62
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
63
|
+
a consistent state. Can be overridden by subclasses for specific behaviour.
|
|
64
|
+
"""
|
|
56
65
|
self.model = self._setup_model()
|
|
57
66
|
self.processor = self._setup_processor()
|
|
58
67
|
|
|
@@ -104,7 +113,24 @@ class PaliGemmaBase(Template):
|
|
|
104
113
|
"""
|
|
105
114
|
|
|
106
115
|
def reset_state(self, template_name: str | None = None) -> None:
|
|
107
|
-
|
|
116
|
+
"""Releases the model and processor from memory and re-instantiates the template.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
template_name (str | None, optional): The name of the template instance being reset. Defaults to None.
|
|
120
|
+
"""
|
|
121
|
+
_ = template_name
|
|
122
|
+
|
|
123
|
+
if hasattr(self, "model"):
|
|
124
|
+
self.model.to("cpu")
|
|
125
|
+
del self.model
|
|
126
|
+
|
|
127
|
+
if hasattr(self, "processor"):
|
|
128
|
+
del self.processor
|
|
129
|
+
|
|
130
|
+
gc.collect()
|
|
131
|
+
|
|
132
|
+
if torch.cuda.is_available():
|
|
108
133
|
torch.cuda.empty_cache()
|
|
109
|
-
|
|
110
|
-
|
|
134
|
+
|
|
135
|
+
self.initialize()
|
|
136
|
+
self.logger.info(f"Reset template instance `{self.instance_name}`")
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
|
|
4
4
|
from sinapsis_core.data_containers.annotations import ImageAnnotations
|
|
5
|
-
from sinapsis_core.template_base.base_models import TemplateAttributeType
|
|
6
5
|
from sinapsis_huggingface_transformers.templates.pali_gemma.pali_gemma_inference import (
|
|
7
6
|
PaliGemmaInference,
|
|
8
7
|
PaliGemmaInferenceAttributes,
|
|
@@ -66,9 +65,13 @@ class PaliGemmaDetection(PaliGemmaInference):
|
|
|
66
65
|
AttributesBaseModel = PaliGemmaDetectionAttributes
|
|
67
66
|
KEYS = PaliGemmaDetectionKeys
|
|
68
67
|
|
|
69
|
-
def
|
|
70
|
-
|
|
68
|
+
def initialize(self) -> None:
|
|
69
|
+
"""Initializes the template's common state for creation or reset.
|
|
71
70
|
|
|
71
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
72
|
+
a consistent state.
|
|
73
|
+
"""
|
|
74
|
+
super().initialize()
|
|
72
75
|
objects_str = self.initialize_objects_str()
|
|
73
76
|
self.prompt = self.KEYS.detection_prompt.format(objects_str)
|
|
74
77
|
|
|
@@ -3,7 +3,6 @@ import numpy as np
|
|
|
3
3
|
import torch
|
|
4
4
|
from sinapsis_core.data_containers.annotations import BoundingBox, ImageAnnotations
|
|
5
5
|
from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
|
|
6
|
-
from sinapsis_core.template_base.base_models import TemplateAttributeType
|
|
7
6
|
from sinapsis_data_visualization.helpers.detection_utils import bbox_xyxy_to_xywh
|
|
8
7
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
9
8
|
from sinapsis_huggingface_transformers.templates.pali_gemma.pali_gemma_base import (
|
|
@@ -67,8 +66,14 @@ class PaliGemmaInference(PaliGemmaBase):
|
|
|
67
66
|
INPUT_IDS = "input_ids"
|
|
68
67
|
UIProperties = PaliGemmaInferenceUIProperties
|
|
69
68
|
|
|
70
|
-
def
|
|
71
|
-
|
|
69
|
+
def initialize(self) -> None:
|
|
70
|
+
"""Initializes the template's common state for creation or reset.
|
|
71
|
+
|
|
72
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
73
|
+
a consistent state.
|
|
74
|
+
"""
|
|
75
|
+
super().initialize()
|
|
76
|
+
self.model = self.model.eval()
|
|
72
77
|
self.prompt = self.attributes.prompt
|
|
73
78
|
|
|
74
79
|
def _prepare_inputs(self, image_content: np.ndarray) -> dict:
|
|
@@ -237,11 +242,12 @@ class PaliGemmaInference(PaliGemmaBase):
|
|
|
237
242
|
Returns:
|
|
238
243
|
DataContainer: Processed container with added annotations
|
|
239
244
|
"""
|
|
245
|
+
self.logger.debug("EXECUTING TEMPLATE")
|
|
240
246
|
if container.texts:
|
|
241
247
|
self.process_from_text_packet(container)
|
|
242
248
|
else:
|
|
243
249
|
self.process_from_prompt(container)
|
|
244
|
-
|
|
250
|
+
self.logger.debug("finished execution")
|
|
245
251
|
return container
|
|
246
252
|
|
|
247
253
|
@staticmethod
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
|
|
5
|
-
from sinapsis_core.template_base.base_models import OutputTypes
|
|
5
|
+
from sinapsis_core.template_base.base_models import OutputTypes
|
|
6
6
|
|
|
7
7
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
8
8
|
from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
|
|
@@ -15,8 +15,7 @@ SpeechToTextTransformersUIProperties.tags.extend(
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class SpeechToTextTransformers(TransformersBase):
|
|
18
|
-
"""
|
|
19
|
-
Template to perform speech-to-text actions
|
|
18
|
+
"""Template to perform speech-to-text actions
|
|
20
19
|
using the HuggingFace module through the 'transformers' architecture.
|
|
21
20
|
|
|
22
21
|
The template takes an Audio from the DataContainer and uses a speech-recognition
|
|
@@ -44,8 +43,13 @@ class SpeechToTextTransformers(TransformersBase):
|
|
|
44
43
|
TEXT_KEY = "text"
|
|
45
44
|
UIProperties = SpeechToTextTransformersUIProperties
|
|
46
45
|
|
|
47
|
-
def
|
|
48
|
-
|
|
46
|
+
def initialize(self) -> None:
|
|
47
|
+
"""Initializes the template's common state for creation or reset.
|
|
48
|
+
|
|
49
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
50
|
+
a consistent state.
|
|
51
|
+
"""
|
|
52
|
+
super().initialize()
|
|
49
53
|
self.task = "automatic-speech-recognition"
|
|
50
54
|
self.setup_pipeline()
|
|
51
55
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
|
|
3
3
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
4
|
-
from sinapsis_core.template_base.base_models import OutputTypes
|
|
4
|
+
from sinapsis_core.template_base.base_models import OutputTypes
|
|
5
5
|
|
|
6
6
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
7
7
|
from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
|
|
@@ -12,12 +12,10 @@ SummarizationTransformersUIProperties.tags.extend([Tags.SUMMARIZATION, Tags.TEXT
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class SummarizationTransformers(TransformersBase):
|
|
15
|
-
"""
|
|
16
|
-
Template for text summarization using a Hugging Face Transformers pipeline.
|
|
15
|
+
"""Template for text summarization using a Hugging Face Transformers pipeline.
|
|
17
16
|
|
|
18
17
|
This class provides a reusable framework for summarizing text using a pre-trained
|
|
19
|
-
Hugging Face model.
|
|
20
|
-
of the generated summaries.
|
|
18
|
+
Hugging Face model.
|
|
21
19
|
|
|
22
20
|
Usage example:
|
|
23
21
|
|
|
@@ -35,14 +33,22 @@ class SummarizationTransformers(TransformersBase):
|
|
|
35
33
|
model_cache_dir: /path/to/cache/dir
|
|
36
34
|
device: 'cuda'
|
|
37
35
|
torch_dtype: float16
|
|
36
|
+
inference_kwargs:
|
|
37
|
+
min_length: 5
|
|
38
|
+
max_length: 20
|
|
38
39
|
|
|
39
40
|
"""
|
|
40
41
|
|
|
41
42
|
SUMMARY_TEXT_KEY = "summary_text"
|
|
42
43
|
UIProperties = SummarizationTransformersUIProperties
|
|
43
44
|
|
|
44
|
-
def
|
|
45
|
-
|
|
45
|
+
def initialize(self) -> None:
|
|
46
|
+
"""Initializes the template's common state for creation or reset.
|
|
47
|
+
|
|
48
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
49
|
+
a consistent state.
|
|
50
|
+
"""
|
|
51
|
+
super().initialize()
|
|
46
52
|
self.task = "summarization"
|
|
47
53
|
self.setup_pipeline()
|
|
48
54
|
|
|
@@ -57,12 +63,9 @@ class SummarizationTransformers(TransformersBase):
|
|
|
57
63
|
DataContainer: DataContainer including the summarized text.
|
|
58
64
|
"""
|
|
59
65
|
for text_packet in container.texts:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
max_length=self.attributes.max_length,
|
|
64
|
-
min_length=self.attributes.min_length,
|
|
65
|
-
)[0][self.SUMMARY_TEXT_KEY]
|
|
66
|
+
summarized_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
|
|
67
|
+
self.SUMMARY_TEXT_KEY
|
|
68
|
+
]
|
|
66
69
|
|
|
67
70
|
text_packet.content = summarized_text
|
|
68
71
|
return container
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import torch
|
|
5
5
|
from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, TextPacket
|
|
6
|
-
from sinapsis_core.template_base.base_models import OutputTypes
|
|
6
|
+
from sinapsis_core.template_base.base_models import OutputTypes
|
|
7
7
|
|
|
8
8
|
from sinapsis_huggingface_transformers.helpers import sentences_to_n_words, split_text_into_sentences
|
|
9
9
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
@@ -37,8 +37,7 @@ class TextToSpeechAttributes(TransformersBaseAttributes):
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class TextToSpeechTransformers(TransformersBase):
|
|
40
|
-
"""
|
|
41
|
-
The template generates an audio from a prompt that is passed
|
|
40
|
+
"""The template generates an audio from a prompt that is passed
|
|
42
41
|
through the text packet in the DataContainer.
|
|
43
42
|
It uses the transformers architecture and a HuggingFace model to
|
|
44
43
|
produce the audio. Finally, it sends the audio through the DataContainer
|
|
@@ -68,8 +67,13 @@ class TextToSpeechTransformers(TransformersBase):
|
|
|
68
67
|
AttributesBaseModel = TextToSpeechAttributes
|
|
69
68
|
UIProperties = TextToSpeechTransformersUIProperties
|
|
70
69
|
|
|
71
|
-
def
|
|
72
|
-
|
|
70
|
+
def initialize(self) -> None:
|
|
71
|
+
"""Initializes the template's common state for creation or reset.
|
|
72
|
+
|
|
73
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
74
|
+
a consistent state.
|
|
75
|
+
"""
|
|
76
|
+
super().initialize()
|
|
73
77
|
self.task = "text-to-speech"
|
|
74
78
|
self.setup_pipeline()
|
|
75
79
|
self.sample_rate = self._get_sample_rate()
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
5
|
-
from sinapsis_core.template_base.base_models import OutputTypes
|
|
5
|
+
from sinapsis_core.template_base.base_models import OutputTypes
|
|
6
6
|
|
|
7
7
|
from sinapsis_huggingface_transformers.helpers.tags import Tags
|
|
8
8
|
from sinapsis_huggingface_transformers.templates.base_transformers import (
|
|
@@ -28,8 +28,7 @@ class TranslationTransformersAttributes(TransformersBaseAttributes):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class TranslationTransformers(TransformersBase):
|
|
31
|
-
"""
|
|
32
|
-
Template for text translation using a Hugging Face Transformers pipeline.
|
|
31
|
+
"""Template for text translation using a Hugging Face Transformers pipeline.
|
|
33
32
|
|
|
34
33
|
This class provides a reusable framework for translating text from one language
|
|
35
34
|
to another using a pre-trained Hugging Face model. The source and target languages
|
|
@@ -59,8 +58,13 @@ class TranslationTransformers(TransformersBase):
|
|
|
59
58
|
TRANSLATION_TEXT_KEY = "translation_text"
|
|
60
59
|
UIProperties = TranslationTransformersUIProperties
|
|
61
60
|
|
|
62
|
-
def
|
|
63
|
-
|
|
61
|
+
def initialize(self) -> None:
|
|
62
|
+
"""Initializes the template's common state for creation or reset.
|
|
63
|
+
|
|
64
|
+
This method is called by both `__init__` and `reset_state` to ensure
|
|
65
|
+
a consistent state.
|
|
66
|
+
"""
|
|
67
|
+
super().initialize()
|
|
64
68
|
self.task = f"translation_{self.attributes.source_language}_to_{self.attributes.target_language}"
|
|
65
69
|
self.setup_pipeline()
|
|
66
70
|
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
|
-
from typing import Literal
|
|
4
|
-
|
|
5
|
-
from datasets import load_dataset
|
|
6
|
-
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
7
|
-
from sinapsis_core.template_base import Template
|
|
8
|
-
from sinapsis_core.template_base.base_models import OutputTypes, TemplateAttributes, UIPropertiesMetadata
|
|
9
|
-
from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class SpeakerEmbeddingFromDatasetAttributes(TemplateAttributes):
|
|
13
|
-
"""Attributes for the SpeakerEmbeddingFromDataset template.
|
|
14
|
-
|
|
15
|
-
Attributes:
|
|
16
|
-
dataset_path (str): Path or name of the Hugging Face dataset containing speaker embeddings.
|
|
17
|
-
For example, `"Matthijs/cmu-arctic-xvectors"`.
|
|
18
|
-
data_cache_dir (str): Directory to cache the downloaded dataset. Defaults to the value of
|
|
19
|
-
the `SINAPSIS_CACHE_DIR` environment variable.
|
|
20
|
-
split (str): Dataset split to use (e.g., "train", "validation", or "test").
|
|
21
|
-
Defaults to `"validation"`.
|
|
22
|
-
sample_idx (int): Index of the dataset sample to extract the embedding from.
|
|
23
|
-
xvector_key (str): Key in the dataset sample that stores the xvector. Defaults to `"xvector"`.
|
|
24
|
-
target_packet (Literal["texts", "audios"]): Type of packet in the `DataContainer` to which
|
|
25
|
-
the embedding will be attached. Must be either `"texts"` or `"audios"`.
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
dataset_path: str
|
|
29
|
-
data_cache_dir: str = str(SINAPSIS_CACHE_DIR)
|
|
30
|
-
split: str = "validation"
|
|
31
|
-
sample_idx: int
|
|
32
|
-
xvector_key: str = "xvector"
|
|
33
|
-
target_packet: Literal["texts", "audios"]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class SpeakerEmbeddingFromDataset(Template):
|
|
37
|
-
"""
|
|
38
|
-
Template to retrieve and attach speaker embeddings from a Hugging Face dataset.
|
|
39
|
-
This template extracts a specified embedding (e.g., xvector) from a dataset and attaches
|
|
40
|
-
it to the `embedding` attribute of each `TextPacket` in a `DataContainer`.
|
|
41
|
-
|
|
42
|
-
Usage example:
|
|
43
|
-
|
|
44
|
-
agent:
|
|
45
|
-
name: my_test_agent
|
|
46
|
-
templates:
|
|
47
|
-
- template_name: InputTemplate
|
|
48
|
-
class_name: InputTemplate
|
|
49
|
-
attributes: {}
|
|
50
|
-
- template_name: SpeakerEmbeddingFromDataset
|
|
51
|
-
class_name: SpeakerEmbeddingFromDataset
|
|
52
|
-
template_input: InputTemplate
|
|
53
|
-
attributes:
|
|
54
|
-
dataset_path: '/path/to/hugging/face/dataset'
|
|
55
|
-
data_cache_dir: /path/to/cache/dir
|
|
56
|
-
split: validation
|
|
57
|
-
sample_idx: '1'
|
|
58
|
-
xvector_key: xvector
|
|
59
|
-
target_packet: 'audios'
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
"""
|
|
64
|
-
|
|
65
|
-
AttributesBaseModel = SpeakerEmbeddingFromDatasetAttributes
|
|
66
|
-
UIProperties = UIPropertiesMetadata(category="HuggingFace", output_type=OutputTypes.AUDIO)
|
|
67
|
-
|
|
68
|
-
def execute(self, container: DataContainer) -> DataContainer:
|
|
69
|
-
"""Retrieve and attach speaker embeddings to specified packets in a DataContainer.
|
|
70
|
-
|
|
71
|
-
Args:
|
|
72
|
-
container (DataContainer): The container holding the packets to which the embedding will be
|
|
73
|
-
attached.
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
DataContainer: The updated container with embeddings attached to the `embedding`
|
|
77
|
-
attribute of the specified packet type.
|
|
78
|
-
"""
|
|
79
|
-
packets = getattr(container, self.attributes.target_packet)
|
|
80
|
-
embeddings_dataset = load_dataset(
|
|
81
|
-
self.attributes.dataset_path,
|
|
82
|
-
split=self.attributes.split,
|
|
83
|
-
cache_dir=self.attributes.data_cache_dir,
|
|
84
|
-
)
|
|
85
|
-
speaker_embedding = embeddings_dataset[self.attributes.sample_idx][self.attributes.xvector_key]
|
|
86
|
-
self.logger.info(
|
|
87
|
-
f"Attaching embedding from index {self.attributes.sample_idx} to "
|
|
88
|
-
f"{len(packets)} {self.attributes.target_packet} packets."
|
|
89
|
-
)
|
|
90
|
-
for packet in packets:
|
|
91
|
-
packet.embedding = speaker_embedding
|
|
92
|
-
|
|
93
|
-
return container
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|