sinapsis-speech 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +1 -6
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py +0 -2
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +0 -1
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py +0 -2
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +0 -1
- sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +14 -1
- sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py +8 -4
- {sinapsis_speech-0.4.3.dist-info → sinapsis_speech-0.4.5.dist-info}/METADATA +1 -1
- {sinapsis_speech-0.4.3.dist-info → sinapsis_speech-0.4.5.dist-info}/RECORD +12 -12
- {sinapsis_speech-0.4.3.dist-info → sinapsis_speech-0.4.5.dist-info}/WHEEL +0 -0
- {sinapsis_speech-0.4.3.dist-info → sinapsis_speech-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {sinapsis_speech-0.4.3.dist-info → sinapsis_speech-0.4.5.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
"""Base template for ElevenLabs speech synthesis"""
|
|
3
3
|
|
|
4
4
|
import abc
|
|
5
|
-
import os
|
|
6
5
|
from typing import Generator, Iterable, Iterator, Literal
|
|
7
6
|
|
|
8
7
|
import numpy as np
|
|
@@ -18,7 +17,6 @@ from sinapsis_core.template_base.base_models import (
|
|
|
18
17
|
UIPropertiesMetadata,
|
|
19
18
|
)
|
|
20
19
|
from sinapsis_core.template_base.template import Template
|
|
21
|
-
from sinapsis_core.utils.env_var_keys import WORKING_DIR
|
|
22
20
|
from sinapsis_generic_data_tools.helpers.audio_encoder import audio_bytes_to_numpy
|
|
23
21
|
|
|
24
22
|
from sinapsis_elevenlabs.helpers.env_var_keys import ELEVENLABS_API_KEY
|
|
@@ -47,8 +45,7 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
47
45
|
Args:
|
|
48
46
|
api_key (str): The API key used to authenticate with ElevenLabs' API.
|
|
49
47
|
model (Literal): The model identifier to use for speech synthesis.
|
|
50
|
-
|
|
51
|
-
If not provided a random UUI will be used as file name. Defaults to None.
|
|
48
|
+
|
|
52
49
|
output_format (OutputFormat): The output audio format and quality. Options include:
|
|
53
50
|
["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
|
|
54
51
|
"mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
|
|
@@ -71,9 +68,7 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
71
68
|
"eleven_english_sts_v2",
|
|
72
69
|
"eleven_multilingual_sts_v2",
|
|
73
70
|
] = "eleven_turbo_v2_5"
|
|
74
|
-
output_file_name: str | None = None
|
|
75
71
|
output_format: OutputFormat = "mp3_44100_128"
|
|
76
|
-
output_folder: str = os.path.join(WORKING_DIR, "elevenlabs", "audios")
|
|
77
72
|
stream: bool = False
|
|
78
73
|
voice: str | Voice | None = None
|
|
79
74
|
voice_settings: VoiceSettings = Field(default_factory=dict) # type: ignore[arg-type]
|
|
@@ -33,9 +33,7 @@ class ElevenLabsSTS(ElevenLabsBase):
|
|
|
33
33
|
attributes:
|
|
34
34
|
api_key: null
|
|
35
35
|
model: eleven_multilingual_sts_v2
|
|
36
|
-
output_file_name: null
|
|
37
36
|
output_format: mp3_44100_128
|
|
38
|
-
output_folder: <WORKING_DIR>/elevenlabs/audios
|
|
39
37
|
stream: false
|
|
40
38
|
voice: null
|
|
41
39
|
voice_settings:
|
|
@@ -32,9 +32,7 @@ class ElevenLabsVoiceClone(ElevenLabsTTS):
|
|
|
32
32
|
attributes:
|
|
33
33
|
api_key: null
|
|
34
34
|
model: eleven_turbo_v2_5
|
|
35
|
-
output_file_name: null
|
|
36
35
|
output_format: mp3_44100_128
|
|
37
|
-
output_folder: <WORKING_DIR>/elevenlabs/audios
|
|
38
36
|
stream: false
|
|
39
37
|
voice: null
|
|
40
38
|
voice_settings:
|
|
@@ -37,7 +37,6 @@ class ElevenLabsVoiceGeneration(ElevenLabsBase):
|
|
|
37
37
|
voice_settings: null
|
|
38
38
|
model: eleven_turbo_v2_5
|
|
39
39
|
output_format: mp3_44100_128
|
|
40
|
-
output_folder: <WORKING_DIR>/elevenlabs/audios
|
|
41
40
|
stream: false
|
|
42
41
|
voice_description: An old British male with a raspy, deep voice. Professional,
|
|
43
42
|
relaxed and assertive
|
|
@@ -14,7 +14,13 @@ from sinapsis_core.data_containers.data_packet import (
|
|
|
14
14
|
DataContainer,
|
|
15
15
|
)
|
|
16
16
|
from sinapsis_core.template_base import Template
|
|
17
|
-
from sinapsis_core.template_base.base_models import
|
|
17
|
+
from sinapsis_core.template_base.base_models import (
|
|
18
|
+
OutputTypes,
|
|
19
|
+
TemplateAttributes,
|
|
20
|
+
TemplateAttributeType,
|
|
21
|
+
UIPropertiesMetadata,
|
|
22
|
+
)
|
|
23
|
+
from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
|
|
18
24
|
|
|
19
25
|
from sinapsis_f5_tts.helpers.tags import Tags
|
|
20
26
|
|
|
@@ -107,6 +113,8 @@ class F5TTSInferenceAttributes(TemplateAttributes):
|
|
|
107
113
|
|
|
108
114
|
device: str | None = Field(default=None, json_schema_extra={F5CliKeys.cli_param: "--device"})
|
|
109
115
|
|
|
116
|
+
root_dir: str | None = None
|
|
117
|
+
|
|
110
118
|
|
|
111
119
|
class F5TTSInference(Template):
|
|
112
120
|
"""Template for performing text-to-speech synthesis using the F5TTS model.
|
|
@@ -155,6 +163,11 @@ class F5TTSInference(Template):
|
|
|
155
163
|
tags=[Tags.AUDIO, Tags.AUDIO_GENERATION, Tags.F5TTS, Tags.SPEECH, Tags.TEXT_TO_SPEECH],
|
|
156
164
|
)
|
|
157
165
|
|
|
166
|
+
def __init__(self, attributes: TemplateAttributeType) -> None:
|
|
167
|
+
super().__init__(attributes)
|
|
168
|
+
self.attributes.root_dir = self.attributes.root_dir or SINAPSIS_CACHE_DIR
|
|
169
|
+
self.attributes.ref_audio = os.path.join(self.attributes.root_dir, self.attributes.ref_audio)
|
|
170
|
+
|
|
158
171
|
def _add_attribute_to_command(self, cli_command: list[str], field_name: str, field: Any) -> None:
|
|
159
172
|
"""
|
|
160
173
|
This method examines each attribute field's metadata to determine if and how
|
|
@@ -15,6 +15,7 @@ from sinapsis_core.template_base.base_models import (
|
|
|
15
15
|
UIPropertiesMetadata,
|
|
16
16
|
)
|
|
17
17
|
from sinapsis_core.template_base.template import Template
|
|
18
|
+
from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
|
|
18
19
|
|
|
19
20
|
from sinapsis_parakeet_tdt.helpers.tags import Tags
|
|
20
21
|
|
|
@@ -36,6 +37,7 @@ class ParakeetTDTInferenceAttributes(TemplateAttributes):
|
|
|
36
37
|
|
|
37
38
|
model_name: str = "nvidia/parakeet-tdt-0.6b-v2"
|
|
38
39
|
audio_paths: list[str] | None = None
|
|
40
|
+
root_dir: str | None = None
|
|
39
41
|
enable_timestamps: bool = False
|
|
40
42
|
timestamp_level: Literal["char", "word", "segment"] = "word"
|
|
41
43
|
device: Literal["cpu", "cuda"] = "cuda"
|
|
@@ -88,6 +90,7 @@ class ParakeetTDTInference(Template):
|
|
|
88
90
|
|
|
89
91
|
def __init__(self, attributes: TemplateAttributes) -> None:
|
|
90
92
|
super().__init__(attributes)
|
|
93
|
+
self.attributes.root_dir = self.attributes.root_dir or SINAPSIS_CACHE_DIR
|
|
91
94
|
self._load_model()
|
|
92
95
|
|
|
93
96
|
def _load_model(self) -> None:
|
|
@@ -131,9 +134,10 @@ class ParakeetTDTInference(Template):
|
|
|
131
134
|
"""
|
|
132
135
|
sources = []
|
|
133
136
|
for path in paths:
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
+
full_path = os.path.join(self.attributes.root_dir, path)
|
|
138
|
+
if not os.path.exists(full_path):
|
|
139
|
+
self.logger.warning(f"Audio file not found: {full_path}")
|
|
140
|
+
sources.append(full_path)
|
|
137
141
|
return sources
|
|
138
142
|
|
|
139
143
|
def get_audio_sources(self, container: DataContainer) -> list[str]:
|
|
@@ -156,7 +160,6 @@ class ParakeetTDTInference(Template):
|
|
|
156
160
|
|
|
157
161
|
if not sources and self.attributes.audio_paths:
|
|
158
162
|
sources = self.get_sources_from_paths(self.attributes.audio_paths)
|
|
159
|
-
|
|
160
163
|
return sources
|
|
161
164
|
|
|
162
165
|
@staticmethod
|
|
@@ -220,6 +223,7 @@ class ParakeetTDTInference(Template):
|
|
|
220
223
|
Returns:
|
|
221
224
|
list[Any]: List of transcription results from the ASR model.
|
|
222
225
|
"""
|
|
226
|
+
|
|
223
227
|
return self.model.transcribe(
|
|
224
228
|
sources,
|
|
225
229
|
timestamps=self.attributes.enable_timestamps,
|
|
@@ -4,16 +4,16 @@ sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py,sha256=j8J64
|
|
|
4
4
|
sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/tags.py,sha256=EzEwYJqwPFhSzJB7K8g1HGm3xiy6M_kE1j19TYQAfS8,402
|
|
5
5
|
sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py,sha256=2Ym4suCk8wy-Nj2Hmk0uu3_-3nu1QlSs_KubDydm5wY,3383
|
|
6
6
|
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py,sha256=UG35_hown3HITVR42iK_e3yVsUbuq2oYTLpCGwJ89L4,708
|
|
7
|
-
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py,sha256=
|
|
8
|
-
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py,sha256=
|
|
9
|
-
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py,sha256=
|
|
10
|
-
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py,sha256=
|
|
11
|
-
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py,sha256=
|
|
7
|
+
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py,sha256=dry8bhtdn1KBZroPoUAqDHTuXlSqrmD549DEe-HcEXw,7542
|
|
8
|
+
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py,sha256=PaWfAPyo0eTjHxE3plEzQ4Rk5m88Q_2Nqp21yyJLHTQ,3713
|
|
9
|
+
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py,sha256=8yJwh-COxotjMTrQln85HpE7-znq6GSs4_qHlod9U_g,2992
|
|
10
|
+
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py,sha256=YEtT-z6qv8hwAAwrNIZHBpV3vhvgUVaSbgpf3bm8_ac,4881
|
|
11
|
+
sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py,sha256=VTgquS9l9jimVtaDG62TCKYC8xXCFqwcJVueeCQXSpQ,3023
|
|
12
12
|
sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/tags.py,sha256=T9RbgsPgq7Wc-0Lu3W8Si4bxjQsyWbX6hPrc-dakdCs,208
|
|
15
15
|
sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py,sha256=28BOPAr9GG1jYcrXi45ZWO1n2FAZJOdDcmRkOXdEYmk,496
|
|
16
|
-
sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py,sha256=
|
|
16
|
+
sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py,sha256=OBAWVOg_QId14hbftEa_oJHap6jpqAZeVj8ZnYN0Vsk,16774
|
|
17
17
|
sinapsis_kokoro/src/sinapsis_kokoro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py,sha256=2IMJuwURPKK7keIkgS-rpGD28REG5M1FwW0COGcm3nI,1573
|
|
19
19
|
sinapsis_kokoro/src/sinapsis_kokoro/helpers/tags.py,sha256=AiHIvqmujKX6tYQ4lEXjRGhq8Ujst8gZwVmcAjS7u3k,210
|
|
@@ -29,8 +29,8 @@ sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/__init__.py,sha256=47DEQpj8HBSa-
|
|
|
29
29
|
sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
30
|
sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/tags.py,sha256=OKZbq4zIL6XWM7eG5WuQ3dWYkmYNWjuCnlseXmjR_j0,262
|
|
31
31
|
sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/__init__.py,sha256=3LppgbS6v70Rmx__yXXQgnoZ2ZBHcXkXeWZYQQf6Zwg,504
|
|
32
|
-
sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py,sha256=
|
|
33
|
-
sinapsis_speech-0.4.
|
|
32
|
+
sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py,sha256=Tw9S8Nqf74lXwUxBodaLK_JaQvh9ITt8cWFQJ2QNP6s,10210
|
|
33
|
+
sinapsis_speech-0.4.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
34
34
|
sinapsis_zonos/src/sinapsis_zonos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
35
|
sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
sinapsis_zonos/src/sinapsis_zonos/helpers/tags.py,sha256=Y7MKQNx1E2k7ebF6r_1l1nBeS5k8hO424yFTT9NI7Rg,244
|
|
@@ -38,7 +38,7 @@ sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py,sha256=m1GdOYfzP73JGmtxH
|
|
|
38
38
|
sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py,sha256=bwu88wsJGzEqbssgb-wpS_7lFscJ74J8cgyca-hX_Qw,6422
|
|
39
39
|
sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py,sha256=A-_F0K3hbEFqeWWAh4YftgU9CFX-WHrauSiCAww9yp8,482
|
|
40
40
|
sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py,sha256=h5EToXoJgAgjqvz9WLDfSjhCsV5zgBwZrX5cTJ4VnhM,7679
|
|
41
|
-
sinapsis_speech-0.4.
|
|
42
|
-
sinapsis_speech-0.4.
|
|
43
|
-
sinapsis_speech-0.4.
|
|
44
|
-
sinapsis_speech-0.4.
|
|
41
|
+
sinapsis_speech-0.4.5.dist-info/METADATA,sha256=ZBGpQgEu2_I7DDsO_t2MO690zMA0OtncYXMUmGTA6-M,12783
|
|
42
|
+
sinapsis_speech-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
43
|
+
sinapsis_speech-0.4.5.dist-info/top_level.txt,sha256=KvdwXupt5wnqb_4XGRcuJaL9Glgdw-DBvRkNzhgl_Ds,110
|
|
44
|
+
sinapsis_speech-0.4.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|