sinapsis-speech 0.3.4__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/PKG-INFO +1 -1
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +14 -8
- sinapsis_speech-0.3.5/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py +56 -0
- sinapsis_speech-0.3.5/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py +51 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/PKG-INFO +1 -1
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/SOURCES.txt +2 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/pyproject.toml +1 -1
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/LICENSE +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/README.md +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/kokoro_tts.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/dependency_links.txt +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/requires.txt +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/top_level.txt +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_zonos/src/sinapsis_zonos/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py +0 -0
- {sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/setup.cfg +0 -0
|
@@ -46,6 +46,8 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
46
46
|
Args:
|
|
47
47
|
api_key (str): The API key used to authenticate with ElevenLabs' API.
|
|
48
48
|
model (Literal): The model identifier to use for speech synthesis.
|
|
49
|
+
output_file_name (str | None): Optional name for saved audio file.
|
|
50
|
+
If not provided, a random UUID will be used as file name. Defaults to None.
|
|
49
51
|
output_format (OutputFormat): The output audio format and quality. Options include:
|
|
50
52
|
["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
|
|
51
53
|
"mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
|
|
@@ -70,10 +72,10 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
70
72
|
"eleven_english_sts_v2",
|
|
71
73
|
"eleven_multilingual_sts_v2",
|
|
72
74
|
] = "eleven_turbo_v2_5"
|
|
75
|
+
output_file_name: str | None = None
|
|
73
76
|
output_format: OutputFormat = "mp3_44100_128"
|
|
74
77
|
output_folder: str = os.path.join(SINAPSIS_CACHE_DIR, "elevenlabs", "audios")
|
|
75
78
|
stream: bool = False
|
|
76
|
-
file_name : str = str(uuid.uuid4())
|
|
77
79
|
voice: VoiceId | VoiceName | Voice = None
|
|
78
80
|
voice_settings: VoiceSettings = Field(default_factory=dict) # type: ignore[arg-type]
|
|
79
81
|
|
|
@@ -98,9 +100,14 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
98
100
|
def synthesize_speech(self, input_data: list[Packet]) -> RESPONSE_TYPE:
|
|
99
101
|
"""Abstract method for ElevenLabs speech synthesis."""
|
|
100
102
|
|
|
101
|
-
def _save_audio(self, response: Iterator[bytes] | bytes, file_format: str) -> str:
|
|
103
|
+
def _save_audio(self, response: Iterator[bytes] | bytes, file_format: str, idx: int) -> str:
|
|
102
104
|
"""Saves the audio to a file and returns the file path."""
|
|
103
|
-
|
|
105
|
+
if self.attributes.output_file_name:
|
|
106
|
+
file_name = self.attributes.output_file_name + "_" + str(idx)
|
|
107
|
+
else:
|
|
108
|
+
file_name = uuid.uuid4()
|
|
109
|
+
|
|
110
|
+
output_file = os.path.join(self.attributes.output_folder, f"{file_name}.{file_format}")
|
|
104
111
|
try:
|
|
105
112
|
save(response, output_file)
|
|
106
113
|
self.logger.info(f"Audio saved to: {output_file}")
|
|
@@ -132,13 +139,12 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
132
139
|
self.logger.error(f"Value error while processing audio chunks: {e}")
|
|
133
140
|
raise
|
|
134
141
|
|
|
135
|
-
def _process_audio_output(self, response: Iterator[bytes] | bytes) -> str | IO[bytes]:
|
|
142
|
+
def _process_audio_output(self, idx: int, response: Iterator[bytes] | bytes) -> str | IO[bytes]:
|
|
136
143
|
"""Processes a single audio output (either stream or file)."""
|
|
137
144
|
if self.attributes.stream:
|
|
138
145
|
return self._generate_audio_stream(response)
|
|
139
|
-
else
|
|
140
|
-
|
|
141
|
-
return self._save_audio(response, file_format)
|
|
146
|
+
file_format = "mp3" if "mp3" in self.attributes.output_format else "wav"
|
|
147
|
+
return self._save_audio(response, file_format, idx)
|
|
142
148
|
|
|
143
149
|
def generate_speech(self, input_data: list[Packet]) -> list[str | IO[bytes]] | None:
|
|
144
150
|
"""Generates speech and saves it to a file."""
|
|
@@ -149,7 +155,7 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
149
155
|
if isinstance(responses, Iterator):
|
|
150
156
|
responses = [responses]
|
|
151
157
|
|
|
152
|
-
audio_outputs = [self._process_audio_output(response) for response in responses]
|
|
158
|
+
audio_outputs = [self._process_audio_output(idx, response) for idx, response in enumerate(responses)]
|
|
153
159
|
return audio_outputs
|
|
154
160
|
|
|
155
161
|
def _handle_streaming_output(self, audio_outputs: list[str | IO[bytes]]) -> list[AudioPacket]:
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""Speech-To-Speech template for ElevenLabs"""
|
|
3
|
+
|
|
4
|
+
from typing import Callable, Iterator, Literal
|
|
5
|
+
|
|
6
|
+
from sinapsis_core.data_containers.data_packet import AudioPacket
|
|
7
|
+
|
|
8
|
+
from sinapsis_elevenlabs.helpers.voice_utils import create_voice_settings, get_voice_id
|
|
9
|
+
from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ElevenLabsSTS(ElevenLabsBase):
|
|
13
|
+
"""Template to interact with ElevenLabs speech-to-speech API."""
|
|
14
|
+
|
|
15
|
+
PACKET_TYPE_NAME: str = "audios"
|
|
16
|
+
|
|
17
|
+
class AttributesBaseModel(ElevenLabsBase.AttributesBaseModel):
|
|
18
|
+
"""Attributes specific to ElevenLabs STS API interaction.
|
|
19
|
+
|
|
20
|
+
This class overrides the base attributes of `ElevenLabsBase` to define
|
|
21
|
+
default models specific to the ElevenLabs STS system.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
model: Literal["eleven_english_sts_v2", "eleven_multilingual_sts_v2"] = "eleven_multilingual_sts_v2"
|
|
25
|
+
|
|
26
|
+
def synthesize_speech(self, input_data: list[AudioPacket]) -> Iterator[bytes]:
|
|
27
|
+
"""
|
|
28
|
+
Sends an audio input to the ElevenLabs API for speech-to-speech synthesis.
|
|
29
|
+
|
|
30
|
+
This method processes the provided audio input using the specified voice, model,
|
|
31
|
+
and settings to generate a new audio response.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
method: Callable[..., Iterator[bytes]] = (
|
|
36
|
+
self.client.speech_to_speech.convert_as_stream
|
|
37
|
+
if self.attributes.stream
|
|
38
|
+
else self.client.speech_to_speech.convert
|
|
39
|
+
)
|
|
40
|
+
return method(
|
|
41
|
+
audio=input_data[0].content,
|
|
42
|
+
voice_id=get_voice_id(self.client, voice=self.attributes.voice),
|
|
43
|
+
model_id=self.attributes.model,
|
|
44
|
+
voice_settings=create_voice_settings(self.attributes.voice_settings),
|
|
45
|
+
output_format=self.attributes.output_format,
|
|
46
|
+
optimize_streaming_latency=str(self.attributes.streaming_latency),
|
|
47
|
+
)
|
|
48
|
+
except ValueError as e:
|
|
49
|
+
self.logger.error(f"Value error synthesizing speech: {e}")
|
|
50
|
+
raise
|
|
51
|
+
except TypeError as e:
|
|
52
|
+
self.logger.error(f"Type error in input data or parameters: {e}")
|
|
53
|
+
raise
|
|
54
|
+
except KeyError as e:
|
|
55
|
+
self.logger.error(f"Missing key in input data or settings: {e}")
|
|
56
|
+
raise
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""Voice cloning template for ElevenLabs"""
|
|
3
|
+
|
|
4
|
+
from elevenlabs import Voice
|
|
5
|
+
from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
|
|
6
|
+
|
|
7
|
+
from sinapsis_elevenlabs.templates.elevenlabs_tts import ElevenLabsTTS
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ElevenLabsVoiceClone(ElevenLabsTTS):
|
|
11
|
+
"""Template to clone a voice using ElevenLabs API."""
|
|
12
|
+
|
|
13
|
+
class AttributesBaseModel(ElevenLabsTTS.AttributesBaseModel):
|
|
14
|
+
"""Attributes specific to the ElevenLabsVoiceClone class."""
|
|
15
|
+
|
|
16
|
+
name: str | None = None
|
|
17
|
+
description: str | None = None
|
|
18
|
+
|
|
19
|
+
def clone_voice(self, input_data: list[AudioPacket]) -> Voice:
|
|
20
|
+
"""Clones a voice using the provided audio files."""
|
|
21
|
+
files = [f.content for f in input_data]
|
|
22
|
+
try:
|
|
23
|
+
add_voice_response = self.client.voices.add(
|
|
24
|
+
name=self.attributes.name,
|
|
25
|
+
description=self.attributes.description,
|
|
26
|
+
files=files,
|
|
27
|
+
)
|
|
28
|
+
cloned_voice = self.client.voices.get(add_voice_response.voice_id)
|
|
29
|
+
self.logger.info(f"Voice cloned successfully: {cloned_voice.name}")
|
|
30
|
+
return cloned_voice
|
|
31
|
+
except ValueError as e:
|
|
32
|
+
self.logger.error(f"Value error in input data or parameters: {e}")
|
|
33
|
+
raise
|
|
34
|
+
except TypeError as e:
|
|
35
|
+
self.logger.error(f"Type error with input data or files: {e}")
|
|
36
|
+
raise
|
|
37
|
+
except KeyError as e:
|
|
38
|
+
self.logger.error(f"Missing expected key in API response: {e}")
|
|
39
|
+
raise
|
|
40
|
+
|
|
41
|
+
def execute(self, container: DataContainer) -> DataContainer:
|
|
42
|
+
"""Executes the voice cloning process and generates the speech output."""
|
|
43
|
+
audios = getattr(container, "audios", None)
|
|
44
|
+
if not audios:
|
|
45
|
+
self.logger.debug("No audios provided to clone voice")
|
|
46
|
+
return container
|
|
47
|
+
self.attributes.voice = self.clone_voice(audios)
|
|
48
|
+
|
|
49
|
+
container = super().execute(container)
|
|
50
|
+
|
|
51
|
+
return container
|
{sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/SOURCES.txt
RENAMED
|
@@ -7,7 +7,9 @@ packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py
|
|
|
7
7
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py
|
|
8
8
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py
|
|
9
9
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py
|
|
10
|
+
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py
|
|
10
11
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py
|
|
12
|
+
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py
|
|
11
13
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py
|
|
12
14
|
packages/sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py
|
|
13
15
|
packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/requires.txt
RENAMED
|
File without changes
|
{sinapsis_speech-0.3.4 → sinapsis_speech-0.3.5}/packages/sinapsis_speech.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|