sinapsis-speech 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,6 +46,8 @@ class ElevenLabsBase(Template, abc.ABC):
46
46
  Args:
47
47
  api_key (str): The API key used to authenticate with ElevenLabs' API.
48
48
  model (Literal): The model identifier to use for speech synthesis.
49
+ output_file_name (str | None): Optional name for saved audio file.
50
+ If not provided, a random UUID will be used as the file name. Defaults to None.
49
51
  output_format (OutputFormat): The output audio format and quality. Options include:
50
52
  ["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
51
53
  "mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
@@ -70,6 +72,7 @@ class ElevenLabsBase(Template, abc.ABC):
70
72
  "eleven_english_sts_v2",
71
73
  "eleven_multilingual_sts_v2",
72
74
  ] = "eleven_turbo_v2_5"
75
+ output_file_name: str | None = None
73
76
  output_format: OutputFormat = "mp3_44100_128"
74
77
  output_folder: str = os.path.join(SINAPSIS_CACHE_DIR, "elevenlabs", "audios")
75
78
  stream: bool = False
@@ -97,9 +100,14 @@ class ElevenLabsBase(Template, abc.ABC):
97
100
  def synthesize_speech(self, input_data: list[Packet]) -> RESPONSE_TYPE:
98
101
  """Abstract method for ElevenLabs speech synthesis."""
99
102
 
100
- def _save_audio(self, response: Iterator[bytes] | bytes, file_format: str) -> str:
103
+ def _save_audio(self, response: Iterator[bytes] | bytes, file_format: str, idx: int) -> str:
101
104
  """Saves the audio to a file and returns the file path."""
102
- output_file = os.path.join(self.attributes.output_folder, f"{uuid.uuid4()}.{file_format}")
105
+ if self.attributes.output_file_name:
106
+ file_name = self.attributes.output_file_name + "_" + str(idx)
107
+ else:
108
+ file_name = uuid.uuid4()
109
+
110
+ output_file = os.path.join(self.attributes.output_folder, f"{file_name}.{file_format}")
103
111
  try:
104
112
  save(response, output_file)
105
113
  self.logger.info(f"Audio saved to: {output_file}")
@@ -131,13 +139,12 @@ class ElevenLabsBase(Template, abc.ABC):
131
139
  self.logger.error(f"Value error while processing audio chunks: {e}")
132
140
  raise
133
141
 
134
- def _process_audio_output(self, response: Iterator[bytes] | bytes) -> str | IO[bytes]:
142
+ def _process_audio_output(self, idx: int, response: Iterator[bytes] | bytes) -> str | IO[bytes]:
135
143
  """Processes a single audio output (either stream or file)."""
136
144
  if self.attributes.stream:
137
145
  return self._generate_audio_stream(response)
138
- else:
139
- file_format = "mp3" if "mp3" in self.attributes.output_format else "wav"
140
- return self._save_audio(response, file_format)
146
+ file_format = "mp3" if "mp3" in self.attributes.output_format else "wav"
147
+ return self._save_audio(response, file_format, idx)
141
148
 
142
149
  def generate_speech(self, input_data: list[Packet]) -> list[str | IO[bytes]] | None:
143
150
  """Generates speech and saves it to a file."""
@@ -148,7 +155,7 @@ class ElevenLabsBase(Template, abc.ABC):
148
155
  if isinstance(responses, Iterator):
149
156
  responses = [responses]
150
157
 
151
- audio_outputs = [self._process_audio_output(response) for response in responses]
158
+ audio_outputs = [self._process_audio_output(idx, response) for idx, response in enumerate(responses)]
152
159
  return audio_outputs
153
160
 
154
161
  def _handle_streaming_output(self, audio_outputs: list[str | IO[bytes]]) -> list[AudioPacket]:
@@ -0,0 +1,56 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Speech-To-Speech template for ElevenLabs"""
3
+
4
+ from typing import Callable, Iterator, Literal
5
+
6
+ from sinapsis_core.data_containers.data_packet import AudioPacket
7
+
8
+ from sinapsis_elevenlabs.helpers.voice_utils import create_voice_settings, get_voice_id
9
+ from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
10
+
11
+
12
+ class ElevenLabsSTS(ElevenLabsBase):
13
+ """Template to interact with ElevenLabs speech-to-speech API."""
14
+
15
+ PACKET_TYPE_NAME: str = "audios"
16
+
17
+ class AttributesBaseModel(ElevenLabsBase.AttributesBaseModel):
18
+ """Attributes specific to ElevenLabs STS API interaction.
19
+
20
+ This class overrides the base attributes of `ElevenLabsBase` to define
21
+ default models specific to the ElevenLabs STS system.
22
+ """
23
+
24
+ model: Literal["eleven_english_sts_v2", "eleven_multilingual_sts_v2"] = "eleven_multilingual_sts_v2"
25
+
26
+ def synthesize_speech(self, input_data: list[AudioPacket]) -> Iterator[bytes]:
27
+ """
28
+ Sends an audio input to the ElevenLabs API for speech-to-speech synthesis.
29
+
30
+ This method processes the provided audio input using the specified voice, model,
31
+ and settings to generate a new audio response.
32
+ """
33
+
34
+ try:
35
+ method: Callable[..., Iterator[bytes]] = (
36
+ self.client.speech_to_speech.convert_as_stream
37
+ if self.attributes.stream
38
+ else self.client.speech_to_speech.convert
39
+ )
40
+ return method(
41
+ audio=input_data[0].content,
42
+ voice_id=get_voice_id(self.client, voice=self.attributes.voice),
43
+ model_id=self.attributes.model,
44
+ voice_settings=create_voice_settings(self.attributes.voice_settings),
45
+ output_format=self.attributes.output_format,
46
+ optimize_streaming_latency=str(self.attributes.streaming_latency),
47
+ )
48
+ except ValueError as e:
49
+ self.logger.error(f"Value error synthesizing speech: {e}")
50
+ raise
51
+ except TypeError as e:
52
+ self.logger.error(f"Type error in input data or parameters: {e}")
53
+ raise
54
+ except KeyError as e:
55
+ self.logger.error(f"Missing key in input data or settings: {e}")
56
+ raise
@@ -0,0 +1,51 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Voice cloning template for ElevenLabs"""
3
+
4
+ from elevenlabs import Voice
5
+ from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
6
+
7
+ from sinapsis_elevenlabs.templates.elevenlabs_tts import ElevenLabsTTS
8
+
9
+
10
+ class ElevenLabsVoiceClone(ElevenLabsTTS):
11
+ """Template to clone a voice using ElevenLabs API."""
12
+
13
+ class AttributesBaseModel(ElevenLabsTTS.AttributesBaseModel):
14
+ """Attributes specific to the ElevenLabsVoiceClone class."""
15
+
16
+ name: str | None = None
17
+ description: str | None = None
18
+
19
+ def clone_voice(self, input_data: list[AudioPacket]) -> Voice:
20
+ """Clones a voice using the provided audio files."""
21
+ files = [f.content for f in input_data]
22
+ try:
23
+ add_voice_response = self.client.voices.add(
24
+ name=self.attributes.name,
25
+ description=self.attributes.description,
26
+ files=files,
27
+ )
28
+ cloned_voice = self.client.voices.get(add_voice_response.voice_id)
29
+ self.logger.info(f"Voice cloned successfully: {cloned_voice.name}")
30
+ return cloned_voice
31
+ except ValueError as e:
32
+ self.logger.error(f"Value error in input data or parameters: {e}")
33
+ raise
34
+ except TypeError as e:
35
+ self.logger.error(f"Type error with input data or files: {e}")
36
+ raise
37
+ except KeyError as e:
38
+ self.logger.error(f"Missing expected key in API response: {e}")
39
+ raise
40
+
41
+ def execute(self, container: DataContainer) -> DataContainer:
42
+ """Executes the voice cloning process and generates the speech output."""
43
+ audios = getattr(container, "audios", None)
44
+ if not audios:
45
+ self.logger.debug("No audios provided to clone voice")
46
+ return container
47
+ self.attributes.voice = self.clone_voice(audios)
48
+
49
+ container = super().execute(container)
50
+
51
+ return container
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -3,8 +3,10 @@ sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py,sha256=47DEQpj8H
3
3
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py,sha256=j8J64iplBNaff1WvmfJ03eJozE1f5SdqtqQeldV2vPY,998
4
4
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py,sha256=fR1r1aaoFy_rQGfJLunUNdZfVxDyAo7shevS4TAXH_M,2420
5
5
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py,sha256=pyTWPBLN_P6sxFTF1QqfL7iTZd9E0EaggpfwB0qLLHI,579
6
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py,sha256=TKFVZLEBWKo2GGcZJYtFsgFGpFpAc5ul1gRFctWVjHo,8476
6
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py,sha256=dKzn98X-502y1ELRyDMcYY6JZe6ZDUDeQ7Kp0DOiQOY,8881
7
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py,sha256=QOEnD8fNXRhE_WzvM4jidkXRetAekRpps50JAjRNF2g,2292
7
8
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py,sha256=WVTROfB2ODAksHmWwV5RKcub3Hoc29OM_eAw75c9yio,2847
9
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py,sha256=ilkQb_YSYGIpt-QVprOGtVwGDndzk9O152tZ3i2CVkE,1972
8
10
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py,sha256=bKo7zhfsiZwsn-qZx_MCVAIx_MmaKnaP3lc-07AwAaY,2819
9
11
  sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
12
  sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py,sha256=28BOPAr9GG1jYcrXi45ZWO1n2FAZJOdDcmRkOXdEYmk,496
@@ -12,14 +14,14 @@ sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py,sha256=lEkcimV
12
14
  sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py,sha256=2IMJuwURPKK7keIkgS-rpGD28REG5M1FwW0COGcm3nI,1573
13
15
  sinapsis_kokoro/src/sinapsis_kokoro/templates/__init__.py,sha256=aX25GCUNGzIBeY5kifomsB-nSzW-unfq0-aC2Rpnaws,485
14
16
  sinapsis_kokoro/src/sinapsis_kokoro/templates/kokoro_tts.py,sha256=eRSEpH1HAUR3sy9Eb7ZRWhrk1IPZ7Z-ymS34ONFmxOg,5440
15
- sinapsis_speech-0.3.3.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
17
+ sinapsis_speech-0.3.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
16
18
  sinapsis_zonos/src/sinapsis_zonos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
19
  sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py,sha256=m1GdOYfzP73JGmtxH30mNiqbNkzFsQl9o2QaT7QxSVU,2470
19
21
  sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py,sha256=8Tr2YgxjBfRqv_Hf6sw36X2pLzW7fdQWqa6QPBxNZK8,6419
20
22
  sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py,sha256=A-_F0K3hbEFqeWWAh4YftgU9CFX-WHrauSiCAww9yp8,482
21
23
  sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py,sha256=Zz0hcXVevPyho7d0q3Q2Zl9yDTPl_XhtueerxmzY_Jc,7687
22
- sinapsis_speech-0.3.3.dist-info/METADATA,sha256=7n_q_BIXOU9d_FkHD2eKmCOCLj0UMBIa8DhY33wAxSM,10030
23
- sinapsis_speech-0.3.3.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
24
- sinapsis_speech-0.3.3.dist-info/top_level.txt,sha256=dd-bGAKXxelJCHcNxFZM4OTJ2mylgM2astOGPpj91yo,67
25
- sinapsis_speech-0.3.3.dist-info/RECORD,,
24
+ sinapsis_speech-0.3.5.dist-info/METADATA,sha256=UuE_zyyOsxT2slRkHQCCrXN2vSuzlpWVQfRZVPE9B7M,10030
25
+ sinapsis_speech-0.3.5.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
26
+ sinapsis_speech-0.3.5.dist-info/top_level.txt,sha256=dd-bGAKXxelJCHcNxFZM4OTJ2mylgM2astOGPpj91yo,67
27
+ sinapsis_speech-0.3.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.1.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5