sinapsis-speech 0.3.4__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/PKG-INFO +38 -2
  2. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/README.md +35 -1
  3. sinapsis_speech-0.4.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +100 -0
  4. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py +2 -0
  5. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +19 -13
  6. sinapsis_speech-0.4.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py +95 -0
  7. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +8 -7
  8. sinapsis_speech-0.4.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py +123 -0
  9. sinapsis_speech-0.4.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/templates/__init__.py +20 -0
  10. sinapsis_speech-0.4.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/templates/orpheus_tts.py +300 -0
  11. sinapsis_speech-0.4.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/thirdparty/helpers.py +69 -0
  12. sinapsis_speech-0.4.0/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/__init__.py +20 -0
  13. sinapsis_speech-0.4.0/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py +270 -0
  14. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_speech.egg-info/PKG-INFO +38 -2
  15. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_speech.egg-info/SOURCES.txt +7 -0
  16. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_speech.egg-info/requires.txt +2 -0
  17. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_speech.egg-info/top_level.txt +2 -0
  18. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/pyproject.toml +6 -1
  19. sinapsis_speech-0.3.4/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +0 -64
  20. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/LICENSE +0 -0
  21. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py +0 -0
  22. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py +0 -0
  23. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py +0 -0
  24. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +0 -0
  25. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py +0 -0
  26. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py +0 -0
  27. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +0 -0
  28. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py +0 -0
  29. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/__init__.py +0 -0
  30. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/kokoro_tts.py +0 -0
  31. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_speech.egg-info/dependency_links.txt +0 -0
  32. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_zonos/src/sinapsis_zonos/__init__.py +0 -0
  33. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py +0 -0
  34. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py +0 -0
  35. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py +0 -0
  36. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py +0 -0
  37. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py +0 -0
  38. {sinapsis_speech-0.3.4 → sinapsis_speech-0.4.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.3.4
3
+ Version: 0.4.0
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -18,6 +18,8 @@ Requires-Dist: sinapsis-f5-tts[all]; extra == "all"
18
18
  Requires-Dist: sinapsis-kokoro[all]; extra == "all"
19
19
  Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
20
20
  Requires-Dist: sinapsis-zonos[all]; extra == "all"
21
+ Requires-Dist: sinapsis-parakeet-tdt[all]; extra == "all"
22
+ Requires-Dist: sinapsis-orpheus-cpp[all]; extra == "all"
21
23
  Provides-Extra: gradio-app
22
24
  Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
23
25
  Dynamic: license-file
@@ -55,8 +57,10 @@ This repo includes packages for performing speech synthesis using different tool
55
57
 
56
58
  * <code>sinapsis-elevenlabs</code>
57
59
  * <code>sinapsis-f5-tts</code>
58
- * * <code>sinapsis-kokoro</code>
60
+ * <code>sinapsis-kokoro</code>
59
61
  * <code>sinapsis-zonos</code>
62
+ * <code>sinapsis-orpheus-cpp</code>
63
+ * <code>sinapsis-parakeet-tdt</code>
60
64
 
61
65
  Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
62
66
 
@@ -205,6 +209,16 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-kokoro
205
209
  docker compose -f docker/compose_apps.yaml up -d sinapsis-zonos
206
210
  ```
207
211
 
212
+ - For Orpheus-CPP:
213
+ ```bash
214
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
215
+ ```
216
+
217
+ - For Parakeet:
218
+ ```bash
219
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
220
+ ```
221
+
208
222
  3. **Check the logs**
209
223
 
210
224
  - For ElevenLabs:
@@ -224,6 +238,17 @@ docker logs -f sinapsis-kokoro
224
238
  ```bash
225
239
  docker logs -f sinapsis-zonos
226
240
  ```
241
+
242
+ - For Orpheus-CPP:
243
+ ```bash
244
+ docker logs -f sinapsis-orpheus-tts
245
+ ```
246
+
247
+ - For Parakeet:
248
+ ```bash
249
+ docker logs -f sinapsis-parakeet
250
+ ```
251
+
227
252
  4. **The logs will display the URL to access the webapp, e.g.:**
228
253
  ```bash
229
254
  Running on local URL: http://127.0.0.1:7860
@@ -240,6 +265,17 @@ docker compose -f docker/compose_apps.yaml down
240
265
 
241
266
  To run the webapp using the <code>uv</code> package manager, follow these steps:
242
267
 
268
+
269
+ > [!IMPORTANT]
270
+ > If you're using sinapsis-orpheus-cpp, you need to export the following CUDA environment variables:
271
+
272
+
273
+ ```bash
274
+ export CMAKE_ARGS="-DGGML_CUDA=on"
275
+ export FORCE_CMAKE="1"
276
+ export CUDACXX=$(command -v nvcc)
277
+ ```
278
+
243
279
  1. **Sync the virtual environment**:
244
280
 
245
281
  ```bash
@@ -31,8 +31,10 @@ This repo includes packages for performing speech synthesis using different tool
31
31
 
32
32
  * <code>sinapsis-elevenlabs</code>
33
33
  * <code>sinapsis-f5-tts</code>
34
- * * <code>sinapsis-kokoro</code>
34
+ * <code>sinapsis-kokoro</code>
35
35
  * <code>sinapsis-zonos</code>
36
+ * <code>sinapsis-orpheus-cpp</code>
37
+ * <code>sinapsis-parakeet-tdt</code>
36
38
 
37
39
  Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
38
40
 
@@ -181,6 +183,16 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-kokoro
181
183
  docker compose -f docker/compose_apps.yaml up -d sinapsis-zonos
182
184
  ```
183
185
 
186
+ - For Orpheus-CPP:
187
+ ```bash
188
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
189
+ ```
190
+
191
+ - For Parakeet:
192
+ ```bash
193
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
194
+ ```
195
+
184
196
  3. **Check the logs**
185
197
 
186
198
  - For ElevenLabs:
@@ -200,6 +212,17 @@ docker logs -f sinapsis-kokoro
200
212
  ```bash
201
213
  docker logs -f sinapsis-zonos
202
214
  ```
215
+
216
+ - For Orpheus-CPP:
217
+ ```bash
218
+ docker logs -f sinapsis-orpheus-tts
219
+ ```
220
+
221
+ - For Parakeet:
222
+ ```bash
223
+ docker logs -f sinapsis-parakeet
224
+ ```
225
+
203
226
  4. **The logs will display the URL to access the webapp, e.g.:**
204
227
  ```bash
205
228
  Running on local URL: http://127.0.0.1:7860
@@ -216,6 +239,17 @@ docker compose -f docker/compose_apps.yaml down
216
239
 
217
240
  To run the webapp using the <code>uv</code> package manager, follow these steps:
218
241
 
242
+
243
+ > [!IMPORTANT]
244
+ > If you're using sinapsis-orpheus-cpp, you need to export the following CUDA environment variables:
245
+
246
+
247
+ ```bash
248
+ export CMAKE_ARGS="-DGGML_CUDA=on"
249
+ export FORCE_CMAKE="1"
250
+ export CUDACXX=$(command -v nvcc)
251
+ ```
252
+
219
253
  1. **Sync the virtual environment**:
220
254
 
221
255
  ```bash
@@ -0,0 +1,100 @@
1
+ # -*- coding: utf-8 -*-
2
+ import json
3
+
4
+ from elevenlabs import Voice, VoiceSettings
5
+ from elevenlabs.client import ElevenLabs
6
+ from sinapsis_core.data_containers.data_packet import TextPacket
7
+ from sinapsis_core.utils.logging_utils import sinapsis_logger
8
+
9
+
10
+ def create_voice_settings(settings: VoiceSettings, as_json: bool = False) -> VoiceSettings | None | str:
11
+ """
12
+ Creates or updates a `VoiceSettings` object based on the provided settings.
13
+
14
+ Args:
15
+ settings (VoiceSettings | None): An instance of `VoiceSettings` containing the settings to be applied.
16
+ If `None`, the function returns the default settings.
17
+ as_json (bool): Whether to return the settings as JSON string.
18
+
19
+ Returns:
20
+ VoiceSettings | None | str: `None` if `settings` is empty or `None` (default settings apply); a JSON string
21
+ of the non-null fields if `as_json` is True; otherwise the provided `VoiceSettings` object unchanged.
22
+ """
23
+ if not settings:
24
+ return None
25
+
26
+ if as_json:
27
+ return json.dumps(settings.model_dump(exclude_none=True))
28
+
29
+ return settings
30
+
31
+
32
+ def get_voice_id(client: ElevenLabs, voice: str | Voice | None) -> str:
33
+ """
34
+ Resolves the voice ID for a given voice name or ID.
35
+
36
+ This function searches through available voices from the ElevenLabs API
37
+ to match the provided voice name or ID. If the specified voice is not found,
38
+ it logs the error and returns the first available voice ID as a fallback.
39
+
40
+ Args:
41
+ client (ElevenLabs): The ElevenLabs API client instance.
42
+ voice (str | Voice | None): The name or ID of the desired voice.
43
+
44
+ Returns:
45
+ str: The resolved voice ID.
46
+
47
+ Raises:
48
+ ValueError: If no voices are available to resolve.
49
+ """
50
+ if not voice:
51
+ return get_default_voice(client).voice_id
52
+
53
+ if isinstance(voice, Voice):
54
+ sinapsis_logger.debug(f"Voice object provided, using voice_id: {voice.voice_id}")
55
+ return voice.voice_id
56
+
57
+ try:
58
+ voices_response = client.voices.get_all()
59
+ voices = voices_response.voices
60
+
61
+ for v in voices:
62
+ if voice == v.name or voice == v.voice_id:
63
+ sinapsis_logger.debug(f"Voice {voice} resolved to ID: {v.voice_id}")
64
+ return v.voice_id
65
+
66
+ sinapsis_logger.error(f"Voice {voice} is not available.")
67
+ if voices:
68
+ sinapsis_logger.info(f"Returning default voice ID: {voices[0].voice_id}")
69
+ return voices[0].voice_id
70
+
71
+ raise ValueError("No available voices to resolve. Ensure the client is configured correctly.")
72
+ except Exception as e:
73
+ sinapsis_logger.error(f"Error resolving voice ID: {e}")
74
+ raise
75
+
76
+
77
+ def get_default_voice(client: ElevenLabs) -> Voice:
78
+ """
79
+ Gets the first available voice as default.
80
+
81
+ Args:
82
+ client (ElevenLabs): The ElevenLabs API client instance.
83
+
84
+ Returns:
85
+ Voice: The default voice object.
86
+ """
87
+ try:
88
+ voices_response = client.voices.get_all()
89
+ voices = voices_response.voices
90
+ if voices:
91
+ return voices[0]
92
+ raise ValueError("No voices available")
93
+ except Exception as e:
94
+ sinapsis_logger.error(f"Error getting default voice: {e}")
95
+ raise
96
+
97
+
98
+ def load_input_text(input_data: list[TextPacket]) -> str:
99
+ """Loads and concatenates the text content from a list of TextPacket objects."""
100
+ return "".join([item.content for item in input_data])
@@ -7,6 +7,8 @@ _root_lib_path = "sinapsis_elevenlabs.templates"
7
7
  _template_lookup = {
8
8
  "ElevenLabsTTS": f"{_root_lib_path}.elevenlabs_tts",
9
9
  "ElevenLabsVoiceGeneration": f"{_root_lib_path}.elevenlabs_voice_generation",
10
+ "ElevenLabsVoiceClone": f"{_root_lib_path}.elevenlabs_voice_clone",
11
+ "ElevenLabsSTS": f"{_root_lib_path}.elevenlabs_sts",
10
12
  }
11
13
 
12
14
 
@@ -5,10 +5,10 @@ import abc
5
5
  import os
6
6
  import uuid
7
7
  from io import BytesIO
8
- from typing import IO, Iterator, Literal
8
+ from typing import IO, Iterable, Iterator, Literal
9
9
 
10
10
  from elevenlabs import Voice, VoiceSettings, save
11
- from elevenlabs.client import ElevenLabs, VoiceId, VoiceName
11
+ from elevenlabs.client import ElevenLabs
12
12
  from elevenlabs.types import OutputFormat
13
13
  from pydantic import Field
14
14
  from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, Packet
@@ -46,12 +46,14 @@ class ElevenLabsBase(Template, abc.ABC):
46
46
  Args:
47
47
  api_key (str): The API key used to authenticate with ElevenLabs' API.
48
48
  model (Literal): The model identifier to use for speech synthesis.
49
+ output_file_name (str | None): Optional name for saved audio file.
50
+ If not provided, a random UUID will be used as the file name. Defaults to None.
49
51
  output_format (OutputFormat): The output audio format and quality. Options include:
50
52
  ["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
51
53
  "mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
52
54
  output_folder (str): The folder where generated audio files will be saved.
53
55
  stream (bool): If True, the audio is returned as a stream; otherwise, saved to a file.
54
- voice (VoiceId | VoiceName | Voice): The voice to use for speech synthesis. This can be a voice ID (str),
56
+ voice (str | Voice | None): The voice to use for speech synthesis. This can be a voice ID (str),
55
57
  a voice name (str) or an elevenlabs voice object (Voice).
56
58
  voice_settings (VoiceSettings): A dictionary of settings that control the behavior of the voice.
57
59
  - stability (float)
@@ -70,11 +72,11 @@ class ElevenLabsBase(Template, abc.ABC):
70
72
  "eleven_english_sts_v2",
71
73
  "eleven_multilingual_sts_v2",
72
74
  ] = "eleven_turbo_v2_5"
75
+ output_file_name: str | None = None
73
76
  output_format: OutputFormat = "mp3_44100_128"
74
77
  output_folder: str = os.path.join(SINAPSIS_CACHE_DIR, "elevenlabs", "audios")
75
78
  stream: bool = False
76
- file_name : str = str(uuid.uuid4())
77
- voice: VoiceId | VoiceName | Voice = None
79
+ voice: str | Voice | None = None
78
80
  voice_settings: VoiceSettings = Field(default_factory=dict) # type: ignore[arg-type]
79
81
 
80
82
  UIProperties = UIPropertiesMetadata(category="Elevenlabs", output_type=OutputTypes.AUDIO)
@@ -98,9 +100,14 @@ class ElevenLabsBase(Template, abc.ABC):
98
100
  def synthesize_speech(self, input_data: list[Packet]) -> RESPONSE_TYPE:
99
101
  """Abstract method for ElevenLabs speech synthesis."""
100
102
 
101
- def _save_audio(self, response: Iterator[bytes] | bytes, file_format: str) -> str:
103
+ def _save_audio(self, response: Iterable | bytes, file_format: str, idx: int) -> str:
102
104
  """Saves the audio to a file and returns the file path."""
103
- output_file = os.path.join(self.attributes.output_folder, f"{self.attributes.file_name}.{file_format}")
105
+ if self.attributes.output_file_name:
106
+ file_name = self.attributes.output_file_name + "_" + str(idx)
107
+ else:
108
+ file_name = uuid.uuid4()
109
+
110
+ output_file = os.path.join(self.attributes.output_folder, f"{file_name}.{file_format}")
104
111
  try:
105
112
  save(response, output_file)
106
113
  self.logger.info(f"Audio saved to: {output_file}")
@@ -109,7 +116,7 @@ class ElevenLabsBase(Template, abc.ABC):
109
116
  self.logger.error(f"File system error while saving speech to file: {e}")
110
117
  raise
111
118
 
112
- def _generate_audio_stream(self, response: Iterator[bytes] | bytes) -> IO[bytes]:
119
+ def _generate_audio_stream(self, response: Iterable | bytes) -> IO[bytes]:
113
120
  """Generates and returns the audio stream."""
114
121
  audio_stream = BytesIO()
115
122
  try:
@@ -132,13 +139,12 @@ class ElevenLabsBase(Template, abc.ABC):
132
139
  self.logger.error(f"Value error while processing audio chunks: {e}")
133
140
  raise
134
141
 
135
- def _process_audio_output(self, response: Iterator[bytes] | bytes) -> str | IO[bytes]:
142
+ def _process_audio_output(self, idx: int, response: Iterable | bytes) -> str | IO[bytes]:
136
143
  """Processes a single audio output (either stream or file)."""
137
144
  if self.attributes.stream:
138
145
  return self._generate_audio_stream(response)
139
- else:
140
- file_format = "mp3" if "mp3" in self.attributes.output_format else "wav"
141
- return self._save_audio(response, file_format)
146
+ file_format = "mp3" if "mp3" in self.attributes.output_format else "wav"
147
+ return self._save_audio(response, file_format, idx)
142
148
 
143
149
  def generate_speech(self, input_data: list[Packet]) -> list[str | IO[bytes]] | None:
144
150
  """Generates speech and saves it to a file."""
@@ -149,7 +155,7 @@ class ElevenLabsBase(Template, abc.ABC):
149
155
  if isinstance(responses, Iterator):
150
156
  responses = [responses]
151
157
 
152
- audio_outputs = [self._process_audio_output(response) for response in responses]
158
+ audio_outputs = [self._process_audio_output(idx, response) for idx, response in enumerate(responses)]
153
159
  return audio_outputs
154
160
 
155
161
  def _handle_streaming_output(self, audio_outputs: list[str | IO[bytes]]) -> list[AudioPacket]:
@@ -0,0 +1,95 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Speech-To-Speech template for ElevenLabs."""
3
+
4
+ from typing import Callable, Iterator, Literal
5
+
6
+ from sinapsis_core.data_containers.data_packet import AudioPacket
7
+
8
+ from sinapsis_elevenlabs.helpers.voice_utils import create_voice_settings, get_voice_id
9
+ from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
10
+
11
+
12
+ class ElevenLabsSTS(ElevenLabsBase):
13
+ """Template to interact with the ElevenLabs Speech-to-Speech API.
14
+
15
+ This template takes an input audio and converts it to a new voice using
16
+ the ElevenLabs Speech-to-Speech (STS) API.
17
+
18
+ Usage example:
19
+
20
+ agent:
21
+ name: my_test_agent
22
+ templates:
23
+ - template_name: InputTemplate
24
+ class_name: InputTemplate
25
+ attributes: {}
26
+ - template_name: ElevenLabsSTS
27
+ class_name: ElevenLabsSTS
28
+ template_input: InputTemplate
29
+ attributes:
30
+ api_key: null
31
+ model: eleven_multilingual_sts_v2
32
+ output_file_name: null
33
+ output_format: mp3_44100_128
34
+ output_folder: ~/.cache/sinapsis/elevenlabs/audios
35
+ stream: false
36
+ voice: null
37
+ voice_settings:
38
+ stability: null
39
+ similarity_boost: null
40
+ style: null
41
+ use_speaker_boost: null
42
+ speed: null
43
+ streaming_latency: null
44
+
45
+ """
46
+
47
+ PACKET_TYPE_NAME: str = "audios"
48
+
49
+ class AttributesBaseModel(ElevenLabsBase.AttributesBaseModel):
50
+ """Attributes specific to ElevenLabs STS API interaction.
51
+
52
+ Attributes:
53
+ model (Literal): The STS model to use. Options are "eleven_english_sts_v2" or "eleven_multilingual_sts_v2".
54
+ streaming_latency (int | None): Optional latency optimization for streaming. Defaults to None.
55
+ """
56
+
57
+ model: Literal["eleven_english_sts_v2", "eleven_multilingual_sts_v2"] = "eleven_multilingual_sts_v2"
58
+ streaming_latency: int | None = None
59
+
60
+ def synthesize_speech(self, input_data: list[AudioPacket]) -> Iterator[bytes]:
61
+ """Sends an audio input to the ElevenLabs API for speech-to-speech synthesis.
62
+
63
+ Args:
64
+ input_data (list[AudioPacket]): List of AudioPacket objects containing the audio to be converted.
65
+ Only the first AudioPacket in the list is used.
66
+
67
+ Returns:
68
+ Iterator[bytes]: An iterator yielding audio data chunks in the output format specified.
69
+
70
+ Raises:
71
+ ValueError: If there is a problem with the input data or parameters.
72
+ TypeError: If the input data or files are of incorrect type.
73
+ KeyError: If the expected key is missing in the API response.
74
+ """
75
+ try:
76
+ method: Callable[..., Iterator[bytes]] = (
77
+ self.client.speech_to_speech.stream if self.attributes.stream else self.client.speech_to_speech.convert
78
+ )
79
+ return method(
80
+ voice_id=get_voice_id(self.client, voice=self.attributes.voice),
81
+ audio=input_data[0].content,
82
+ model_id=self.attributes.model,
83
+ voice_settings=create_voice_settings(self.attributes.voice_settings, as_json=True),
84
+ output_format=self.attributes.output_format,
85
+ optimize_streaming_latency=self.attributes.streaming_latency,
86
+ )
87
+ except ValueError as e:
88
+ self.logger.error(f"Value error synthesizing speech: {e}")
89
+ raise
90
+ except TypeError as e:
91
+ self.logger.error(f"Type error in input data or parameters: {e}")
92
+ raise
93
+ except KeyError as e:
94
+ self.logger.error(f"Missing key in input data or settings: {e}")
95
+ raise
@@ -1,12 +1,13 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  """Text-To-Speech template for ElevenLabs"""
3
3
 
4
- from typing import Iterator, Literal
4
+ from typing import Callable, Iterator, Literal
5
5
 
6
6
  from sinapsis_core.data_containers.data_packet import TextPacket
7
7
 
8
8
  from sinapsis_elevenlabs.helpers.voice_utils import (
9
9
  create_voice_settings,
10
+ get_voice_id,
10
11
  load_input_text,
11
12
  )
12
13
  from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
@@ -64,16 +65,16 @@ class ElevenLabsTTS(ElevenLabsBase):
64
65
  """
65
66
  input_text: str = load_input_text(input_data)
66
67
  try:
67
- response: Iterator[bytes] = self.client.generate(
68
+ method: Callable[..., Iterator[bytes]] = (
69
+ self.client.text_to_speech.stream if self.attributes.stream else self.client.text_to_speech.convert
70
+ )
71
+ return method(
68
72
  text=input_text,
69
- voice=self.attributes.voice,
70
- model=self.attributes.model,
73
+ voice_id=get_voice_id(self.client, self.attributes.voice),
74
+ model_id=self.attributes.model,
71
75
  voice_settings=create_voice_settings(self.attributes.voice_settings),
72
76
  output_format=self.attributes.output_format,
73
- stream=self.attributes.stream,
74
77
  )
75
-
76
- return response
77
78
  except ValueError as e:
78
79
  self.logger.error(f"Value error synthesizing speech: {e}")
79
80
  raise
@@ -0,0 +1,123 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Text-To-Speech template for ElevenLabs Voice Cloning."""
3
+
4
+ from elevenlabs import Voice
5
+ from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
6
+
7
+ from sinapsis_elevenlabs.templates.elevenlabs_tts import ElevenLabsTTS
8
+
9
+
10
+ class ElevenLabsVoiceClone(ElevenLabsTTS):
11
+ """Template to clone a voice using the ElevenLabs API.
12
+
13
+ This template allows you to create a new custom voice in ElevenLabs by providing
14
+ one or more audio samples. The cloned voice can then be used for subsequent
15
+ text-to-speech synthesis within the Sinapsis pipeline.
16
+
17
+ Usage example:
18
+
19
+ agent:
20
+ name: my_test_agent
21
+ templates:
22
+ - template_name: InputTemplate
23
+ class_name: InputTemplate
24
+ attributes: {}
25
+ - template_name: ElevenLabsVoiceClone
26
+ class_name: ElevenLabsVoiceClone
27
+ template_input: InputTemplate
28
+ attributes:
29
+ api_key: null
30
+ model: eleven_turbo_v2_5
31
+ output_file_name: null
32
+ output_format: mp3_44100_128
33
+ output_folder: ~/.cache/sinapsis/elevenlabs/audios
34
+ stream: false
35
+ voice: null
36
+ voice_settings:
37
+ stability: null
38
+ similarity_boost: null
39
+ style: null
40
+ use_speaker_boost: null
41
+ speed: null
42
+ name: null
43
+ description: null
44
+ remove_background_noise: false
45
+
46
+ """
47
+
48
+ class AttributesBaseModel(ElevenLabsTTS.AttributesBaseModel):
49
+ """Attributes specific to the ElevenLabsVoiceClone class.
50
+
51
+ Attributes:
52
+ name (str | None): Name for the cloned voice. If None, a default name may be used.
53
+ description (str | None): Description for the cloned voice. Optional.
54
+ remove_background_noise (bool): Whether to remove background noise from samples. Defaults to False.
55
+ """
56
+
57
+ name: str | None = None
58
+ description: str | None = None
59
+ remove_background_noise: bool = False
60
+
61
+ def clone_voice(self, input_data: list[AudioPacket]) -> Voice:
62
+ """Clones a voice using the provided audio files.
63
+
64
+ Args:
65
+ input_data (list[AudioPacket]): List of AudioPacket objects containing the audio samples
66
+ to be used for voice cloning. Each AudioPacket's `content` should be a file-like object
67
+ or bytes representing the audio data.
68
+ **NOTE:** All provided audio packets are used as reference for a single cloned voice.
69
+
70
+ Returns:
71
+ Voice: The cloned Voice object as returned by the ElevenLabs API.
72
+
73
+ Raises:
74
+ ValueError: If there is a problem with the input data or parameters.
75
+ TypeError: If the input data or files are of incorrect type.
76
+ KeyError: If the expected key is missing in the API response.
77
+ """
78
+ files = [audio.content for audio in input_data]
79
+ try:
80
+ clone_response = self.client.voices.ivc.create(
81
+ name=self.attributes.name,
82
+ files=files,
83
+ description=self.attributes.description,
84
+ remove_background_noise=self.attributes.remove_background_noise,
85
+ )
86
+ cloned_voice = self.client.voices.get(clone_response.voice_id)
87
+ self.logger.info(f"Voice cloned successfully with IVC: {cloned_voice.name}")
88
+ return cloned_voice
89
+ except ValueError as e:
90
+ self.logger.error(f"Value error in input data or parameters: {e}")
91
+ raise
92
+ except TypeError as e:
93
+ self.logger.error(f"Type error with input data or files: {e}")
94
+ raise
95
+ except KeyError as e:
96
+ self.logger.error(f"Missing expected key in API response: {e}")
97
+ raise
98
+
99
+ def execute(self, container: DataContainer) -> DataContainer:
100
+ """Executes the voice cloning process and generates the speech output.
101
+
102
+ Args:
103
+ container (DataContainer): The input DataContainer, expected to contain
104
+ one or more AudioPacket objects in the `audios` attribute.
105
+
106
+ Returns:
107
+ DataContainer: The updated DataContainer. If cloning is successful,
108
+ the cloned voice is set in `self.attributes.voice` and the parent
109
+ TTS execution is performed using the new voice.
110
+
111
+ Side Effects:
112
+ - Updates `self.attributes.voice` with the cloned Voice object.
113
+ - May log errors or info messages.
114
+ """
115
+ audios = container.audios
116
+ if not audios:
117
+ self.logger.debug("No audios provided to clone voice")
118
+ return container
119
+ self.attributes.voice = self.clone_voice(audios)
120
+
121
+ container = super().execute(container)
122
+
123
+ return container
@@ -0,0 +1,20 @@
1
+ # -*- coding: utf-8 -*-
2
+ import importlib
3
+ from typing import Callable
4
+
5
+ _root_lib_path = "sinapsis_orpheus_cpp.templates"
6
+
7
+ _template_lookup = {
8
+ "OrpheusTTS": f"{_root_lib_path}.orpheus_tts",
9
+ }
10
+
11
+
12
+ def __getattr__(name: str) -> Callable:
13
+ if name in _template_lookup:
14
+ module = importlib.import_module(_template_lookup[name])
15
+ return getattr(module, name)
16
+
17
+ raise AttributeError(f"template `{name}` not found in {_root_lib_path}")
18
+
19
+
20
+ __all__ = list(_template_lookup.keys())