PyPI - sinapsis-speech - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

sinapsis-speech 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/tags.py ADDED Viewed

@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+from enum import Enum
+class Tags(Enum):
+    AUDIO = "audio"
+    AUDIO_GENERATION = "audio_generation"
+    ELEVENLABS = "elevenlabs"
+    PROMPT = "prompt"
+    SPEECH = "speech"
+    SPEECH_TO_SPEECH = "speech_to_speech"
+    TEXT_TO_SPEECH = "text_to_speech"
+    VOICE_CONVERSION = "voice_conversion"
+    VOICE_CLONING = "voice_cloning"
+    VOICE_GENERATION = "voice_generation"

sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py CHANGED Viewed

@@ -1,29 +1,35 @@
 # -*- coding: utf-8 -*-
-from elevenlabs import VoiceSettings
-from elevenlabs.client import DEFAULT_VOICE, ElevenLabs, VoiceId, VoiceName
+import json
+from elevenlabs import Voice, VoiceSettings
+from elevenlabs.client import ElevenLabs
 from sinapsis_core.data_containers.data_packet import TextPacket
 from sinapsis_core.utils.logging_utils import sinapsis_logger
-def create_voice_settings(settings: VoiceSettings) -> VoiceSettings | None:
+def create_voice_settings(settings: VoiceSettings, as_json: bool = False) -> VoiceSettings | None | str:
     """
     Creates or updates a `VoiceSettings` object based on the provided settings.
     Args:
         settings (VoiceSettings | None): An instance of `VoiceSettings` containing the settings to be applied.
             If `None`, the function returns the default settings.
+        as_json (bool): Whether to return the settings as JSON string.
     Returns:
-        VoiceSettings: The provided `VoiceSettings` object if `settings` is not `None`. Otherwise,
-            `DEFAULT_VOICE.settings` is returned.
+        VoiceSettings | None | str: The provided `VoiceSettings` object if `settings` is not `None`. Otherwise,
+            `None` is returned for default settings.
     """
     if not settings:
-        return DEFAULT_VOICE.settings
+        return None
+    if as_json:
+        return json.dumps(settings.model_dump(exclude_none=True))
     return settings
-def get_voice_id(client: ElevenLabs, voice: VoiceId | VoiceName) -> VoiceId:
+def get_voice_id(client: ElevenLabs, voice: str | Voice | None) -> str:
     """
     Resolves the voice ID for a given voice name or ID.
@@ -33,29 +39,59 @@ def get_voice_id(client: ElevenLabs, voice: VoiceId | VoiceName) -> VoiceId:
     Args:
         client (ElevenLabs): The ElevenLabs API client instance.
-        voice (VoiceId | VoiceName): The name or ID of the desired voice.
+        voice (str | Voice | None): The name or ID of the desired voice.
     Returns:
-        VoiceId: The resolved voice ID.
+        str: The resolved voice ID.
     Raises:
         ValueError: If no voices are available to resolve.
     """
+    if not voice:
+        return get_default_voice(client).voice_id
+    if isinstance(voice, Voice):
+        sinapsis_logger.debug(f"Voice object provided, using voice_id: {voice.voice_id}")
+        return voice.voice_id
     try:
-        voices = client.voices.get_all().voices
+        voices_response = client.voices.get_all()
+        voices = voices_response.voices
         for v in voices:
             if voice == v.name or voice == v.voice_id:
-                sinapsis_logger.debug("Voice '%s' resolved to ID: %s", voice, v.voice_id)
+                sinapsis_logger.debug(f"Voice {voice} resolved to ID: {v.voice_id}")
                 return v.voice_id
-        sinapsis_logger.error("Voice '%s' is not available.", voice)
+        sinapsis_logger.error(f"Voice {voice} is not available.")
         if voices:
-            sinapsis_logger.info("Returning default voice ID: %s", voices[0].voice_id)
+            sinapsis_logger.info(f"Returning default voice ID: {voices[0].voice_id}")
             return voices[0].voice_id
         raise ValueError("No available voices to resolve. Ensure the client is configured correctly.")
     except Exception as e:
-        sinapsis_logger.error("Error resolving voice ID: %s", e)
+        sinapsis_logger.error(f"Error resolving voice ID: {e}")
+        raise
+def get_default_voice(client: ElevenLabs) -> Voice:
+    """
+    Gets the first available voice as default.
+    Args:
+        client (ElevenLabs): The ElevenLabs API client instance.
+    Returns:
+        Voice: The default voice object.
+    """
+    try:
+        voices_response = client.voices.get_all()
+        voices = voices_response.voices
+        if voices:
+            return voices[0]
+        raise ValueError("No voices available")
+    except Exception as e:
+        sinapsis_logger.error(f"Error getting default voice: {e}")
         raise

sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py CHANGED Viewed

@@ -7,6 +7,8 @@ _root_lib_path = "sinapsis_elevenlabs.templates"
 _template_lookup = {
     "ElevenLabsTTS": f"{_root_lib_path}.elevenlabs_tts",
     "ElevenLabsVoiceGeneration": f"{_root_lib_path}.elevenlabs_voice_generation",
+    "ElevenLabsVoiceClone": f"{_root_lib_path}.elevenlabs_voice_clone",
+    "ElevenLabsSTS": f"{_root_lib_path}.elevenlabs_sts",
 }

sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py CHANGED Viewed

@@ -3,12 +3,11 @@
 import abc
 import os
-import uuid
-from io import BytesIO
-from typing import IO, Iterator, Literal
+from typing import Generator, Iterable, Iterator, Literal
-from elevenlabs import Voice, VoiceSettings, save
-from elevenlabs.client import ElevenLabs, VoiceId, VoiceName
+import numpy as np
+from elevenlabs import Voice, VoiceSettings
+from elevenlabs.client import ElevenLabs
 from elevenlabs.types import OutputFormat
 from pydantic import Field
 from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, Packet
@@ -19,9 +18,11 @@ from sinapsis_core.template_base.base_models import (
     UIPropertiesMetadata,
 )
 from sinapsis_core.template_base.template import Template
-from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
+from sinapsis_core.utils.env_var_keys import WORKING_DIR
+from sinapsis_generic_data_tools.helpers.audio_encoder import audio_bytes_to_numpy
 from sinapsis_elevenlabs.helpers.env_var_keys import ELEVENLABS_API_KEY
+from sinapsis_elevenlabs.helpers.tags import Tags
 RESPONSE_TYPE = Iterator[bytes] | list[bytes] | list[Iterator[bytes]] | None
@@ -51,9 +52,7 @@ class ElevenLabsBase(Template, abc.ABC):
             output_format (OutputFormat): The output audio format and quality. Options include:
                 ["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
                 "mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
-            output_folder (str): The folder where generated audio files will be saved.
-            stream (bool): If True, the audio is returned as a stream; otherwise, saved to a file.
-            voice (VoiceId | VoiceName | Voice): The voice to use for speech synthesis. This can be a voice ID (str),
+            voice (str | Voice | None): The voice to use for speech synthesis. This can be a voice ID (str),
                 a voice name (str) or an elevenlabs voice object (Voice).
             voice_settings (VoiceSettings): A dictionary of settings that control the behavior of the voice.
                 - stability (float)
@@ -74,17 +73,20 @@ class ElevenLabsBase(Template, abc.ABC):
         ] = "eleven_turbo_v2_5"
         output_file_name: str | None = None
         output_format: OutputFormat = "mp3_44100_128"
-        output_folder: str = os.path.join(SINAPSIS_CACHE_DIR, "elevenlabs", "audios")
+        output_folder: str = os.path.join(WORKING_DIR, "elevenlabs", "audios")
         stream: bool = False
-        voice: VoiceId | VoiceName | Voice = None
+        voice: str | Voice | None = None
         voice_settings: VoiceSettings = Field(default_factory=dict)  # type: ignore[arg-type]
-    UIProperties = UIPropertiesMetadata(category="Elevenlabs", output_type=OutputTypes.AUDIO)
+    UIProperties = UIPropertiesMetadata(
+        category="Elevenlabs",
+        output_type=OutputTypes.AUDIO,
+        tags=[Tags.AUDIO, Tags.ELEVENLABS, Tags.SPEECH],
+    )
     def __init__(self, attributes: TemplateAttributeType) -> None:
         """Initializes the ElevenLabs API client with the given attributes."""
         super().__init__(attributes)
-        os.makedirs(self.attributes.output_folder, exist_ok=True)
         self.client = self.init_elevenlabs_client()
     def init_elevenlabs_client(self) -> ElevenLabs:
@@ -92,44 +94,27 @@ class ElevenLabsBase(Template, abc.ABC):
         key = self.attributes.api_key if self.attributes.api_key else ELEVENLABS_API_KEY
         return ElevenLabs(api_key=key)
-    def reset_state(self) -> None:
+    def reset_state(self, template_name: str | None = None) -> None:
         """Resets state of model"""
+        _ = template_name
         self.client = self.init_elevenlabs_client()
     @abc.abstractmethod
     def synthesize_speech(self, input_data: list[Packet]) -> RESPONSE_TYPE:
         """Abstract method for ElevenLabs speech synthesis."""
-    def _save_audio(self, response: Iterator[bytes] | bytes, file_format: str, idx: int) -> str:
-        """Saves the audio to a file and returns the file path."""
-        if self.attributes.output_file_name:
-            file_name = self.attributes.output_file_name + "_" + str(idx)
-        else:
-            file_name = uuid.uuid4()
-        output_file = os.path.join(self.attributes.output_folder, f"{file_name}.{file_format}")
-        try:
-            save(response, output_file)
-            self.logger.info(f"Audio saved to: {output_file}")
-            return output_file
-        except OSError as e:
-            self.logger.error(f"File system error while saving speech to file: {e}")
-            raise
-    def _generate_audio_stream(self, response: Iterator[bytes] | bytes) -> IO[bytes]:
+    def _generate_audio_stream(self, response: Iterable | bytes) -> bytes:
         """Generates and returns the audio stream."""
-        audio_stream = BytesIO()
         try:
             if isinstance(response, Iterator):
-                for chunk in response:
-                    if chunk:
-                        audio_stream.write(chunk)
+                audio_stream = b"".join(chunk for chunk in response)
             elif isinstance(response, bytes):
-                audio_stream.write(response)
+                audio_stream = response
             else:
                 raise TypeError(f"Unsupported response type: {type(response)}")
-            audio_stream.seek(0)
             self.logger.info("Returning audio stream")
             return audio_stream
         except IOError as e:
@@ -139,14 +124,15 @@ class ElevenLabsBase(Template, abc.ABC):
             self.logger.error(f"Value error while processing audio chunks: {e}")
             raise
-    def _process_audio_output(self, idx: int, response: Iterator[bytes] | bytes) -> str | IO[bytes]:
+    def _process_audio_output(self, response: Iterable | bytes) -> tuple[np.ndarray, int]:
         """Processes a single audio output (either stream or file)."""
-        if self.attributes.stream:
-            return self._generate_audio_stream(response)
-        file_format = "mp3" if "mp3" in self.attributes.output_format else "wav"
-        return self._save_audio(response, file_format, idx)
-    def generate_speech(self, input_data: list[Packet]) -> list[str | IO[bytes]] | None:
+        result = self._generate_audio_stream(response)
+        audio_np, sample_rate = audio_bytes_to_numpy(result)
+        return audio_np, sample_rate
+    def generate_speech(self, input_data: list[Packet]) -> list[tuple] | None:
         """Generates speech and saves it to a file."""
         responses: RESPONSE_TYPE = self.synthesize_speech(input_data)
         if not responses:
@@ -154,29 +140,29 @@ class ElevenLabsBase(Template, abc.ABC):
         if isinstance(responses, Iterator):
             responses = [responses]
-        audio_outputs = [self._process_audio_output(idx, response) for idx, response in enumerate(responses)]
+        elif isinstance(responses, Generator):
+            responses = list(responses)
+        audio_outputs = [self._process_audio_output(response) for response in responses]
         return audio_outputs
-    def _handle_streaming_output(self, audio_outputs: list[str | IO[bytes]]) -> list[AudioPacket]:
+    def _handle_streaming_output(self, audio_outputs: list[tuple]) -> list[AudioPacket]:
         """Handles audio stream output by adding it to the container as AudioPackets."""
         generated_audios: list[AudioPacket] = []
-        sample_rate = int(self.attributes.output_format.split("_")[1])
+        # sample_rate = int(self.attributes.output_format.split("_")[1])
         for audio_output in audio_outputs:
+            audio = audio_output[0]
+            sample_rate = audio_output[1]
             audio_packet = AudioPacket(
-                content=audio_output,
+                content=audio,
                 sample_rate=sample_rate,
             )
             generated_audios.append(audio_packet)
         return generated_audios
-    def _handle_audio_outputs(self, audio_outputs: list[str | IO[bytes]], container: DataContainer) -> None:
+    def _handle_audio_outputs(self, audio_outputs: list[tuple], container: DataContainer) -> None:
         """Handles the audio outputs by appending to the container based on the output type (stream or file)."""
-        if self.attributes.stream:
-            container.audios = container.audios or []
-            container.audios.extend(self._handle_streaming_output(audio_outputs))
-        else:
-            self._set_generic_data(container, audio_outputs)
+        container.audios = container.audios or []
+        container.audios = self._handle_streaming_output(audio_outputs)
     def execute(self, container: DataContainer) -> DataContainer:
         """

sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py CHANGED Viewed

@@ -1,49 +1,92 @@
 # -*- coding: utf-8 -*-
-"""Speech-To-Speech template for ElevenLabs"""
+"""Speech-To-Speech template for ElevenLabs."""
 from typing import Callable, Iterator, Literal
 from sinapsis_core.data_containers.data_packet import AudioPacket
+from sinapsis_elevenlabs.helpers.tags import Tags
 from sinapsis_elevenlabs.helpers.voice_utils import create_voice_settings, get_voice_id
 from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
+ElevenLabsSTSUIProperties = ElevenLabsBase.UIProperties
+ElevenLabsSTSUIProperties.tags.extend([Tags.SPEECH_TO_SPEECH, Tags.VOICE_CONVERSION])
 class ElevenLabsSTS(ElevenLabsBase):
-    """Template to interact with ElevenLabs speech-to-speech API."""
+    """Template to interact with the ElevenLabs Speech-to-Speech API.
+    This template takes an input audio and converts it to a new voice using
+    the ElevenLabs Speech-to-Speech (STS) API.
+    Usage example:
+    agent:
+      name: my_test_agent
+    templates:
+    - template_name: InputTemplate
+      class_name: InputTemplate
+      attributes: {}
+    - template_name: ElevenLabsSTS
+      class_name: ElevenLabsSTS
+      template_input: InputTemplate
+      attributes:
+        api_key: null
+        model: eleven_multilingual_sts_v2
+        output_file_name: null
+        output_format: mp3_44100_128
+        output_folder: <WORKING_DIR>/elevenlabs/audios
+        stream: false
+        voice: null
+        voice_settings:
+            stability: null
+            similarity_boost: null
+            style: null
+            use_speaker_boost: null
+            speed: null
+        streaming_latency: null
+    """
     PACKET_TYPE_NAME: str = "audios"
+    UIProperties = ElevenLabsSTSUIProperties
     class AttributesBaseModel(ElevenLabsBase.AttributesBaseModel):
         """Attributes specific to ElevenLabs STS API interaction.
-        This class overrides the base attributes of `ElevenLabsBase` to define
-        default models specific to the ElevenLabs STS system.
+        Attributes:
+            model (Literal): The STS model to use. Options are "eleven_english_sts_v2" or "eleven_multilingual_sts_v2".
+            streaming_latency (int | None): Optional latency optimization for streaming. Defaults to None.
         """
         model: Literal["eleven_english_sts_v2", "eleven_multilingual_sts_v2"] = "eleven_multilingual_sts_v2"
+        streaming_latency: int | None = None
     def synthesize_speech(self, input_data: list[AudioPacket]) -> Iterator[bytes]:
-        """
-        Sends an audio input to the ElevenLabs API for speech-to-speech synthesis.
+        """Sends an audio input to the ElevenLabs API for speech-to-speech synthesis.
-        This method processes the provided audio input using the specified voice, model,
-        and settings to generate a new audio response.
-        """
+        Args:
+            input_data (list[AudioPacket]): List of AudioPacket objects containing the audio to be converted.
+                Only the first AudioPacket in the list is used.
+        Returns:
+            Iterator[bytes]: An iterator yielding audio data chunks in the output format specified.
+        Raises:
+            ValueError: If there is a problem with the input data or parameters.
+            TypeError: If the input data or files are of incorrect type.
+            KeyError: If the expected key is missing in the API response.
+        """
         try:
-            method: Callable[..., Iterator[bytes]] = (
-                self.client.speech_to_speech.convert_as_stream
-                if self.attributes.stream
-                else self.client.speech_to_speech.convert
-            )
+            method: Callable[..., Iterator[bytes]] = self.client.speech_to_speech.stream  # (
             return method(
-                audio=input_data[0].content,
                 voice_id=get_voice_id(self.client, voice=self.attributes.voice),
+                audio=input_data[0].content,
                 model_id=self.attributes.model,
-                voice_settings=create_voice_settings(self.attributes.voice_settings),
+                voice_settings=create_voice_settings(self.attributes.voice_settings, as_json=True),
                 output_format=self.attributes.output_format,
-                optimize_streaming_latency=str(self.attributes.streaming_latency),
+                optimize_streaming_latency=self.attributes.streaming_latency,
             )
         except ValueError as e:
             self.logger.error(f"Value error synthesizing speech: {e}")

sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py CHANGED Viewed

@@ -1,16 +1,21 @@
 # -*- coding: utf-8 -*-
 """Text-To-Speech template for ElevenLabs"""
-from typing import Iterator, Literal
+from typing import Callable, Iterator, Literal
 from sinapsis_core.data_containers.data_packet import TextPacket
+from sinapsis_elevenlabs.helpers.tags import Tags
 from sinapsis_elevenlabs.helpers.voice_utils import (
     create_voice_settings,
+    get_voice_id,
     load_input_text,
 )
 from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
+ElevenLabsTTSUIProperties = ElevenLabsBase.UIProperties
+ElevenLabsTTSUIProperties.tags.extend([Tags.TEXT_TO_SPEECH])
 class ElevenLabsTTS(ElevenLabsBase):
     """Template to interact with ElevenLabs text-to-speech API.
@@ -35,7 +40,7 @@ class ElevenLabsTTS(ElevenLabsBase):
         voice_settings: null
         model: eleven_turbo_v2_5
         output_format: mp3_44100_128
-        output_folder: /sinapsis/cache/dir/elevenlabs/audios
+        output_folder: <WORKING_DIR>/elevenlabs/audios
         stream: false
     """
@@ -64,16 +69,15 @@ class ElevenLabsTTS(ElevenLabsBase):
         """
         input_text: str = load_input_text(input_data)
         try:
-            response: Iterator[bytes] = self.client.generate(
+            method: Callable[..., Iterator[bytes]] = self.client.text_to_speech.stream
+            return method(
                 text=input_text,
-                voice=self.attributes.voice,
-                model=self.attributes.model,
+                voice_id=get_voice_id(self.client, self.attributes.voice),
+                model_id=self.attributes.model,
                 voice_settings=create_voice_settings(self.attributes.voice_settings),
                 output_format=self.attributes.output_format,
-                stream=self.attributes.stream,
             )
-            return response
         except ValueError as e:
             self.logger.error(f"Value error synthesizing speech: {e}")
             raise

sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py CHANGED Viewed

@@ -1,32 +1,96 @@
 # -*- coding: utf-8 -*-
-"""Text-To-Speech template for ElevenLabs"""
+"""Text-To-Speech template for ElevenLabs Voice Cloning."""
 from elevenlabs import Voice
 from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
+from sinapsis_elevenlabs.helpers.tags import Tags
 from sinapsis_elevenlabs.templates.elevenlabs_tts import ElevenLabsTTS
+ElevenLabsVoiceCloneUIProperties = ElevenLabsTTS.UIProperties
+ElevenLabsVoiceCloneUIProperties.tags.extend([Tags.VOICE_CLONING])
 class ElevenLabsVoiceClone(ElevenLabsTTS):
-    """Template to clone a voice using ElevenLabs API."""
+    """Template to clone a voice using the ElevenLabs API.
+    This template allows you to create a new custom voice in ElevenLabs by providing
+    one or more audio samples. The cloned voice can then be used for subsequent
+    text-to-speech synthesis within the Sinapsis pipeline.
+    Usage example:
+    agent:
+    name: my_test_agent
+    templates:
+    - template_name: InputTemplate
+      class_name: InputTemplate
+      attributes: {}
+    - template_name: ElevenLabsVoiceClone
+      class_name: ElevenLabsVoiceClone
+      template_input: InputTemplate
+      attributes:
+        api_key: null
+        model: eleven_turbo_v2_5
+        output_file_name: null
+        output_format: mp3_44100_128
+        output_folder: <WORKING_DIR>/elevenlabs/audios
+        stream: false
+        voice: null
+        voice_settings:
+            stability: null
+            similarity_boost: null
+            style: null
+            use_speaker_boost: null
+            speed: null
+        name: null
+        description: null
+        remove_background_noise: false
+    """
+    UIProperties = ElevenLabsVoiceCloneUIProperties
     class AttributesBaseModel(ElevenLabsTTS.AttributesBaseModel):
-        """Attributes specific to the ElevenLabsVoiceClone class."""
+        """Attributes specific to the ElevenLabsVoiceClone class.
+        Attributes:
+            name (str | None): Name for the cloned voice. If None, a default name may be used.
+            description (str | None): Description for the cloned voice. Optional.
+            remove_background_noise (bool): Whether to remove background noise from samples. Defaults to False.
+        """
         name: str | None = None
         description: str | None = None
+        remove_background_noise: bool = False
     def clone_voice(self, input_data: list[AudioPacket]) -> Voice:
-        """Clones a voice using the provided audio files."""
-        files = [f.content for f in input_data]
+        """Clones a voice using the provided audio files.
+        Args:
+            input_data (list[AudioPacket]): List of AudioPacket objects containing the audio samples
+                to be used for voice cloning. Each AudioPacket's `content` should be a file-like object
+                or bytes representing the audio data.
+                **NOTE:** All provided audio packets are used as reference for a single cloned voice.
+        Returns:
+            Voice: The cloned Voice object as returned by the ElevenLabs API.
+        Raises:
+            ValueError: If there is a problem with the input data or parameters.
+            TypeError: If the input data or files are of incorrect type.
+            KeyError: If the expected key is missing in the API response.
+        """
+        files = [audio.content for audio in input_data]
         try:
-            add_voice_response = self.client.voices.add(
+            clone_response = self.client.voices.ivc.create(
                 name=self.attributes.name,
-                description=self.attributes.description,
                 files=files,
+                description=self.attributes.description,
+                remove_background_noise=self.attributes.remove_background_noise,
             )
-            cloned_voice = self.client.voices.get(add_voice_response.voice_id)
-            self.logger.info(f"Voice cloned successfully: {cloned_voice.name}")
+            cloned_voice = self.client.voices.get(clone_response.voice_id)
+            self.logger.info(f"Voice cloned successfully with IVC: {cloned_voice.name}")
             return cloned_voice
         except ValueError as e:
             self.logger.error(f"Value error in input data or parameters: {e}")
@@ -39,8 +103,22 @@ class ElevenLabsVoiceClone(ElevenLabsTTS):
             raise
     def execute(self, container: DataContainer) -> DataContainer:
-        """Executes the voice cloning process and generates the speech output."""
-        audios = getattr(container, "audios", None)
+        """Executes the voice cloning process and generates the speech output.
+        Args:
+            container (DataContainer): The input DataContainer, expected to contain
+                one or more AudioPacket objects in the `audios` attribute.
+        Returns:
+            DataContainer: The updated DataContainer. If cloning is successful,
+                the cloned voice is set in `self.attributes.voice` and the parent
+                TTS execution is performed using the new voice.
+        Side Effects:
+            - Updates `self.attributes.voice` with the cloned Voice object.
+            - May log errors or info messages.
+        """
+        audios = container.audios
         if not audios:
             self.logger.debug("No audios provided to clone voice")
             return container

sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py CHANGED Viewed

@@ -5,9 +5,13 @@ import base64
 from sinapsis_core.data_containers.data_packet import TextPacket
+from sinapsis_elevenlabs.helpers.tags import Tags
 from sinapsis_elevenlabs.helpers.voice_utils import load_input_text
 from sinapsis_elevenlabs.templates.elevenlabs_base import ElevenLabsBase
+ElevenLabsVoiceGenerationUIProperties = ElevenLabsBase.UIProperties
+ElevenLabsVoiceGenerationUIProperties.tags.extend([Tags.VOICE_GENERATION, Tags.PROMPT])
 class ElevenLabsVoiceGeneration(ElevenLabsBase):
     """
@@ -33,12 +37,14 @@ class ElevenLabsVoiceGeneration(ElevenLabsBase):
         voice_settings: null
         model: eleven_turbo_v2_5
         output_format: mp3_44100_128
-        output_folder: /sinapsis/cache/dir/elevenlabs/audios
+        output_folder: <WORKING_DIR>/elevenlabs/audios
         stream: false
         voice_description: An old British male with a raspy, deep voice. Professional,
           relaxed and assertive
     """
+    UIProperties = ElevenLabsVoiceGenerationUIProperties
     class AttributesBaseModel(ElevenLabsBase.AttributesBaseModel):
         """
         Attributes for voice generation in ElevenLabs API.

sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/__init__.py ADDED Viewed

File without changes

sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/tags.py ADDED Viewed

@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from enum import Enum
+class Tags(Enum):
+    AUDIO = "audio"
+    AUDIO_GENERATION = "audio_generation"
+    F5TTS = "f5tts"
+    SPEECH = "speech"
+    TEXT_TO_SPEECH = "text_to_speech"

sinapsis-speech 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

sinapsis-speech 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl