sinapsis-speech 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/PKG-INFO +117 -63
  2. sinapsis_speech-0.2.0/README.md +243 -0
  3. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py +1 -1
  4. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +7 -23
  5. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +13 -23
  6. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +4 -1
  7. sinapsis_speech-0.2.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py +0 -0
  8. sinapsis_speech-0.2.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py +20 -0
  9. sinapsis_speech-0.2.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +357 -0
  10. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_speech.egg-info/PKG-INFO +117 -63
  11. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_speech.egg-info/SOURCES.txt +10 -1
  12. sinapsis_speech-0.2.0/packages/sinapsis_speech.egg-info/requires.txt +11 -0
  13. sinapsis_speech-0.2.0/packages/sinapsis_speech.egg-info/top_level.txt +3 -0
  14. sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/__init__.py +0 -0
  15. sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py +0 -0
  16. sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py +67 -0
  17. sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py +153 -0
  18. sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py +20 -0
  19. sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py +172 -0
  20. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/pyproject.toml +30 -28
  21. sinapsis_speech-0.1.0/README.md +0 -184
  22. sinapsis_speech-0.1.0/packages/sinapsis_speech.egg-info/requires.txt +0 -15
  23. sinapsis_speech-0.1.0/packages/sinapsis_speech.egg-info/top_level.txt +0 -1
  24. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/LICENSE +0 -0
  25. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py +0 -0
  26. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py +0 -0
  27. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py +0 -0
  28. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +0 -0
  29. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_speech.egg-info/dependency_links.txt +0 -0
  30. {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis-ai.com>
6
6
  License: GNU AFFERO GENERAL PUBLIC LICENSE
@@ -666,25 +666,20 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
666
666
  <https://www.gnu.org/licenses/>.
667
667
 
668
668
  Project-URL: Homepage, https://sinapsis.tech
669
- Project-URL: Documentation, https://docs.sinapsis.tech/docs
669
+ Project-URL: Documentation, https://docs.sinapsis.tech/docs/sinapsis-speech
670
670
  Project-URL: Tutorials, https://docs.sinapsis.tech/tutorials
671
671
  Project-URL: Repository, https://github.com/Sinapsis-AI/sinapsis-speech.git
672
672
  Requires-Python: >=3.10
673
673
  Description-Content-Type: text/markdown
674
- License-File: LICENSE
675
674
  Requires-Dist: pip>=24.3.1
676
- Requires-Dist: sinapsis>=0.1.1
677
- Provides-Extra: elevenlabs-app
678
- Requires-Dist: sinapsis-elevenlabs; extra == "elevenlabs-app"
679
- Requires-Dist: sinapsis-speech[gradio-app]; extra == "elevenlabs-app"
680
- Provides-Extra: gradio-app
681
- Requires-Dist: gradio>=5.14.0; extra == "gradio-app"
682
- Requires-Dist: sinapsis-data-readers>=0.1.0; extra == "gradio-app"
675
+ Requires-Dist: sinapsis>=0.2.2
683
676
  Provides-Extra: all
684
- Requires-Dist: sinapsis-elevenlabs; extra == "all"
685
- Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
686
- Requires-Dist: sinapsis-speech[elevenlabs-app]; extra == "all"
687
- Dynamic: license-file
677
+ Requires-Dist: sinapsis-elevenlabs[all]; extra == "all"
678
+ Requires-Dist: sinapsis-f5-tts[all]; extra == "all"
679
+ Requires-Dist: sinapsis-speech[webapp]; extra == "all"
680
+ Requires-Dist: sinapsis-zonos[all]; extra == "all"
681
+ Provides-Extra: gradio-app
682
+ Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
688
683
 
689
684
  <h1 align="center">
690
685
  <br>
@@ -702,7 +697,7 @@ Sinapsis Speech
702
697
  <p align="center">
703
698
  <a href="#installation">🐍 Installation</a> •
704
699
  <a href="#packages">📦 Packages</a> •
705
- <a href="#webapp">🌐 Webapp</a> •
700
+ <a href="#webapp">🌐 Webapps</a> •
706
701
  <a href="#documentation">📙 Documentation</a> •
707
702
  <a href="#license">🔍 License</a>
708
703
  </p>
@@ -715,47 +710,93 @@ Sinapsis Speech
715
710
  > Sinapsis projects require Python 3.10 or higher.
716
711
  >
717
712
 
718
- We strongly encourage the use of <code>uv</code>, although any other package manager should work too.
719
- If you need to install <code>uv</code> please see the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
713
+ This repo includes packages for performing speech synthesis using different tools:
720
714
 
715
+ * <code>sinapsis-elevenlabs</code>
716
+ * <code>sinapsis-f5-tts</code>
717
+ * <code>sinapsis-zonos</code>
721
718
 
722
- 1. Install using your favourite package manager.
719
+ Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
723
720
 
724
- Example with <code>uv</code>:
721
+
722
+ Install with <code>uv</code>:
725
723
  ```bash
726
- uv pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
724
+ uv pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
727
725
  ```
728
- or with raw <code>pip</code>:
726
+ Or with raw <code>pip</code>:
729
727
  ```bash
730
- pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
728
+ pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
731
729
  ```
732
- **Change the name of the package for the one you want to install**.
730
+
731
+ **Replace `sinapsis-elevenlabs` with the name of the package you intend to install**.
732
+
733
+ > [!IMPORTANT]
734
+ > Templates in each package may require additional dependencies. For development, we recommend installing the package with all optional dependencies:
735
+ >
736
+ With <code>uv</code>:
737
+
738
+ ```bash
739
+ uv pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
740
+ ```
741
+ Or with raw <code>pip</code>:
742
+ ```bash
743
+ pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
744
+ ```
745
+
746
+ **Be sure to substitute `sinapsis-elevenlabs` with the appropriate package name**.
747
+
748
+
733
749
 
734
750
  > [!TIP]
735
751
  > You can also install all the packages within this project:
736
752
  >
737
753
  ```bash
738
- uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
754
+ uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
739
755
  ```
740
756
 
741
757
 
742
758
  <h2 id="packages">📦 Packages</h2>
743
759
 
744
- Each package can be used independently or combined to create more complex workflows. Below is an overview of the available packages:
760
+ This repository is organized into modular packages, each designed for integration with different text-to-speech tools. These packages provide ready-to-use templates for speech synthesis. Below is an overview of the available packages:
745
761
 
746
762
  <details>
747
- <summary id="elevenlabs"><strong><span style="font-size: 1.4em;"> Elevenlabs </span></strong></summary>
763
+ <summary id="elevenlabs"><strong><span style="font-size: 1.4em;"> Sinapsis ElevenLabs </span></strong></summary>
764
+
765
+ This package offers a suite of templates and utilities designed for effortless integration, configuration, and execution of **text-to-speech (TTS)** and **voice generation** functionalities powered by [ElevenLabs](https://elevenlabs.io/).
748
766
 
749
- This package provides a suite of templates and utilities for seamlessly integrating, configuring, and running **text-to-speech (TTS)** and **voice generation** functionalities powered by [ElevenLabs](https://elevenlabs.io/):
767
+ - **ElevenLabsTTS**: Template for converting text into speech using ElevenLabs' voice models.
750
768
 
751
- - **Text-to-speech**: Template for converting text into speech using ElevenLabs' voice models.
769
+ - **ElevenLabsVoiceGeneration**: Template for generating custom synthetic voices based on user-provided descriptions.
752
770
 
753
- - **Voice generation**: Template for generating custom synthetic voices based on user-provided descriptions.
771
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_elevenlabs/README.md).
754
772
 
755
773
  </details>
756
- <h2 id="webapps">🌐 Webapps</h2>
757
- The webapps included in this project showcase the modularity of the templates, in this case
758
- for speech generation tasks.
774
+
775
+
776
+ <details>
777
+ <summary id="f5tts"><strong><span style="font-size: 1.4em;"> Sinapsis F5-TTS</span></strong></summary>
778
+
779
+ This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [F5TTS](https://github.com/SWivid/F5-TTS).
780
+
781
+ - **F5TTSInference**: Converts text to speech using the F5TTS model with voice cloning capabilities.
782
+
783
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_f5_tts/README.md).
784
+
785
+ </details>
786
+
787
+ <details>
788
+ <summary id="zonos"><strong><span style="font-size: 1.4em;"> Sinapsis Zonos</span></strong></summary>
789
+
790
+ This package provides a single template for integrating, configuring, and running **text-to-speech (TTS)** and **voice cloning** functionalities powered by [Zonos](https://github.com/Zyphra/Zonos/tree/main).
791
+
792
+ - **ZonosTTS**: Template for converting text to speech or performing voice cloning based on the presence of an audio sample.
793
+
794
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_zonos/README.md).
795
+
796
+ </details>
797
+
798
+ <h2 id="webapp">🌐 Webapps</h2>
799
+ The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
759
800
 
760
801
  > [!IMPORTANT]
761
802
  > To run the app you first need to clone this repository:
@@ -768,89 +809,102 @@ cd sinapsis-speech
768
809
  > [!NOTE]
769
810
  > If you'd like to enable external app sharing in Gradio, `export GRADIO_SHARE_APP=True`
770
811
 
771
- > [!IMPORTANT]
772
- > The CosyVoice model requires at least 4GB of ram to work.
773
812
 
774
813
  > [!IMPORTANT]
775
- > Elevenlabs requires an api key to run any inference. Please go to the [official website](https://elevenlabs.io), create an account.
776
- If you already have an account, go to the [token page](https://elevenlabs.io/app/settings/api-keys) and generate a token.
814
+ > Elevenlabs requires an API key to run any inference. To get started, visit the [official website](https://elevenlabs.io) and create an account. If you already have an account, go to the [API keys page](https://elevenlabs.io/app/settings/api-keys) to generate a token.
777
815
 
778
816
  > [!IMPORTANT]
779
- > set your env var using <code> export ELEVENLABS_API_KEY='your-api-key'</code>
817
+ > Set your env var using <code> export ELEVENLABS_API_KEY='your-api-key'</code>
780
818
 
819
+ > [!IMPORTANT]
820
+ > F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
781
821
 
782
- > [!TIP]
783
- > The agent configuration can be updated using the AGENT_CONFIG_PATH environment var.
822
+ > [!NOTE]
823
+ > Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package's configs folder.
784
824
 
785
825
 
786
826
  <details>
787
- <summary id="docker"><strong><span style="font-size: 1.4em;">🐳 Build with Docker</span></strong></summary>
827
+ <summary id="docker"><strong><span style="font-size: 1.4em;">🐳 Docker</span></strong></summary>
788
828
 
789
- **IMPORTANT** This docker image depends on the sinapsis-nvidia:base image. Please refer to the official [sinapsis](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker) instructions to Build with Docker.
829
+ **IMPORTANT**: This Docker image depends on the `sinapsis-nvidia:base` image. For detailed instructions, please refer to the [Sinapsis README](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker).
790
830
 
791
- 1. **Build the Docker image**:
831
+ 1. **Build the sinapsis-speech image**:
792
832
  ```bash
793
833
  docker compose -f docker/compose.yaml build
794
834
  ```
795
835
 
796
-
797
- 2. **Launch the service**:
836
+ 2. **Start the app container**:
837
+ For ElevenLabs:
798
838
  ```bash
799
839
  docker compose -f docker/compose_apps.yaml up -d sinapsis-elevenlabs
800
840
  ```
841
+ For F5-TTS:
842
+ ```bash
843
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-f5_tts
844
+ ```
845
+ For Zonos:
846
+ ```bash
847
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-zonos
848
+ ```
801
849
 
802
-
803
- 2. **Check the logs**
850
+ 3. **Check the logs**
851
+ For ElevenLabs:
804
852
  ```bash
805
853
  docker logs -f sinapsis-elevenlabs
806
854
  ```
807
- 3. **The logs will display the URL to access the webapp, e.g.,:**:
855
+ For F5-TTS:
856
+ ```bash
857
+ docker logs -f sinapsis-f5tts
858
+ ```
859
+ For Zonos:
860
+ ```bash
861
+ docker logs -f sinapsis-zonos
862
+ ```
863
+ 4. **The logs will display the URL to access the webapp, e.g.:**
808
864
  ```bash
809
865
  Running on local URL: http://127.0.0.1:7860
810
866
  ```
811
- 4. To stop the app:
867
+ **NOTE**: The URL may be different; check the output of the logs.
868
+ 5. **To stop the app**:
812
869
  ```bash
813
- docker compose -f docker/compose_apps.yaml down sinapsis-elevenlabs
870
+ docker compose -f docker/compose_apps.yaml down
814
871
  ```
815
872
  </details>
816
873
 
817
874
  <details>
818
875
  <summary id="virtual-environment"><strong><span style="font-size: 1.4em;">💻 UV</span></strong></summary>
819
876
 
877
+ To run the webapp using the <code>uv</code> package manager, follow these steps:
820
878
 
821
879
  1. **Sync the virtual environment**:
822
880
 
823
881
  ```bash
824
882
  uv sync --frozen
825
883
  ```
826
- 2. Install the wheel:
884
+ 2. **Install the wheel**:
827
885
 
828
886
  ```bash
829
887
  uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
830
888
  ```
831
889
 
832
-
833
- 3. **Activate the virtual environment**:
834
-
890
+ 3. **Run the webapp**:
891
+ For ElevenLabs:
835
892
  ```bash
836
- source .venv/bin/activate
893
+ uv run webapps/elevenlabs/elevenlabs_tts_app.py
837
894
  ```
838
- 4. **Declare PYTHONPATH**
895
+ For F5-TTS:
839
896
  ```bash
840
- export PYTHONPATH=$PWD/webapps
897
+ uv run webapps/f5-tts/f5_tts_app.py
841
898
  ```
842
- **NOTE** if not located in <code>sinapsis-speech</code> folder, change $PWD for the actual path to <code>sinapsis-speech</code>
843
-
844
- 5. **Launch the demo**:
845
-
899
+ For Zonos:
846
900
  ```bash
847
- python webapps/elevenlabs/elevenlabs_tts_app.py
901
+ uv run webapps/zonos/zonos_tts_app.py
848
902
  ```
849
- 6. Open the displayed URL, e.g.:
903
+ 4. **The terminal will display the URL to access the webapp (e.g.)**:
850
904
  ```bash
851
905
  Running on local URL: http://127.0.0.1:7860
852
906
  ```
853
- **NOTE**: The URL can be different, please make sure you check the logs.
907
+ **NOTE**: The URL may vary; check the terminal output for the correct address.
854
908
 
855
909
  </details>
856
910
 
@@ -0,0 +1,243 @@
1
+ <h1 align="center">
2
+ <br>
3
+ <a href="https://sinapsis.tech/">
4
+ <img
5
+ src="https://github.com/Sinapsis-AI/brand-resources/blob/main/sinapsis_logo/4x/logo.png?raw=true"
6
+ alt="" width="300">
7
+ </a><br>
8
+ Sinapsis Speech
9
+ <br>
10
+ </h1>
11
+
12
+ <h4 align="center"> Templates for a wide range of voice generation tasks.</h4>
13
+
14
+ <p align="center">
15
+ <a href="#installation">🐍 Installation</a> •
16
+ <a href="#packages">📦 Packages</a> •
17
+ <a href="#webapp">🌐 Webapps</a> •
18
+ <a href="#documentation">📙 Documentation</a> •
19
+ <a href="#license">🔍 License</a>
20
+ </p>
21
+
22
+
23
+ <h2 id="installation">🐍 Installation</h2>
24
+
25
+
26
+ > [!IMPORTANT]
27
+ > Sinapsis projects require Python 3.10 or higher.
28
+ >
29
+
30
+ This repo includes packages for performing speech synthesis using different tools:
31
+
32
+ * <code>sinapsis-elevenlabs</code>
33
+ * <code>sinapsis-f5-tts</code>
34
+ * <code>sinapsis-zonos</code>
35
+
36
+ Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
37
+
38
+
39
+ Install with <code>uv</code>:
40
+ ```bash
41
+ uv pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
42
+ ```
43
+ Or with raw <code>pip</code>:
44
+ ```bash
45
+ pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
46
+ ```
47
+
48
+ **Replace `sinapsis-elevenlabs` with the name of the package you intend to install**.
49
+
50
+ > [!IMPORTANT]
51
+ > Templates in each package may require additional dependencies. For development, we recommend installing the package with all optional dependencies:
52
+ >
53
+ With <code>uv</code>:
54
+
55
+ ```bash
56
+ uv pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
57
+ ```
58
+ Or with raw <code>pip</code>:
59
+ ```bash
60
+ pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
61
+ ```
62
+
63
+ **Be sure to substitute `sinapsis-elevenlabs` with the appropriate package name**.
64
+
65
+
66
+
67
+ > [!TIP]
68
+ > You can also install all the packages within this project:
69
+ >
70
+ ```bash
71
+ uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
72
+ ```
73
+
74
+
75
+ <h2 id="packages">📦 Packages</h2>
76
+
77
+ This repository is organized into modular packages, each designed for integration with different text-to-speech tools. These packages provide ready-to-use templates for speech synthesis. Below is an overview of the available packages:
78
+
79
+ <details>
80
+ <summary id="elevenlabs"><strong><span style="font-size: 1.4em;"> Sinapsis ElevenLabs </span></strong></summary>
81
+
82
+ This package offers a suite of templates and utilities designed for effortless integration, configuration, and execution of **text-to-speech (TTS)** and **voice generation** functionalities powered by [ElevenLabs](https://elevenlabs.io/).
83
+
84
+ - **ElevenLabsTTS**: Template for converting text into speech using ElevenLabs' voice models.
85
+
86
+ - **ElevenLabsVoiceGeneration**: Template for generating custom synthetic voices based on user-provided descriptions.
87
+
88
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_elevenlabs/README.md).
89
+
90
+ </details>
91
+
92
+
93
+ <details>
94
+ <summary id="f5tts"><strong><span style="font-size: 1.4em;"> Sinapsis F5-TTS</span></strong></summary>
95
+
96
+ This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [F5TTS](https://github.com/SWivid/F5-TTS).
97
+
98
+ - **F5TTSInference**: Converts text to speech using the F5TTS model with voice cloning capabilities.
99
+
100
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_f5_tts/README.md).
101
+
102
+ </details>
103
+
104
+ <details>
105
+ <summary id="zonos"><strong><span style="font-size: 1.4em;"> Sinapsis Zonos</span></strong></summary>
106
+
107
+ This package provides a single template for integrating, configuring, and running **text-to-speech (TTS)** and **voice cloning** functionalities powered by [Zonos](https://github.com/Zyphra/Zonos/tree/main).
108
+
109
+ - **ZonosTTS**: Template for converting text to speech or performing voice cloning based on the presence of an audio sample.
110
+
111
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_zonos/README.md).
112
+
113
+ </details>
114
+
115
+ <h2 id="webapp">🌐 Webapps</h2>
116
+ The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
117
+
118
+ > [!IMPORTANT]
119
+ > To run the app you first need to clone this repository:
120
+
121
+ ```bash
122
+ git clone git@github.com:Sinapsis-ai/sinapsis-speech.git
123
+ cd sinapsis-speech
124
+ ```
125
+
126
+ > [!NOTE]
127
+ > If you'd like to enable external app sharing in Gradio, `export GRADIO_SHARE_APP=True`
128
+
129
+
130
+ > [!IMPORTANT]
131
+ > Elevenlabs requires an API key to run any inference. To get started, visit the [official website](https://elevenlabs.io) and create an account. If you already have an account, go to the [API keys page](https://elevenlabs.io/app/settings/api-keys) to generate a token.
132
+
133
+ > [!IMPORTANT]
134
+ > Set your env var using <code> export ELEVENLABS_API_KEY='your-api-key'</code>
135
+
136
+ > [!IMPORTANT]
137
+ > F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
138
+
139
+ > [!NOTE]
140
+ > Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package's configs folder.
141
+
142
+
143
+ <details>
144
+ <summary id="docker"><strong><span style="font-size: 1.4em;">🐳 Docker</span></strong></summary>
145
+
146
+ **IMPORTANT**: This Docker image depends on the `sinapsis-nvidia:base` image. For detailed instructions, please refer to the [Sinapsis README](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker).
147
+
148
+ 1. **Build the sinapsis-speech image**:
149
+ ```bash
150
+ docker compose -f docker/compose.yaml build
151
+ ```
152
+
153
+ 2. **Start the app container**:
154
+ For ElevenLabs:
155
+ ```bash
156
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-elevenlabs
157
+ ```
158
+ For F5-TTS:
159
+ ```bash
160
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-f5_tts
161
+ ```
162
+ For Zonos:
163
+ ```bash
164
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-zonos
165
+ ```
166
+
167
+ 3. **Check the logs**
168
+ For ElevenLabs:
169
+ ```bash
170
+ docker logs -f sinapsis-elevenlabs
171
+ ```
172
+ For F5-TTS:
173
+ ```bash
174
+ docker logs -f sinapsis-f5tts
175
+ ```
176
+ For Zonos:
177
+ ```bash
178
+ docker logs -f sinapsis-zonos
179
+ ```
180
+ 4. **The logs will display the URL to access the webapp, e.g.:**
181
+ ```bash
182
+ Running on local URL: http://127.0.0.1:7860
183
+ ```
184
+ **NOTE**: The URL may be different; check the output of the logs.
185
+ 5. **To stop the app**:
186
+ ```bash
187
+ docker compose -f docker/compose_apps.yaml down
188
+ ```
189
+ </details>
190
+
191
+ <details>
192
+ <summary id="virtual-environment"><strong><span style="font-size: 1.4em;">💻 UV</span></strong></summary>
193
+
194
+ To run the webapp using the <code>uv</code> package manager, follow these steps:
195
+
196
+ 1. **Sync the virtual environment**:
197
+
198
+ ```bash
199
+ uv sync --frozen
200
+ ```
201
+ 2. **Install the wheel**:
202
+
203
+ ```bash
204
+ uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
205
+ ```
206
+
207
+ 3. **Run the webapp**:
208
+ For ElevenLabs:
209
+ ```bash
210
+ uv run webapps/elevenlabs/elevenlabs_tts_app.py
211
+ ```
212
+ For F5-TTS:
213
+ ```bash
214
+ uv run webapps/f5-tts/f5_tts_app.py
215
+ ```
216
+ For Zonos:
217
+ ```bash
218
+ uv run webapps/zonos/zonos_tts_app.py
219
+ ```
220
+ 4. **The terminal will display the URL to access the webapp (e.g.)**:
221
+ ```bash
222
+ Running on local URL: http://127.0.0.1:7860
223
+ ```
224
+ **NOTE**: The URL may vary; check the terminal output for the correct address.
225
+
226
+ </details>
227
+
228
+
229
+
230
+ <h2 id="documentation">📙 Documentation</h2>
231
+
232
+ Documentation is available on the [sinapsis website](https://docs.sinapsis.tech/docs)
233
+
234
+ Tutorials for different projects within sinapsis are available at [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials)
235
+
236
+ <h2 id="license">🔍 License</h2>
237
+
238
+ This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
239
+
240
+ For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
241
+
242
+
243
+
@@ -12,7 +12,7 @@ class _ElevenlabsKeys(BaseModel):
12
12
 
13
13
  ELEVENLABS_API_KEY: EnvVarEntry = EnvVarEntry(
14
14
  var_name="ELEVENLABS_API_KEY",
15
- default_value=" ",
15
+ default_value=None,
16
16
  allowed_values=None,
17
17
  description="set api key for Elevenlabs",
18
18
  )
@@ -9,34 +9,18 @@ def create_voice_settings(settings: VoiceSettings) -> VoiceSettings | None:
9
9
  """
10
10
  Creates or updates a `VoiceSettings` object based on the provided settings.
11
11
 
12
- This function attempts to create or update a `VoiceSettings` object using the provided
13
- `VoiceSettings` instance. If any of the fields in the settings contain `None`,
14
- the corresponding field is populated with a default value from `DEFAULT_VOICE.settings`.
15
- If all fields are valid (i.e., none are `None`), the provided `settings` object is returned unchanged.
16
-
17
- If the settings argument is `None` or if no valid settings are provided, the function
18
- returns `None`.
19
-
20
12
  Args:
21
- settings (VoiceSettings): An instance of `VoiceSettings` containing the settings to be applied.
22
- This object may have fields with `None` values that should be replaced with default values.
13
+ settings (VoiceSettings | None): An instance of `VoiceSettings` containing the settings to be applied.
14
+ If `None`, the function returns the default settings.
23
15
 
24
16
  Returns:
25
- VoiceSettings: A `VoiceSettings` object created or updated with the provided settings. If any field
26
- was `None`, it is updated with default values. If the settings are invalid or empty,
27
- `None` is returned.
17
+ VoiceSettings: The provided `VoiceSettings` object if `settings` is not `None`. Otherwise,
18
+ `DEFAULT_VOICE.settings` is returned.
28
19
  """
29
- if settings:
30
- settings_dict = settings.model_dump()
31
- if any(value is None for value in settings_dict.values()):
32
- for field, value in settings_dict.items():
33
- if value is None:
34
- settings_dict[field] = getattr(DEFAULT_VOICE.settings, field)
20
+ if not settings:
21
+ return DEFAULT_VOICE.settings
35
22
 
36
- return VoiceSettings(**settings_dict)
37
- else:
38
- return settings
39
- return None
23
+ return settings
40
24
 
41
25
 
42
26
  def get_voice_id(client: ElevenLabs, voice: VoiceId | VoiceName) -> VoiceId:
@@ -9,6 +9,8 @@ from typing import IO, Iterator, Literal
9
9
 
10
10
  from elevenlabs import Voice, VoiceSettings, save
11
11
  from elevenlabs.client import ElevenLabs, VoiceId, VoiceName
12
+ from elevenlabs.types import OutputFormat
13
+ from pydantic import Field
12
14
  from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, Packet
13
15
  from sinapsis_core.template_base.template import (
14
16
  Template,
@@ -40,25 +42,23 @@ class ElevenLabsBase(Template, abc.ABC):
40
42
  """
41
43
  Attributes for ElevenLabs Base Class.
42
44
  Args:
43
- api_key (str): The API key to authenticate with ElevenLabs' API.
44
- voice (str|elevenlabs.Voice): The voice to use for speech synthesis. This can be a voice ID (str),
45
+ api_key (str): The API key used to authenticate with ElevenLabs' API.
46
+ model (Literal): The model identifier to use for speech synthesis.
47
+ output_format (OutputFormat): The output audio format and quality. Options include:
48
+ ["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
49
+ "mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
50
+ output_folder (str): The folder where generated audio files will be saved.
51
+ stream (bool): If True, the audio is returned as a stream; otherwise, saved to a file.
52
+ voice (VoiceId | VoiceName | Voice): The voice to use for speech synthesis. This can be a voice ID (str),
45
53
  a voice name (str) or an elevenlabs voice object (Voice).
46
54
  voice_settings (VoiceSettings): A dictionary of settings that control the behavior of the voice.
47
55
  - stability (float)
48
56
  - similarity_boost (float)
49
57
  - style (float)
50
58
  - use_speaker_boost (bool)
51
- model (Literal): The model identifier to use for speech synthesis.
52
- output_format (Literal): The output audio format and quality. Options include:
53
- ["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
54
- "mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
55
- output_folder (str): The folder where generated audio files will be saved.
56
- stream (bool): If True, the audio is returned as a stream; otherwise, saved to a file.
57
59
  """
58
60
 
59
61
  api_key: str | None = None
60
- voice: VoiceId | VoiceName | Voice = None
61
- voice_settings: VoiceSettings | None = None
62
62
  model: Literal[
63
63
  "eleven_turbo_v2_5",
64
64
  "eleven_multilingual_v2",
@@ -68,21 +68,11 @@ class ElevenLabsBase(Template, abc.ABC):
68
68
  "eleven_english_sts_v2",
69
69
  "eleven_multilingual_sts_v2",
70
70
  ] = "eleven_turbo_v2_5"
71
- output_format: Literal[
72
- "mp3_22050_32",
73
- "mp3_44100_32",
74
- "mp3_44100_64",
75
- "mp3_44100_96",
76
- "mp3_44100_128",
77
- "mp3_44100_192",
78
- "pcm_16000",
79
- "pcm_22050",
80
- "pcm_24000",
81
- "pcm_44100",
82
- "ulaw_8000",
83
- ] = "mp3_44100_128"
71
+ output_format: OutputFormat = "mp3_44100_128"
84
72
  output_folder: str = os.path.join(SINAPSIS_CACHE_DIR, "elevenlabs", "audios")
85
73
  stream: bool = False
74
+ voice: VoiceId | VoiceName | Voice = None
75
+ voice_settings: VoiceSettings = Field(default_factory=dict) # type: ignore[arg-type]
86
76
 
87
77
  def __init__(self, attributes: TemplateAttributeType) -> None:
88
78
  """Initializes the ElevenLabs API client with the given attributes."""