sinapsis-speech 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/PKG-INFO +117 -63
- sinapsis_speech-0.2.0/README.md +243 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py +1 -1
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +7 -23
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +13 -23
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +4 -1
- sinapsis_speech-0.2.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py +0 -0
- sinapsis_speech-0.2.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py +20 -0
- sinapsis_speech-0.2.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +357 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_speech.egg-info/PKG-INFO +117 -63
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_speech.egg-info/SOURCES.txt +10 -1
- sinapsis_speech-0.2.0/packages/sinapsis_speech.egg-info/requires.txt +11 -0
- sinapsis_speech-0.2.0/packages/sinapsis_speech.egg-info/top_level.txt +3 -0
- sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/__init__.py +0 -0
- sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py +0 -0
- sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py +67 -0
- sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py +153 -0
- sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py +20 -0
- sinapsis_speech-0.2.0/packages/sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py +172 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/pyproject.toml +30 -28
- sinapsis_speech-0.1.0/README.md +0 -184
- sinapsis_speech-0.1.0/packages/sinapsis_speech.egg-info/requires.txt +0 -15
- sinapsis_speech-0.1.0/packages/sinapsis_speech.egg-info/top_level.txt +0 -1
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/LICENSE +0 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py +0 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py +0 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py +0 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +0 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/packages/sinapsis_speech.egg-info/dependency_links.txt +0 -0
- {sinapsis_speech-0.1.0 → sinapsis_speech-0.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sinapsis-speech
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Generate speech using various libraries.
|
|
5
5
|
Author-email: SinapsisAI <dev@sinapsis-ai.com>
|
|
6
6
|
License: GNU AFFERO GENERAL PUBLIC LICENSE
|
|
@@ -666,25 +666,20 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
|
|
|
666
666
|
<https://www.gnu.org/licenses/>.
|
|
667
667
|
|
|
668
668
|
Project-URL: Homepage, https://sinapsis.tech
|
|
669
|
-
Project-URL: Documentation, https://docs.sinapsis.tech/docs
|
|
669
|
+
Project-URL: Documentation, https://docs.sinapsis.tech/docs/sinapsis-speech
|
|
670
670
|
Project-URL: Tutorials, https://docs.sinapsis.tech/tutorials
|
|
671
671
|
Project-URL: Repository, https://github.com/Sinapsis-AI/sinapsis-speech.git
|
|
672
672
|
Requires-Python: >=3.10
|
|
673
673
|
Description-Content-Type: text/markdown
|
|
674
|
-
License-File: LICENSE
|
|
675
674
|
Requires-Dist: pip>=24.3.1
|
|
676
|
-
Requires-Dist: sinapsis>=0.
|
|
677
|
-
Provides-Extra: elevenlabs-app
|
|
678
|
-
Requires-Dist: sinapsis-elevenlabs; extra == "elevenlabs-app"
|
|
679
|
-
Requires-Dist: sinapsis-speech[gradio-app]; extra == "elevenlabs-app"
|
|
680
|
-
Provides-Extra: gradio-app
|
|
681
|
-
Requires-Dist: gradio>=5.14.0; extra == "gradio-app"
|
|
682
|
-
Requires-Dist: sinapsis-data-readers>=0.1.0; extra == "gradio-app"
|
|
675
|
+
Requires-Dist: sinapsis>=0.2.2
|
|
683
676
|
Provides-Extra: all
|
|
684
|
-
Requires-Dist: sinapsis-elevenlabs; extra == "all"
|
|
685
|
-
Requires-Dist: sinapsis-
|
|
686
|
-
Requires-Dist: sinapsis-speech[
|
|
687
|
-
|
|
677
|
+
Requires-Dist: sinapsis-elevenlabs[all]; extra == "all"
|
|
678
|
+
Requires-Dist: sinapsis-f5-tts[all]; extra == "all"
|
|
679
|
+
Requires-Dist: sinapsis-speech[webapp]; extra == "all"
|
|
680
|
+
Requires-Dist: sinapsis-zonos[all]; extra == "all"
|
|
681
|
+
Provides-Extra: gradio-app
|
|
682
|
+
Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
|
|
688
683
|
|
|
689
684
|
<h1 align="center">
|
|
690
685
|
<br>
|
|
@@ -702,7 +697,7 @@ Sinapsis Speech
|
|
|
702
697
|
<p align="center">
|
|
703
698
|
<a href="#installation">🐍 Installation</a> •
|
|
704
699
|
<a href="#packages">📦 Packages</a> •
|
|
705
|
-
<a href="#webapp">🌐
|
|
700
|
+
<a href="#webapp">🌐 Webapps</a> •
|
|
706
701
|
<a href="#documentation">📙 Documentation</a> •
|
|
707
702
|
<a href="#license">🔍 License</a>
|
|
708
703
|
</p>
|
|
@@ -715,47 +710,93 @@ Sinapsis Speech
|
|
|
715
710
|
> Sinapsis projects require Python 3.10 or higher.
|
|
716
711
|
>
|
|
717
712
|
|
|
718
|
-
|
|
719
|
-
If you need to install <code>uv</code> please see the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
|
|
713
|
+
This repo includes packages for performing speech synthesis using different tools:
|
|
720
714
|
|
|
715
|
+
* <code>sinapsis-elevenlabs</code>
|
|
716
|
+
* <code>sinapsis-f5-tts</code>
|
|
717
|
+
* <code>sinapsis-zonos</code>
|
|
721
718
|
|
|
722
|
-
|
|
719
|
+
Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
|
|
723
720
|
|
|
724
|
-
|
|
721
|
+
|
|
722
|
+
Install with <code>uv</code>:
|
|
725
723
|
```bash
|
|
726
|
-
|
|
724
|
+
uv pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
|
|
727
725
|
```
|
|
728
|
-
|
|
726
|
+
Or with raw <code>pip</code>:
|
|
729
727
|
```bash
|
|
730
|
-
|
|
728
|
+
pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
|
|
731
729
|
```
|
|
732
|
-
|
|
730
|
+
|
|
731
|
+
**Replace `sinapsis-elevenlabs` with the name of the package you intend to install**.
|
|
732
|
+
|
|
733
|
+
> [!IMPORTANT]
|
|
734
|
+
> Templates in each package may require additional dependencies. For development, we recommend installing the package with all optional dependencies:
|
|
735
|
+
>
|
|
736
|
+
With <code>uv</code>:
|
|
737
|
+
|
|
738
|
+
```bash
|
|
739
|
+
uv pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
|
|
740
|
+
```
|
|
741
|
+
Or with raw <code>pip</code>:
|
|
742
|
+
```bash
|
|
743
|
+
pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
|
|
744
|
+
```
|
|
745
|
+
|
|
746
|
+
**Be sure to substitute `sinapsis-elevenlabs` with the appropriate package name**.
|
|
747
|
+
|
|
748
|
+
|
|
733
749
|
|
|
734
750
|
> [!TIP]
|
|
735
751
|
> You can also install all the packages within this project:
|
|
736
752
|
>
|
|
737
753
|
```bash
|
|
738
|
-
|
|
754
|
+
uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
|
|
739
755
|
```
|
|
740
756
|
|
|
741
757
|
|
|
742
758
|
<h2 id="packages">📦 Packages</h2>
|
|
743
759
|
|
|
744
|
-
|
|
760
|
+
This repository is organized into modular packages, each designed for integration with different text-to-speech tools. These packages provide ready-to-use templates for speech synthesis. Below is an overview of the available packages:
|
|
745
761
|
|
|
746
762
|
<details>
|
|
747
|
-
<summary id="elevenlabs"><strong><span style="font-size: 1.4em;">
|
|
763
|
+
<summary id="elevenlabs"><strong><span style="font-size: 1.4em;"> Sinapsis ElevenLabs </span></strong></summary>
|
|
764
|
+
|
|
765
|
+
This package offers a suite of templates and utilities designed for effortless integration, configuration, and execution of **text-to-speech (TTS)** and **voice generation** functionalities powered by [ElevenLabs](https://elevenlabs.io/).
|
|
748
766
|
|
|
749
|
-
|
|
767
|
+
- **ElevenLabsTTS**: Template for converting text into speech using ElevenLabs' voice models.
|
|
750
768
|
|
|
751
|
-
- **
|
|
769
|
+
- **ElevenLabsVoiceGeneration**: Template for generating custom synthetic voices based on user-provided descriptions.
|
|
752
770
|
|
|
753
|
-
|
|
771
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_elevenlabs/README.md).
|
|
754
772
|
|
|
755
773
|
</details>
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
<details>
|
|
777
|
+
<summary id="f5tts"><strong><span style="font-size: 1.4em;"> Sinapsis F5-TTS</span></strong></summary>
|
|
778
|
+
|
|
779
|
+
This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [F5TTS](https://github.com/SWivid/F5-TTS).
|
|
780
|
+
|
|
781
|
+
- **F5TTSInference**: Converts text to speech using the F5TTS model with voice cloning capabilities.
|
|
782
|
+
|
|
783
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_f5_tts/README.md).
|
|
784
|
+
|
|
785
|
+
</details>
|
|
786
|
+
|
|
787
|
+
<details>
|
|
788
|
+
<summary id="zonos"><strong><span style="font-size: 1.4em;"> Sinapsis Zonos</span></strong></summary>
|
|
789
|
+
|
|
790
|
+
This package provides a single template for integrating, configuring, and running **text-to-speech (TTS)** and **voice cloning** functionalities powered by [Zonos](https://github.com/Zyphra/Zonos/tree/main).
|
|
791
|
+
|
|
792
|
+
- **ZonosTTS**: Template for converting text to speech or performing voice cloning based on the presence of an audio sample.
|
|
793
|
+
|
|
794
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_zonos/README.md).
|
|
795
|
+
|
|
796
|
+
</details>
|
|
797
|
+
|
|
798
|
+
<h2 id="webapp">🌐 Webapps</h2>
|
|
799
|
+
The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
|
|
759
800
|
|
|
760
801
|
> [!IMPORTANT]
|
|
761
802
|
> To run the app you first need to clone this repository:
|
|
@@ -768,89 +809,102 @@ cd sinapsis-speech
|
|
|
768
809
|
> [!NOTE]
|
|
769
810
|
> If you'd like to enable external app sharing in Gradio, `export GRADIO_SHARE_APP=True`
|
|
770
811
|
|
|
771
|
-
> [!IMPORTANT]
|
|
772
|
-
> The CosyVoice model requires at least 4GB of ram to work.
|
|
773
812
|
|
|
774
813
|
> [!IMPORTANT]
|
|
775
|
-
> Elevenlabs requires an
|
|
776
|
-
If you already have an account, go to the [token page](https://elevenlabs.io/app/settings/api-keys) and generate a token.
|
|
814
|
+
> Elevenlabs requires an API key to run any inference. To get started, visit the [official website](https://elevenlabs.io) and create an account. If you already have an account, go to the [API keys page](https://elevenlabs.io/app/settings/api-keys) to generate a token.
|
|
777
815
|
|
|
778
816
|
> [!IMPORTANT]
|
|
779
|
-
>
|
|
817
|
+
> Set your env var using <code> export ELEVENLABS_API_KEY='your-api-key'</code>
|
|
780
818
|
|
|
819
|
+
> [!IMPORTANT]
|
|
820
|
+
> F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
|
|
781
821
|
|
|
782
|
-
> [!
|
|
783
|
-
>
|
|
822
|
+
> [!NOTE]
|
|
823
|
+
> Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
|
|
784
824
|
|
|
785
825
|
|
|
786
826
|
<details>
|
|
787
|
-
<summary id="docker"><strong><span style="font-size: 1.4em;">🐳
|
|
827
|
+
<summary id="docker"><strong><span style="font-size: 1.4em;">🐳 Docker</span></strong></summary>
|
|
788
828
|
|
|
789
|
-
**IMPORTANT
|
|
829
|
+
**IMPORTANT**: This Docker image depends on the `sinapsis-nvidia:base` image. For detailed instructions, please refer to the [Sinapsis README](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker).
|
|
790
830
|
|
|
791
|
-
1. **Build the
|
|
831
|
+
1. **Build the sinapsis-speech image**:
|
|
792
832
|
```bash
|
|
793
833
|
docker compose -f docker/compose.yaml build
|
|
794
834
|
```
|
|
795
835
|
|
|
796
|
-
|
|
797
|
-
|
|
836
|
+
2. **Start the app container**:
|
|
837
|
+
For ElevenLabs:
|
|
798
838
|
```bash
|
|
799
839
|
docker compose -f docker/compose_apps.yaml up -d sinapsis-elevenlabs
|
|
800
840
|
```
|
|
841
|
+
For F5-TTS:
|
|
842
|
+
```bash
|
|
843
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-f5_tts
|
|
844
|
+
```
|
|
845
|
+
For Zonos:
|
|
846
|
+
```bash
|
|
847
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-zonos
|
|
848
|
+
```
|
|
801
849
|
|
|
802
|
-
|
|
803
|
-
|
|
850
|
+
3. **Check the logs**
|
|
851
|
+
For ElevenLabs:
|
|
804
852
|
```bash
|
|
805
853
|
docker logs -f sinapsis-elevenlabs
|
|
806
854
|
```
|
|
807
|
-
|
|
855
|
+
For F5-TTS:
|
|
856
|
+
```bash
|
|
857
|
+
docker logs -f sinapsis-f5tts
|
|
858
|
+
```
|
|
859
|
+
For Zonos:
|
|
860
|
+
```bash
|
|
861
|
+
docker logs -f sinapsis-zonos
|
|
862
|
+
```
|
|
863
|
+
4. **The logs will display the URL to access the webapp, e.g.**:
|
|
808
864
|
```bash
|
|
809
865
|
Running on local URL: http://127.0.0.1:7860
|
|
810
866
|
```
|
|
811
|
-
|
|
867
|
+
**NOTE**: The URL may be different; check the output of the logs.
|
|
868
|
+
5. **To stop the app**:
|
|
812
869
|
```bash
|
|
813
|
-
docker compose -f docker/compose_apps.yaml down
|
|
870
|
+
docker compose -f docker/compose_apps.yaml down
|
|
814
871
|
```
|
|
815
872
|
</details>
|
|
816
873
|
|
|
817
874
|
<details>
|
|
818
875
|
<summary id="virtual-environment"><strong><span style="font-size: 1.4em;">💻 UV</span></strong></summary>
|
|
819
876
|
|
|
877
|
+
To run the webapp using the <code>uv</code> package manager, follow these steps:
|
|
820
878
|
|
|
821
879
|
1. **Sync the virtual environment**:
|
|
822
880
|
|
|
823
881
|
```bash
|
|
824
882
|
uv sync --frozen
|
|
825
883
|
```
|
|
826
|
-
2. Install the wheel
|
|
884
|
+
2. **Install the wheel**:
|
|
827
885
|
|
|
828
886
|
```bash
|
|
829
887
|
uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
|
|
830
888
|
```
|
|
831
889
|
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
890
|
+
3. **Run the webapp**:
|
|
891
|
+
For ElevenLabs:
|
|
835
892
|
```bash
|
|
836
|
-
|
|
893
|
+
uv run webapps/elevenlabs/elevenlabs_tts_app.py
|
|
837
894
|
```
|
|
838
|
-
|
|
895
|
+
For F5-TTS:
|
|
839
896
|
```bash
|
|
840
|
-
|
|
897
|
+
uv run webapps/f5-tts/f5_tts_app.py
|
|
841
898
|
```
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
5. **Launch the demo**:
|
|
845
|
-
|
|
899
|
+
For Zonos:
|
|
846
900
|
```bash
|
|
847
|
-
|
|
901
|
+
uv run webapps/zonos/zonos_tts_app.py
|
|
848
902
|
```
|
|
849
|
-
|
|
903
|
+
4. **The terminal will display the URL to access the webapp (e.g.)**:
|
|
850
904
|
```bash
|
|
851
905
|
Running on local URL: http://127.0.0.1:7860
|
|
852
906
|
```
|
|
853
|
-
**NOTE**: The URL
|
|
907
|
+
**NOTE**: The URL may vary; check the terminal output for the correct address.
|
|
854
908
|
|
|
855
909
|
</details>
|
|
856
910
|
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
<h1 align="center">
|
|
2
|
+
<br>
|
|
3
|
+
<a href="https://sinapsis.tech/">
|
|
4
|
+
<img
|
|
5
|
+
src="https://github.com/Sinapsis-AI/brand-resources/blob/main/sinapsis_logo/4x/logo.png?raw=true"
|
|
6
|
+
alt="" width="300">
|
|
7
|
+
</a><br>
|
|
8
|
+
Sinapsis Speech
|
|
9
|
+
<br>
|
|
10
|
+
</h1>
|
|
11
|
+
|
|
12
|
+
<h4 align="center"> Templates for a wide range of voice generation tasks.</h4>
|
|
13
|
+
|
|
14
|
+
<p align="center">
|
|
15
|
+
<a href="#installation">🐍 Installation</a> •
|
|
16
|
+
<a href="#packages">📦 Packages</a> •
|
|
17
|
+
<a href="#webapp">🌐 Webapps</a> •
|
|
18
|
+
<a href="#documentation">📙 Documentation</a> •
|
|
19
|
+
<a href="#license">🔍 License</a>
|
|
20
|
+
</p>
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
<h2 id="installation">🐍 Installation</h2>
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
> [!IMPORTANT]
|
|
27
|
+
> Sinapsis projects require Python 3.10 or higher.
|
|
28
|
+
>
|
|
29
|
+
|
|
30
|
+
This repo includes packages for performing speech synthesis using different tools:
|
|
31
|
+
|
|
32
|
+
* <code>sinapsis-elevenlabs</code>
|
|
33
|
+
* <code>sinapsis-f5-tts</code>
|
|
34
|
+
* <code>sinapsis-zonos</code>
|
|
35
|
+
|
|
36
|
+
Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
Install with <code>uv</code>:
|
|
40
|
+
```bash
|
|
41
|
+
uv pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
|
|
42
|
+
```
|
|
43
|
+
Or with raw <code>pip</code>:
|
|
44
|
+
```bash
|
|
45
|
+
pip install sinapsis-elevenlabs --extra-index-url https://pypi.sinapsis.tech
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Replace `sinapsis-elevenlabs` with the name of the package you intend to install**.
|
|
49
|
+
|
|
50
|
+
> [!IMPORTANT]
|
|
51
|
+
> Templates in each package may require additional dependencies. For development, we recommend installing the package with all optional dependencies:
|
|
52
|
+
>
|
|
53
|
+
With <code>uv</code>:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uv pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
|
|
57
|
+
```
|
|
58
|
+
Or with raw <code>pip</code>:
|
|
59
|
+
```bash
|
|
60
|
+
pip install sinapsis-elevenlabs[all] --extra-index-url https://pypi.sinapsis.tech
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Be sure to substitute `sinapsis-elevenlabs` with the appropriate package name**.
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
> [!TIP]
|
|
68
|
+
> You can also install all the packages within this project:
|
|
69
|
+
>
|
|
70
|
+
```bash
|
|
71
|
+
uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
<h2 id="packages">📦 Packages</h2>
|
|
76
|
+
|
|
77
|
+
This repository is organized into modular packages, each designed for integration with different text-to-speech tools. These packages provide ready-to-use templates for speech synthesis. Below is an overview of the available packages:
|
|
78
|
+
|
|
79
|
+
<details>
|
|
80
|
+
<summary id="elevenlabs"><strong><span style="font-size: 1.4em;"> Sinapsis ElevenLabs </span></strong></summary>
|
|
81
|
+
|
|
82
|
+
This package offers a suite of templates and utilities designed for effortless integration, configuration, and execution of **text-to-speech (TTS)** and **voice generation** functionalities powered by [ElevenLabs](https://elevenlabs.io/).
|
|
83
|
+
|
|
84
|
+
- **ElevenLabsTTS**: Template for converting text into speech using ElevenLabs' voice models.
|
|
85
|
+
|
|
86
|
+
- **ElevenLabsVoiceGeneration**: Template for generating custom synthetic voices based on user-provided descriptions.
|
|
87
|
+
|
|
88
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_elevenlabs/README.md).
|
|
89
|
+
|
|
90
|
+
</details>
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
<details>
|
|
94
|
+
<summary id="f5tts"><strong><span style="font-size: 1.4em;"> Sinapsis F5-TTS</span></strong></summary>
|
|
95
|
+
|
|
96
|
+
This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [F5TTS](https://github.com/SWivid/F5-TTS).
|
|
97
|
+
|
|
98
|
+
- **F5TTSInference**: Converts text to speech using the F5TTS model with voice cloning capabilities.
|
|
99
|
+
|
|
100
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_f5_tts/README.md).
|
|
101
|
+
|
|
102
|
+
</details>
|
|
103
|
+
|
|
104
|
+
<details>
|
|
105
|
+
<summary id="zonos"><strong><span style="font-size: 1.4em;"> Sinapsis Zonos</span></strong></summary>
|
|
106
|
+
|
|
107
|
+
This package provides a single template for integrating, configuring, and running **text-to-speech (TTS)** and **voice cloning** functionalities powered by [Zonos](https://github.com/Zyphra/Zonos/tree/main).
|
|
108
|
+
|
|
109
|
+
- **ZonosTTS**: Template for converting text to speech or performing voice cloning based on the presence of an audio sample.
|
|
110
|
+
|
|
111
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_zonos/README.md).
|
|
112
|
+
|
|
113
|
+
</details>
|
|
114
|
+
|
|
115
|
+
<h2 id="webapp">🌐 Webapps</h2>
|
|
116
|
+
The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
|
|
117
|
+
|
|
118
|
+
> [!IMPORTANT]
|
|
119
|
+
> To run the app you first need to clone this repository:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
git clone git@github.com:Sinapsis-ai/sinapsis-speech.git
|
|
123
|
+
cd sinapsis-speech
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
> [!NOTE]
|
|
127
|
+
> If you'd like to enable external app sharing in Gradio, `export GRADIO_SHARE_APP=True`
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
> [!IMPORTANT]
|
|
131
|
+
> Elevenlabs requires an API key to run any inference. To get started, visit the [official website](https://elevenlabs.io) and create an account. If you already have an account, go to the [API keys page](https://elevenlabs.io/app/settings/api-keys) to generate a token.
|
|
132
|
+
|
|
133
|
+
> [!IMPORTANT]
|
|
134
|
+
> Set your env var using <code> export ELEVENLABS_API_KEY='your-api-key'</code>
|
|
135
|
+
|
|
136
|
+
> [!IMPORTANT]
|
|
137
|
+
> F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
|
|
138
|
+
|
|
139
|
+
> [!NOTE]
|
|
140
|
+
> Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
<details>
|
|
144
|
+
<summary id="docker"><strong><span style="font-size: 1.4em;">🐳 Docker</span></strong></summary>
|
|
145
|
+
|
|
146
|
+
**IMPORTANT**: This Docker image depends on the `sinapsis-nvidia:base` image. For detailed instructions, please refer to the [Sinapsis README](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker).
|
|
147
|
+
|
|
148
|
+
1. **Build the sinapsis-speech image**:
|
|
149
|
+
```bash
|
|
150
|
+
docker compose -f docker/compose.yaml build
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
2. **Start the app container**:
|
|
154
|
+
For ElevenLabs:
|
|
155
|
+
```bash
|
|
156
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-elevenlabs
|
|
157
|
+
```
|
|
158
|
+
For F5-TTS:
|
|
159
|
+
```bash
|
|
160
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-f5_tts
|
|
161
|
+
```
|
|
162
|
+
For Zonos:
|
|
163
|
+
```bash
|
|
164
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-zonos
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
3. **Check the logs**
|
|
168
|
+
For ElevenLabs:
|
|
169
|
+
```bash
|
|
170
|
+
docker logs -f sinapsis-elevenlabs
|
|
171
|
+
```
|
|
172
|
+
For F5-TTS:
|
|
173
|
+
```bash
|
|
174
|
+
docker logs -f sinapsis-f5tts
|
|
175
|
+
```
|
|
176
|
+
For Zonos:
|
|
177
|
+
```bash
|
|
178
|
+
docker logs -f sinapsis-zonos
|
|
179
|
+
```
|
|
180
|
+
4. **The logs will display the URL to access the webapp, e.g.**:
|
|
181
|
+
```bash
|
|
182
|
+
Running on local URL: http://127.0.0.1:7860
|
|
183
|
+
```
|
|
184
|
+
**NOTE**: The URL may be different; check the output of the logs.
|
|
185
|
+
5. **To stop the app**:
|
|
186
|
+
```bash
|
|
187
|
+
docker compose -f docker/compose_apps.yaml down
|
|
188
|
+
```
|
|
189
|
+
</details>
|
|
190
|
+
|
|
191
|
+
<details>
|
|
192
|
+
<summary id="virtual-environment"><strong><span style="font-size: 1.4em;">💻 UV</span></strong></summary>
|
|
193
|
+
|
|
194
|
+
To run the webapp using the <code>uv</code> package manager, follow these steps:
|
|
195
|
+
|
|
196
|
+
1. **Sync the virtual environment**:
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
uv sync --frozen
|
|
200
|
+
```
|
|
201
|
+
2. **Install the wheel**:
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
uv pip install sinapsis-speech[all] --extra-index-url https://pypi.sinapsis.tech
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
3. **Run the webapp**:
|
|
208
|
+
For ElevenLabs:
|
|
209
|
+
```bash
|
|
210
|
+
uv run webapps/elevenlabs/elevenlabs_tts_app.py
|
|
211
|
+
```
|
|
212
|
+
For F5-TTS:
|
|
213
|
+
```bash
|
|
214
|
+
uv run webapps/f5-tts/f5_tts_app.py
|
|
215
|
+
```
|
|
216
|
+
For Zonos:
|
|
217
|
+
```bash
|
|
218
|
+
uv run webapps/zonos/zonos_tts_app.py
|
|
219
|
+
```
|
|
220
|
+
4. **The terminal will display the URL to access the webapp (e.g.)**:
|
|
221
|
+
```bash
|
|
222
|
+
Running on local URL: http://127.0.0.1:7860
|
|
223
|
+
```
|
|
224
|
+
**NOTE**: The URL may vary; check the terminal output for the correct address.
|
|
225
|
+
|
|
226
|
+
</details>
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
<h2 id="documentation">📙 Documentation</h2>
|
|
231
|
+
|
|
232
|
+
Documentation is available on the [sinapsis website](https://docs.sinapsis.tech/docs)
|
|
233
|
+
|
|
234
|
+
Tutorials for different projects within sinapsis are available at [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials)
|
|
235
|
+
|
|
236
|
+
<h2 id="license">🔍 License</h2>
|
|
237
|
+
|
|
238
|
+
This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
|
|
239
|
+
|
|
240
|
+
For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
|
|
@@ -9,34 +9,18 @@ def create_voice_settings(settings: VoiceSettings) -> VoiceSettings | None:
|
|
|
9
9
|
"""
|
|
10
10
|
Creates or updates a `VoiceSettings` object based on the provided settings.
|
|
11
11
|
|
|
12
|
-
This function attempts to create or update a `VoiceSettings` object using the provided
|
|
13
|
-
`VoiceSettings` instance. If any of the fields in the settings contain `None`,
|
|
14
|
-
the corresponding field is populated with a default value from `DEFAULT_VOICE.settings`.
|
|
15
|
-
If all fields are valid (i.e., none are `None`), the provided `settings` object is returned unchanged.
|
|
16
|
-
|
|
17
|
-
If the settings argument is `None` or if no valid settings are provided, the function
|
|
18
|
-
returns `None`.
|
|
19
|
-
|
|
20
12
|
Args:
|
|
21
|
-
settings (VoiceSettings): An instance of `VoiceSettings` containing the settings to be applied.
|
|
22
|
-
|
|
13
|
+
settings (VoiceSettings | None): An instance of `VoiceSettings` containing the settings to be applied.
|
|
14
|
+
If `None`, the function returns the default settings.
|
|
23
15
|
|
|
24
16
|
Returns:
|
|
25
|
-
VoiceSettings:
|
|
26
|
-
|
|
27
|
-
`None` is returned.
|
|
17
|
+
VoiceSettings: The provided `VoiceSettings` object if `settings` is not `None`. Otherwise,
|
|
18
|
+
`DEFAULT_VOICE.settings` is returned.
|
|
28
19
|
"""
|
|
29
|
-
if settings:
|
|
30
|
-
|
|
31
|
-
if any(value is None for value in settings_dict.values()):
|
|
32
|
-
for field, value in settings_dict.items():
|
|
33
|
-
if value is None:
|
|
34
|
-
settings_dict[field] = getattr(DEFAULT_VOICE.settings, field)
|
|
20
|
+
if not settings:
|
|
21
|
+
return DEFAULT_VOICE.settings
|
|
35
22
|
|
|
36
|
-
|
|
37
|
-
else:
|
|
38
|
-
return settings
|
|
39
|
-
return None
|
|
23
|
+
return settings
|
|
40
24
|
|
|
41
25
|
|
|
42
26
|
def get_voice_id(client: ElevenLabs, voice: VoiceId | VoiceName) -> VoiceId:
|
|
@@ -9,6 +9,8 @@ from typing import IO, Iterator, Literal
|
|
|
9
9
|
|
|
10
10
|
from elevenlabs import Voice, VoiceSettings, save
|
|
11
11
|
from elevenlabs.client import ElevenLabs, VoiceId, VoiceName
|
|
12
|
+
from elevenlabs.types import OutputFormat
|
|
13
|
+
from pydantic import Field
|
|
12
14
|
from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, Packet
|
|
13
15
|
from sinapsis_core.template_base.template import (
|
|
14
16
|
Template,
|
|
@@ -40,25 +42,23 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
40
42
|
"""
|
|
41
43
|
Attributes for ElevenLabs Base Class.
|
|
42
44
|
Args:
|
|
43
|
-
api_key (str): The API key to authenticate with ElevenLabs' API.
|
|
44
|
-
|
|
45
|
+
api_key (str): The API key used to authenticate with ElevenLabs' API.
|
|
46
|
+
model (Literal): The model identifier to use for speech synthesis.
|
|
47
|
+
output_format (OutputFormat): The output audio format and quality. Options include:
|
|
48
|
+
["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
|
|
49
|
+
"mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
|
|
50
|
+
output_folder (str): The folder where generated audio files will be saved.
|
|
51
|
+
stream (bool): If True, the audio is returned as a stream; otherwise, saved to a file.
|
|
52
|
+
voice (VoiceId | VoiceName | Voice): The voice to use for speech synthesis. This can be a voice ID (str),
|
|
45
53
|
a voice name (str) or an elevenlabs voice object (Voice).
|
|
46
54
|
voice_settings (VoiceSettings): A dictionary of settings that control the behavior of the voice.
|
|
47
55
|
- stability (float)
|
|
48
56
|
- similarity_boost (float)
|
|
49
57
|
- style (float)
|
|
50
58
|
- use_speaker_boost (bool)
|
|
51
|
-
model (Literal): The model identifier to use for speech synthesis.
|
|
52
|
-
output_format (Literal): The output audio format and quality. Options include:
|
|
53
|
-
["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
|
|
54
|
-
"mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
|
|
55
|
-
output_folder (str): The folder where generated audio files will be saved.
|
|
56
|
-
stream (bool): If True, the audio is returned as a stream; otherwise, saved to a file.
|
|
57
59
|
"""
|
|
58
60
|
|
|
59
61
|
api_key: str | None = None
|
|
60
|
-
voice: VoiceId | VoiceName | Voice = None
|
|
61
|
-
voice_settings: VoiceSettings | None = None
|
|
62
62
|
model: Literal[
|
|
63
63
|
"eleven_turbo_v2_5",
|
|
64
64
|
"eleven_multilingual_v2",
|
|
@@ -68,21 +68,11 @@ class ElevenLabsBase(Template, abc.ABC):
|
|
|
68
68
|
"eleven_english_sts_v2",
|
|
69
69
|
"eleven_multilingual_sts_v2",
|
|
70
70
|
] = "eleven_turbo_v2_5"
|
|
71
|
-
output_format:
|
|
72
|
-
"mp3_22050_32",
|
|
73
|
-
"mp3_44100_32",
|
|
74
|
-
"mp3_44100_64",
|
|
75
|
-
"mp3_44100_96",
|
|
76
|
-
"mp3_44100_128",
|
|
77
|
-
"mp3_44100_192",
|
|
78
|
-
"pcm_16000",
|
|
79
|
-
"pcm_22050",
|
|
80
|
-
"pcm_24000",
|
|
81
|
-
"pcm_44100",
|
|
82
|
-
"ulaw_8000",
|
|
83
|
-
] = "mp3_44100_128"
|
|
71
|
+
output_format: OutputFormat = "mp3_44100_128"
|
|
84
72
|
output_folder: str = os.path.join(SINAPSIS_CACHE_DIR, "elevenlabs", "audios")
|
|
85
73
|
stream: bool = False
|
|
74
|
+
voice: VoiceId | VoiceName | Voice = None
|
|
75
|
+
voice_settings: VoiceSettings = Field(default_factory=dict) # type: ignore[arg-type]
|
|
86
76
|
|
|
87
77
|
def __init__(self, attributes: TemplateAttributeType) -> None:
|
|
88
78
|
"""Initializes the ElevenLabs API client with the given attributes."""
|