sinapsis-speech 0.4.6__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_speech-0.4.6/packages/sinapsis_speech.egg-info → sinapsis_speech-0.5.0}/PKG-INFO +33 -1
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/README.md +31 -0
- sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm/helpers/generator.py +43 -0
- sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm/templates/__init__.py +19 -0
- sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm/templates/csm_tts.py +88 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0/packages/sinapsis_speech.egg-info}/PKG-INFO +33 -1
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/SOURCES.txt +5 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/requires.txt +1 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/top_level.txt +1 -0
- sinapsis_speech-0.5.0/packages/sinapsis_zonos/src/sinapsis_zonos/__init__.py +0 -0
- sinapsis_speech-0.5.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/pyproject.toml +3 -2
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/LICENSE +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs → sinapsis_speech-0.5.0/packages/sinapsis_csm}/__init__.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers → sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm}/__init__.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_f5_tts/src/sinapsis_f5_tts → sinapsis_speech-0.5.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs}/__init__.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_f5_tts/src/sinapsis_f5_tts → sinapsis_speech-0.5.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs}/helpers/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/tags.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_kokoro/src/sinapsis_kokoro → sinapsis_speech-0.5.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts}/__init__.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp → sinapsis_speech-0.5.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/helpers}/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/tags.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/helpers → sinapsis_speech-0.5.0/packages/sinapsis_kokoro/src/sinapsis_kokoro}/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/helpers/tags.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/kokoro_tts.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt → sinapsis_speech-0.5.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp}/__init__.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt → sinapsis_speech-0.5.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp}/helpers/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/helpers/tags.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/templates/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/templates/orpheus_tts.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/thirdparty/helpers.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_zonos/src/sinapsis_zonos → sinapsis_speech-0.5.0/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt}/__init__.py +0 -0
- {sinapsis_speech-0.4.6/packages/sinapsis_zonos/src/sinapsis_zonos → sinapsis_speech-0.5.0/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt}/helpers/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/tags.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/dependency_links.txt +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/tags.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py +0 -0
- {sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sinapsis-speech
|
|
3
|
-
Version: 0.4.6
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Generate speech using various libraries.
|
|
5
5
|
Author-email: SinapsisAI <dev@sinapsis.tech>
|
|
6
6
|
Project-URL: Homepage, https://sinapsis.tech
|
|
@@ -20,6 +20,7 @@ Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
|
|
|
20
20
|
Requires-Dist: sinapsis-zonos[all]; extra == "all"
|
|
21
21
|
Requires-Dist: sinapsis-parakeet-tdt[all]; extra == "all"
|
|
22
22
|
Requires-Dist: sinapsis-orpheus-cpp[all]; extra == "all"
|
|
23
|
+
Requires-Dist: sinapsis-csm[all]; extra == "all"
|
|
23
24
|
Provides-Extra: gradio-app
|
|
24
25
|
Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
|
|
25
26
|
Dynamic: license-file
|
|
@@ -61,6 +62,7 @@ This repo includes packages for performing speech synthesis using different tool
|
|
|
61
62
|
* <code>sinapsis-zonos</code>
|
|
62
63
|
* <code>sinapsis-orpheus-cpp</code>
|
|
63
64
|
* <code>sinapsis-parakeet</code>
|
|
65
|
+
* <code>sinapsis-csm</code>
|
|
64
66
|
|
|
65
67
|
Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
|
|
66
68
|
|
|
@@ -176,6 +178,17 @@ For specific instructions and further details, see the [README.md](https://githu
|
|
|
176
178
|
|
|
177
179
|
</details>
|
|
178
180
|
|
|
181
|
+
<details>
|
|
182
|
+
<summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
|
|
183
|
+
|
|
184
|
+
This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
|
|
185
|
+
|
|
186
|
+
- **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
|
|
187
|
+
|
|
188
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
|
|
189
|
+
|
|
190
|
+
</details>
|
|
191
|
+
|
|
179
192
|
<h2 id="webapp">🌐 Webapps</h2>
|
|
180
193
|
The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
|
|
181
194
|
|
|
@@ -200,6 +213,9 @@ cd sinapsis-speech
|
|
|
200
213
|
> [!IMPORTANT]
|
|
201
214
|
> F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
|
|
202
215
|
|
|
216
|
+
> [!IMPORTANT]
|
|
217
|
+
> CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
|
|
218
|
+
|
|
203
219
|
> [!NOTE]
|
|
204
220
|
> Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
|
|
205
221
|
|
|
@@ -246,6 +262,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
|
|
|
246
262
|
docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
|
|
247
263
|
```
|
|
248
264
|
|
|
265
|
+
- For CSM:
|
|
266
|
+
```bash
|
|
267
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
|
|
268
|
+
```
|
|
269
|
+
|
|
249
270
|
3. **Check the logs**
|
|
250
271
|
|
|
251
272
|
- For ElevenLabs:
|
|
@@ -276,6 +297,11 @@ docker logs -f sinapsis-orpheus-tts
|
|
|
276
297
|
docker logs -f sinapsis-parakeet
|
|
277
298
|
```
|
|
278
299
|
|
|
300
|
+
- For CSM:
|
|
301
|
+
```bash
|
|
302
|
+
docker logs -f sinapsis-csm
|
|
303
|
+
```
|
|
304
|
+
|
|
279
305
|
4. **The logs will display the URL to access the webapp, e.g.:**
|
|
280
306
|
```bash
|
|
281
307
|
Running on local URL: http://127.0.0.1:7860
|
|
@@ -335,6 +361,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
|
|
|
335
361
|
```bash
|
|
336
362
|
uv run webapps/generic_tts_apps/zonos_tts_app.py
|
|
337
363
|
```
|
|
364
|
+
|
|
365
|
+
- For CSM:
|
|
366
|
+
```bash
|
|
367
|
+
uv run webapps/generic_tts_apps/csm_tts_app.py
|
|
368
|
+
```
|
|
369
|
+
|
|
338
370
|
4. **The terminal will display the URL to access the webapp (e.g.)**:
|
|
339
371
|
```bash
|
|
340
372
|
Running on local URL: http://127.0.0.1:7860
|
|
@@ -35,6 +35,7 @@ This repo includes packages for performing speech synthesis using different tool
|
|
|
35
35
|
* <code>sinapsis-zonos</code>
|
|
36
36
|
* <code>sinapsis-orpheus-cpp</code>
|
|
37
37
|
* <code>sinapsis-parakeet</code>
|
|
38
|
+
* <code>sinapsis-csm</code>
|
|
38
39
|
|
|
39
40
|
Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
|
|
40
41
|
|
|
@@ -150,6 +151,17 @@ For specific instructions and further details, see the [README.md](https://githu
|
|
|
150
151
|
|
|
151
152
|
</details>
|
|
152
153
|
|
|
154
|
+
<details>
|
|
155
|
+
<summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
|
|
156
|
+
|
|
157
|
+
This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
|
|
158
|
+
|
|
159
|
+
- **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
|
|
160
|
+
|
|
161
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
|
|
162
|
+
|
|
163
|
+
</details>
|
|
164
|
+
|
|
153
165
|
<h2 id="webapp">🌐 Webapps</h2>
|
|
154
166
|
The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
|
|
155
167
|
|
|
@@ -174,6 +186,9 @@ cd sinapsis-speech
|
|
|
174
186
|
> [!IMPORTANT]
|
|
175
187
|
> F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
|
|
176
188
|
|
|
189
|
+
> [!IMPORTANT]
|
|
190
|
+
> CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
|
|
191
|
+
|
|
177
192
|
> [!NOTE]
|
|
178
193
|
> Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
|
|
179
194
|
|
|
@@ -220,6 +235,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
|
|
|
220
235
|
docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
|
|
221
236
|
```
|
|
222
237
|
|
|
238
|
+
- For CSM:
|
|
239
|
+
```bash
|
|
240
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
|
|
241
|
+
```
|
|
242
|
+
|
|
223
243
|
3. **Check the logs**
|
|
224
244
|
|
|
225
245
|
- For ElevenLabs:
|
|
@@ -250,6 +270,11 @@ docker logs -f sinapsis-orpheus-tts
|
|
|
250
270
|
docker logs -f sinapsis-parakeet
|
|
251
271
|
```
|
|
252
272
|
|
|
273
|
+
- For CSM:
|
|
274
|
+
```bash
|
|
275
|
+
docker logs -f sinapsis-csm
|
|
276
|
+
```
|
|
277
|
+
|
|
253
278
|
4. **The logs will display the URL to access the webapp, e.g.:**
|
|
254
279
|
```bash
|
|
255
280
|
Running on local URL: http://127.0.0.1:7860
|
|
@@ -309,6 +334,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
|
|
|
309
334
|
```bash
|
|
310
335
|
uv run webapps/generic_tts_apps/zonos_tts_app.py
|
|
311
336
|
```
|
|
337
|
+
|
|
338
|
+
- For CSM:
|
|
339
|
+
```bash
|
|
340
|
+
uv run webapps/generic_tts_apps/csm_tts_app.py
|
|
341
|
+
```
|
|
342
|
+
|
|
312
343
|
4. **The terminal will display the URL to access the webapp (e.g.)**:
|
|
313
344
|
```bash
|
|
314
345
|
Running on local URL: http://127.0.0.1:7860
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
import torch
|
|
5
|
+
from csm.generator import Generator
|
|
6
|
+
from csm.models import Model
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CSMGenerator:
|
|
10
|
+
"""
|
|
11
|
+
Wrapper around the CSM model providing a simple interface
|
|
12
|
+
for text-to-speech generation
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, device: Literal["cpu", "cuda"] = "cpu", sample_rate: int = 24000) -> None:
|
|
16
|
+
self.device: str = device
|
|
17
|
+
self.sample_rate: int = sample_rate
|
|
18
|
+
self.model: Model = Model.from_pretrained("sesame/csm-1b")
|
|
19
|
+
self.model.to(device=device)
|
|
20
|
+
self.model.sample_rate = sample_rate
|
|
21
|
+
self.generator = Generator(self.model)
|
|
22
|
+
|
|
23
|
+
def generate(
|
|
24
|
+
self, text: str, speaker: int = 0, context: list[str] | None = None, max_audio_length_ms: int = 10000
|
|
25
|
+
) -> torch.Tensor:
|
|
26
|
+
if context is None:
|
|
27
|
+
context = []
|
|
28
|
+
return self.generator.generate(
|
|
29
|
+
text=text,
|
|
30
|
+
speaker=speaker,
|
|
31
|
+
context=context,
|
|
32
|
+
max_audio_length_ms=max_audio_length_ms,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def load_csm_1b(device: Literal["cpu", "cuda"] = "cpu", sample_rate: int = 24000) -> CSMGenerator:
|
|
37
|
+
"""
|
|
38
|
+
Loads and configures the CSM TTS model.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
CSMGenerator: Model wrapper with ready-to-use generate method.
|
|
42
|
+
"""
|
|
43
|
+
return CSMGenerator(device=device, sample_rate=sample_rate)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
from typing import Callable
|
|
3
|
+
from sinapsis_csm.templates.csm_tts import CSMTTS
|
|
4
|
+
|
|
5
|
+
_root_lib_path = "sinapsis_csm.templates"
|
|
6
|
+
_template_lookup = {
|
|
7
|
+
"CSMTTS": f"{_root_lib_path}.csm_tts",
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
def __getattr__(name: str) -> Callable:
|
|
11
|
+
if name in _template_lookup:
|
|
12
|
+
module = importlib.import_module(_template_lookup[name])
|
|
13
|
+
return getattr(module, name)
|
|
14
|
+
raise AttributeError(f"Template `{name}` not found in `{_root_lib_path}`.")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__all__ = ["CSMTTS"]
|
|
18
|
+
|
|
19
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
import torch
|
|
3
|
+
from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
|
|
4
|
+
from sinapsis_core.template_base import Template
|
|
5
|
+
from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType
|
|
6
|
+
from sinapsis_csm.helpers.generator import load_csm_1b
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CSMTTS(Template):
|
|
10
|
+
"""
|
|
11
|
+
Sinapsis template for converting text into speech using the CSM TTS model.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
class AttributesBaseModel(TemplateAttributes): # type: ignore
|
|
15
|
+
"""
|
|
16
|
+
Defines configurable attributes for the CSMTTS template.
|
|
17
|
+
"""
|
|
18
|
+
speaker_id: int = 0
|
|
19
|
+
max_audio_length_ms: int = 10000
|
|
20
|
+
device: Literal["cuda", "cpu"] = "cpu"
|
|
21
|
+
context: list[str] | None = None
|
|
22
|
+
sample_rate_hz: int = 24000
|
|
23
|
+
|
|
24
|
+
def __init__(self, attributes: TemplateAttributeType) -> None:
|
|
25
|
+
"""
|
|
26
|
+
Initializes the template and loads the CSM model.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
attributes (TemplateAttributeType): User-defined attributes from YAML configuration.
|
|
30
|
+
"""
|
|
31
|
+
super().__init__(attributes)
|
|
32
|
+
self.model = load_csm_1b(
|
|
33
|
+
device=self.attributes.device,
|
|
34
|
+
sample_rate=self.attributes.sample_rate_hz
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
def generate_audio(self, text: str) -> torch.Tensor:
|
|
38
|
+
"""
|
|
39
|
+
Converts input text to audio using the CSM model.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
text (str): Input text string.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
torch.Tensor: Audio waveform tensor.
|
|
46
|
+
"""
|
|
47
|
+
context = self.attributes.context if self.attributes.context else []
|
|
48
|
+
return self.model.generate(
|
|
49
|
+
text=text,
|
|
50
|
+
speaker=self.attributes.speaker_id,
|
|
51
|
+
context=context,
|
|
52
|
+
max_audio_length_ms=self.attributes.max_audio_length_ms,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def generate_audio_packet(self, audio: torch.Tensor, source_text: str) -> AudioPacket:
|
|
56
|
+
"""
|
|
57
|
+
Wraps a raw audio tensor into a sinapsis compatible audioPacket
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
audio (torch.Tensor): Audio waveform.
|
|
61
|
+
source_text (str): Original input text used for generation.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
AudioPacket: Encapsulated audio data with metadata.
|
|
65
|
+
"""
|
|
66
|
+
audio_np = audio.cpu().numpy()
|
|
67
|
+
return AudioPacket(
|
|
68
|
+
content=audio_np,
|
|
69
|
+
sample_rate=self.attributes.sample_rate_hz,
|
|
70
|
+
generic_data={"source_text": source_text, "model": "CSM"}
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
def execute(self, container: DataContainer) -> DataContainer:
|
|
74
|
+
"""
|
|
75
|
+
Main method executed by Sinapsis. Converts all text packets in the input container to audio.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
container (DataContainer): Input container with text packets.
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
DataContainer: Output container with generated audio packets.
|
|
82
|
+
"""
|
|
83
|
+
for packet in container.texts:
|
|
84
|
+
audio = self.generate_audio(packet.content)
|
|
85
|
+
audio_packet = self.generate_audio_packet(audio, packet.content)
|
|
86
|
+
audio_packet.source = self.instance_name
|
|
87
|
+
container.audios.append(audio_packet)
|
|
88
|
+
return container
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sinapsis-speech
|
|
3
|
-
Version: 0.4.6
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Generate speech using various libraries.
|
|
5
5
|
Author-email: SinapsisAI <dev@sinapsis.tech>
|
|
6
6
|
Project-URL: Homepage, https://sinapsis.tech
|
|
@@ -20,6 +20,7 @@ Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
|
|
|
20
20
|
Requires-Dist: sinapsis-zonos[all]; extra == "all"
|
|
21
21
|
Requires-Dist: sinapsis-parakeet-tdt[all]; extra == "all"
|
|
22
22
|
Requires-Dist: sinapsis-orpheus-cpp[all]; extra == "all"
|
|
23
|
+
Requires-Dist: sinapsis-csm[all]; extra == "all"
|
|
23
24
|
Provides-Extra: gradio-app
|
|
24
25
|
Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
|
|
25
26
|
Dynamic: license-file
|
|
@@ -61,6 +62,7 @@ This repo includes packages for performing speech synthesis using different tool
|
|
|
61
62
|
* <code>sinapsis-zonos</code>
|
|
62
63
|
* <code>sinapsis-orpheus-cpp</code>
|
|
63
64
|
* <code>sinapsis-parakeet</code>
|
|
65
|
+
* <code>sinapsis-csm</code>
|
|
64
66
|
|
|
65
67
|
Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
|
|
66
68
|
|
|
@@ -176,6 +178,17 @@ For specific instructions and further details, see the [README.md](https://githu
|
|
|
176
178
|
|
|
177
179
|
</details>
|
|
178
180
|
|
|
181
|
+
<details>
|
|
182
|
+
<summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
|
|
183
|
+
|
|
184
|
+
This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
|
|
185
|
+
|
|
186
|
+
- **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
|
|
187
|
+
|
|
188
|
+
For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
|
|
189
|
+
|
|
190
|
+
</details>
|
|
191
|
+
|
|
179
192
|
<h2 id="webapp">🌐 Webapps</h2>
|
|
180
193
|
The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
|
|
181
194
|
|
|
@@ -200,6 +213,9 @@ cd sinapsis-speech
|
|
|
200
213
|
> [!IMPORTANT]
|
|
201
214
|
> F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
|
|
202
215
|
|
|
216
|
+
> [!IMPORTANT]
|
|
217
|
+
> CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
|
|
218
|
+
|
|
203
219
|
> [!NOTE]
|
|
204
220
|
> Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
|
|
205
221
|
|
|
@@ -246,6 +262,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
|
|
|
246
262
|
docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
|
|
247
263
|
```
|
|
248
264
|
|
|
265
|
+
- For CSM:
|
|
266
|
+
```bash
|
|
267
|
+
docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
|
|
268
|
+
```
|
|
269
|
+
|
|
249
270
|
3. **Check the logs**
|
|
250
271
|
|
|
251
272
|
- For ElevenLabs:
|
|
@@ -276,6 +297,11 @@ docker logs -f sinapsis-orpheus-tts
|
|
|
276
297
|
docker logs -f sinapsis-parakeet
|
|
277
298
|
```
|
|
278
299
|
|
|
300
|
+
- For CSM:
|
|
301
|
+
```bash
|
|
302
|
+
docker logs -f sinapsis-csm
|
|
303
|
+
```
|
|
304
|
+
|
|
279
305
|
4. **The logs will display the URL to access the webapp, e.g.:**
|
|
280
306
|
```bash
|
|
281
307
|
Running on local URL: http://127.0.0.1:7860
|
|
@@ -335,6 +361,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
|
|
|
335
361
|
```bash
|
|
336
362
|
uv run webapps/generic_tts_apps/zonos_tts_app.py
|
|
337
363
|
```
|
|
364
|
+
|
|
365
|
+
- For CSM:
|
|
366
|
+
```bash
|
|
367
|
+
uv run webapps/generic_tts_apps/csm_tts_app.py
|
|
368
|
+
```
|
|
369
|
+
|
|
338
370
|
4. **The terminal will display the URL to access the webapp (e.g.)**:
|
|
339
371
|
```bash
|
|
340
372
|
Running on local URL: http://127.0.0.1:7860
|
{sinapsis_speech-0.4.6 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/SOURCES.txt
RENAMED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
+
packages/sinapsis_csm/__init__.py
|
|
5
|
+
packages/sinapsis_csm/src/sinapsis_csm/__init__.py
|
|
6
|
+
packages/sinapsis_csm/src/sinapsis_csm/helpers/generator.py
|
|
7
|
+
packages/sinapsis_csm/src/sinapsis_csm/templates/__init__.py
|
|
8
|
+
packages/sinapsis_csm/src/sinapsis_csm/templates/csm_tts.py
|
|
4
9
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py
|
|
5
10
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py
|
|
6
11
|
packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py
|
|
File without changes
|
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sinapsis-speech"
|
|
3
|
-
version = "0.4.6"
|
|
3
|
+
version = "0.5.0"
|
|
4
4
|
description = "Generate speech using various libraries."
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "SinapsisAI", email = "dev@sinapsis.tech"},
|
|
@@ -28,6 +28,7 @@ all = [
|
|
|
28
28
|
"sinapsis-zonos[all]",
|
|
29
29
|
"sinapsis-parakeet-tdt[all]",
|
|
30
30
|
"sinapsis-orpheus-cpp[all]",
|
|
31
|
+
"sinapsis-csm[all]",
|
|
31
32
|
|
|
32
33
|
]
|
|
33
34
|
gradio-app = [
|
|
@@ -51,7 +52,7 @@ sinapsis-zonos = { workspace = true }
|
|
|
51
52
|
sinapsis-speech = { workspace = true }
|
|
52
53
|
sinapsis-parakeet-tdt = { workspace = true }
|
|
53
54
|
sinapsis-orpheus-cpp = { workspace = true }
|
|
54
|
-
sinapsis-
|
|
55
|
+
sinapsis-csm = { workspace = true }
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
[[tool.uv.index]]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|