sinapsis-speech 0.4.5__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {sinapsis_speech-0.4.5/packages/sinapsis_speech.egg-info → sinapsis_speech-0.5.0}/PKG-INFO +33 -1
  2. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/README.md +31 -0
  3. sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm/helpers/generator.py +43 -0
  4. sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm/templates/__init__.py +19 -0
  5. sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm/templates/csm_tts.py +88 -0
  6. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0/packages/sinapsis_speech.egg-info}/PKG-INFO +33 -1
  7. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/SOURCES.txt +5 -0
  8. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/requires.txt +1 -0
  9. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/top_level.txt +1 -0
  10. sinapsis_speech-0.5.0/packages/sinapsis_zonos/src/sinapsis_zonos/__init__.py +0 -0
  11. sinapsis_speech-0.5.0/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py +0 -0
  12. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/pyproject.toml +3 -2
  13. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/LICENSE +0 -0
  14. {sinapsis_speech-0.4.5/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs → sinapsis_speech-0.5.0/packages/sinapsis_csm}/__init__.py +0 -0
  15. {sinapsis_speech-0.4.5/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers → sinapsis_speech-0.5.0/packages/sinapsis_csm/src/sinapsis_csm}/__init__.py +0 -0
  16. {sinapsis_speech-0.4.5/packages/sinapsis_f5_tts/src/sinapsis_f5_tts → sinapsis_speech-0.5.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs}/__init__.py +0 -0
  17. {sinapsis_speech-0.4.5/packages/sinapsis_f5_tts/src/sinapsis_f5_tts → sinapsis_speech-0.5.0/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs}/helpers/__init__.py +0 -0
  18. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py +0 -0
  19. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/tags.py +0 -0
  20. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py +0 -0
  21. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py +0 -0
  22. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py +0 -0
  23. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py +0 -0
  24. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py +0 -0
  25. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py +0 -0
  26. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py +0 -0
  27. {sinapsis_speech-0.4.5/packages/sinapsis_kokoro/src/sinapsis_kokoro → sinapsis_speech-0.5.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts}/__init__.py +0 -0
  28. {sinapsis_speech-0.4.5/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp → sinapsis_speech-0.5.0/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/helpers}/__init__.py +0 -0
  29. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/tags.py +0 -0
  30. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py +0 -0
  31. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py +0 -0
  32. {sinapsis_speech-0.4.5/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/helpers → sinapsis_speech-0.5.0/packages/sinapsis_kokoro/src/sinapsis_kokoro}/__init__.py +0 -0
  33. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py +0 -0
  34. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/helpers/tags.py +0 -0
  35. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/__init__.py +0 -0
  36. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_kokoro/src/sinapsis_kokoro/templates/kokoro_tts.py +0 -0
  37. {sinapsis_speech-0.4.5/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt → sinapsis_speech-0.5.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp}/__init__.py +0 -0
  38. {sinapsis_speech-0.4.5/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt → sinapsis_speech-0.5.0/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp}/helpers/__init__.py +0 -0
  39. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/helpers/tags.py +0 -0
  40. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/templates/__init__.py +0 -0
  41. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/templates/orpheus_tts.py +0 -0
  42. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_orpheus_cpp/src/sinapsis_orpheus_cpp/thirdparty/helpers.py +0 -0
  43. {sinapsis_speech-0.4.5/packages/sinapsis_zonos/src/sinapsis_zonos → sinapsis_speech-0.5.0/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt}/__init__.py +0 -0
  44. {sinapsis_speech-0.4.5/packages/sinapsis_zonos/src/sinapsis_zonos → sinapsis_speech-0.5.0/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt}/helpers/__init__.py +0 -0
  45. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/tags.py +0 -0
  46. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/__init__.py +0 -0
  47. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py +0 -0
  48. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_speech.egg-info/dependency_links.txt +0 -0
  49. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/tags.py +0 -0
  50. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py +0 -0
  51. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py +0 -0
  52. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py +0 -0
  53. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/packages/sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py +0 -0
  54. {sinapsis_speech-0.4.5 → sinapsis_speech-0.5.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.4.5
3
+ Version: 0.5.0
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -20,6 +20,7 @@ Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
20
20
  Requires-Dist: sinapsis-zonos[all]; extra == "all"
21
21
  Requires-Dist: sinapsis-parakeet-tdt[all]; extra == "all"
22
22
  Requires-Dist: sinapsis-orpheus-cpp[all]; extra == "all"
23
+ Requires-Dist: sinapsis-csm[all]; extra == "all"
23
24
  Provides-Extra: gradio-app
24
25
  Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
25
26
  Dynamic: license-file
@@ -61,6 +62,7 @@ This repo includes packages for performing speech synthesis using different tool
61
62
  * <code>sinapsis-zonos</code>
62
63
  * <code>sinapsis-orpheus-cpp</code>
63
64
  * <code>sinapsis-parakeet</code>
65
+ * <code>sinapsis-csm</code>
64
66
 
65
67
  Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
66
68
 
@@ -176,6 +178,17 @@ For specific instructions and further details, see the [README.md](https://githu
176
178
 
177
179
  </details>
178
180
 
181
+ <details>
182
+ <summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
183
+
184
+ This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
185
+
186
+ - **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
187
+
188
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
189
+
190
+ </details>
191
+
179
192
  <h2 id="webapp">🌐 Webapps</h2>
180
193
  The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
181
194
 
@@ -200,6 +213,9 @@ cd sinapsis-speech
200
213
  > [!IMPORTANT]
201
214
  > F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
202
215
 
216
+ > [!IMPORTANT]
217
+ > CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
218
+
203
219
  > [!NOTE]
204
220
  > Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
205
221
 
@@ -246,6 +262,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
246
262
  docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
247
263
  ```
248
264
 
265
+ - For CSM:
266
+ ```bash
267
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
268
+ ```
269
+
249
270
  3. **Check the logs**
250
271
 
251
272
  - For ElevenLabs:
@@ -276,6 +297,11 @@ docker logs -f sinapsis-orpheus-tts
276
297
  docker logs -f sinapsis-parakeet
277
298
  ```
278
299
 
300
+ - For CSM:
301
+ ```bash
302
+ docker logs -f sinapsis-csm
303
+ ```
304
+
279
305
  4. **The logs will display the URL to access the webapp, e.g.:**
280
306
  ```bash
281
307
  Running on local URL: http://127.0.0.1:7860
@@ -335,6 +361,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
335
361
  ```bash
336
362
  uv run webapps/generic_tts_apps/zonos_tts_app.py
337
363
  ```
364
+
365
+ - For CSM:
366
+ ```bash
367
+ uv run webapps/generic_tts_apps/csm_tts_app.py
368
+ ```
369
+
338
370
  4. **The terminal will display the URL to access the webapp (e.g.)**:
339
371
  ```bash
340
372
  Running on local URL: http://127.0.0.1:7860
@@ -35,6 +35,7 @@ This repo includes packages for performing speech synthesis using different tool
35
35
  * <code>sinapsis-zonos</code>
36
36
  * <code>sinapsis-orpheus-cpp</code>
37
37
  * <code>sinapsis-parakeet</code>
38
+ * <code>sinapsis-csm</code>
38
39
 
39
40
  Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
40
41
 
@@ -150,6 +151,17 @@ For specific instructions and further details, see the [README.md](https://githu
150
151
 
151
152
  </details>
152
153
 
154
+ <details>
155
+ <summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
156
+
157
+ This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
158
+
159
+ - **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
160
+
161
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
162
+
163
+ </details>
164
+
153
165
  <h2 id="webapp">🌐 Webapps</h2>
154
166
  The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
155
167
 
@@ -174,6 +186,9 @@ cd sinapsis-speech
174
186
  > [!IMPORTANT]
175
187
  > F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
176
188
 
189
+ > [!IMPORTANT]
190
+ > CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
191
+
177
192
  > [!NOTE]
178
193
  > Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
179
194
 
@@ -220,6 +235,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
220
235
  docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
221
236
  ```
222
237
 
238
+ - For CSM:
239
+ ```bash
240
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
241
+ ```
242
+
223
243
  3. **Check the logs**
224
244
 
225
245
  - For ElevenLabs:
@@ -250,6 +270,11 @@ docker logs -f sinapsis-orpheus-tts
250
270
  docker logs -f sinapsis-parakeet
251
271
  ```
252
272
 
273
+ - For CSM:
274
+ ```bash
275
+ docker logs -f sinapsis-csm
276
+ ```
277
+
253
278
  4. **The logs will display the URL to access the webapp, e.g.:**
254
279
  ```bash
255
280
  Running on local URL: http://127.0.0.1:7860
@@ -309,6 +334,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
309
334
  ```bash
310
335
  uv run webapps/generic_tts_apps/zonos_tts_app.py
311
336
  ```
337
+
338
+ - For CSM:
339
+ ```bash
340
+ uv run webapps/generic_tts_apps/csm_tts_app.py
341
+ ```
342
+
312
343
  4. **The terminal will display the URL to access the webapp (e.g.)**:
313
344
  ```bash
314
345
  Running on local URL: http://127.0.0.1:7860
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ from typing import Literal
3
+
4
+ import torch
5
+ from csm.generator import Generator
6
+ from csm.models import Model
7
+
8
+
9
+ class CSMGenerator:
10
+ """
11
+ Wrapper around the CSM model providing a simple interface
12
+ for text-to-speech generation
13
+ """
14
+
15
+ def __init__(self, device: Literal["cpu", "cuda"] = "cpu", sample_rate: int = 24000) -> None:
16
+ self.device: str = device
17
+ self.sample_rate: int = sample_rate
18
+ self.model: Model = Model.from_pretrained("sesame/csm-1b")
19
+ self.model.to(device=device)
20
+ self.model.sample_rate = sample_rate
21
+ self.generator = Generator(self.model)
22
+
23
+ def generate(
24
+ self, text: str, speaker: int = 0, context: list[str] | None = None, max_audio_length_ms: int = 10000
25
+ ) -> torch.Tensor:
26
+ if context is None:
27
+ context = []
28
+ return self.generator.generate(
29
+ text=text,
30
+ speaker=speaker,
31
+ context=context,
32
+ max_audio_length_ms=max_audio_length_ms,
33
+ )
34
+
35
+
36
+ def load_csm_1b(device: Literal["cpu", "cuda"] = "cpu", sample_rate: int = 24000) -> CSMGenerator:
37
+ """
38
+ Loads and configures the CSM TTS model.
39
+
40
+ Returns:
41
+ CSMGenerator: Model wrapper with ready-to-use generate method.
42
+ """
43
+ return CSMGenerator(device=device, sample_rate=sample_rate)
@@ -0,0 +1,19 @@
1
+ import importlib
2
+ from typing import Callable
3
+ from sinapsis_csm.templates.csm_tts import CSMTTS
4
+
5
+ _root_lib_path = "sinapsis_csm.templates"
6
+ _template_lookup = {
7
+ "CSMTTS": f"{_root_lib_path}.csm_tts",
8
+ }
9
+
10
+ def __getattr__(name: str) -> Callable:
11
+ if name in _template_lookup:
12
+ module = importlib.import_module(_template_lookup[name])
13
+ return getattr(module, name)
14
+ raise AttributeError(f"Template `{name}` not found in `{_root_lib_path}`.")
15
+
16
+
17
+ __all__ = ["CSMTTS"]
18
+
19
+
@@ -0,0 +1,88 @@
1
+ from typing import Literal
2
+ import torch
3
+ from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
4
+ from sinapsis_core.template_base import Template
5
+ from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType
6
+ from sinapsis_csm.helpers.generator import load_csm_1b
7
+
8
+
9
+ class CSMTTS(Template):
10
+ """
11
+ Sinapsis template for converting text into speech using the CSM TTS model.
12
+ """
13
+
14
+ class AttributesBaseModel(TemplateAttributes): # type: ignore
15
+ """
16
+ Defines configurable attributes for the CSMTTS template.
17
+ """
18
+ speaker_id: int = 0
19
+ max_audio_length_ms: int = 10000
20
+ device: Literal["cuda", "cpu"] = "cpu"
21
+ context: list[str] | None = None
22
+ sample_rate_hz: int = 24000
23
+
24
+ def __init__(self, attributes: TemplateAttributeType) -> None:
25
+ """
26
+ Initializes the template and loads the CSM model.
27
+
28
+ Args:
29
+ attributes (TemplateAttributeType): User-defined attributes from YAML configuration.
30
+ """
31
+ super().__init__(attributes)
32
+ self.model = load_csm_1b(
33
+ device=self.attributes.device,
34
+ sample_rate=self.attributes.sample_rate_hz
35
+ )
36
+
37
+ def generate_audio(self, text: str) -> torch.Tensor:
38
+ """
39
+ Converts input text to audio using the CSM model.
40
+
41
+ Args:
42
+ text (str): Input text string.
43
+
44
+ Returns:
45
+ torch.Tensor: Audio waveform tensor.
46
+ """
47
+ context = self.attributes.context if self.attributes.context else []
48
+ return self.model.generate(
49
+ text=text,
50
+ speaker=self.attributes.speaker_id,
51
+ context=context,
52
+ max_audio_length_ms=self.attributes.max_audio_length_ms,
53
+ )
54
+
55
+ def generate_audio_packet(self, audio: torch.Tensor, source_text: str) -> AudioPacket:
56
+ """
57
+ Wraps a raw audio tensor into a sinapsis compatible audioPacket
58
+
59
+ Args:
60
+ audio (torch.Tensor): Audio waveform.
61
+ source_text (str): Original input text used for generation.
62
+
63
+ Returns:
64
+ AudioPacket: Encapsulated audio data with metadata.
65
+ """
66
+ audio_np = audio.cpu().numpy()
67
+ return AudioPacket(
68
+ content=audio_np,
69
+ sample_rate=self.attributes.sample_rate_hz,
70
+ generic_data={"source_text": source_text, "model": "CSM"}
71
+ )
72
+
73
+ def execute(self, container: DataContainer) -> DataContainer:
74
+ """
75
+ Main method executed by Sinapsis. Converts all text packets in the input container to audio.
76
+
77
+ Args:
78
+ container (DataContainer): Input container with text packets.
79
+
80
+ Returns:
81
+ DataContainer: Output container with generated audio packets.
82
+ """
83
+ for packet in container.texts:
84
+ audio = self.generate_audio(packet.content)
85
+ audio_packet = self.generate_audio_packet(audio, packet.content)
86
+ audio_packet.source = self.instance_name
87
+ container.audios.append(audio_packet)
88
+ return container
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.4.5
3
+ Version: 0.5.0
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -20,6 +20,7 @@ Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
20
20
  Requires-Dist: sinapsis-zonos[all]; extra == "all"
21
21
  Requires-Dist: sinapsis-parakeet-tdt[all]; extra == "all"
22
22
  Requires-Dist: sinapsis-orpheus-cpp[all]; extra == "all"
23
+ Requires-Dist: sinapsis-csm[all]; extra == "all"
23
24
  Provides-Extra: gradio-app
24
25
  Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
25
26
  Dynamic: license-file
@@ -61,6 +62,7 @@ This repo includes packages for performing speech synthesis using different tool
61
62
  * <code>sinapsis-zonos</code>
62
63
  * <code>sinapsis-orpheus-cpp</code>
63
64
  * <code>sinapsis-parakeet</code>
65
+ * <code>sinapsis-csm</code>
64
66
 
65
67
  Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
66
68
 
@@ -176,6 +178,17 @@ For specific instructions and further details, see the [README.md](https://githu
176
178
 
177
179
  </details>
178
180
 
181
+ <details>
182
+ <summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
183
+
184
+ This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
185
+
186
+ - **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
187
+
188
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
189
+
190
+ </details>
191
+
179
192
  <h2 id="webapp">🌐 Webapps</h2>
180
193
  The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
181
194
 
@@ -200,6 +213,9 @@ cd sinapsis-speech
200
213
  > [!IMPORTANT]
201
214
  > F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
202
215
 
216
+ > [!IMPORTANT]
217
+ > CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
218
+
203
219
  > [!NOTE]
204
220
  > Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
205
221
 
@@ -246,6 +262,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
246
262
  docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
247
263
  ```
248
264
 
265
+ - For CSM:
266
+ ```bash
267
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
268
+ ```
269
+
249
270
  3. **Check the logs**
250
271
 
251
272
  - For ElevenLabs:
@@ -276,6 +297,11 @@ docker logs -f sinapsis-orpheus-tts
276
297
  docker logs -f sinapsis-parakeet
277
298
  ```
278
299
 
300
+ - For CSM:
301
+ ```bash
302
+ docker logs -f sinapsis-csm
303
+ ```
304
+
279
305
  4. **The logs will display the URL to access the webapp, e.g.:**
280
306
  ```bash
281
307
  Running on local URL: http://127.0.0.1:7860
@@ -335,6 +361,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
335
361
  ```bash
336
362
  uv run webapps/generic_tts_apps/zonos_tts_app.py
337
363
  ```
364
+
365
+ - For CSM:
366
+ ```bash
367
+ uv run webapps/generic_tts_apps/csm_tts_app.py
368
+ ```
369
+
338
370
  4. **The terminal will display the URL to access the webapp (e.g.)**:
339
371
  ```bash
340
372
  Running on local URL: http://127.0.0.1:7860
@@ -1,6 +1,11 @@
1
1
  LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
+ packages/sinapsis_csm/__init__.py
5
+ packages/sinapsis_csm/src/sinapsis_csm/__init__.py
6
+ packages/sinapsis_csm/src/sinapsis_csm/helpers/generator.py
7
+ packages/sinapsis_csm/src/sinapsis_csm/templates/__init__.py
8
+ packages/sinapsis_csm/src/sinapsis_csm/templates/csm_tts.py
4
9
  packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py
5
10
  packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py
6
11
  packages/sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py
@@ -9,6 +9,7 @@ sinapsis-speech[gradio-app]
9
9
  sinapsis-zonos[all]
10
10
  sinapsis-parakeet-tdt[all]
11
11
  sinapsis-orpheus-cpp[all]
12
+ sinapsis-csm[all]
12
13
 
13
14
  [gradio-app]
14
15
  sinapsis[webapp]>=0.2.3
@@ -1,3 +1,4 @@
1
+ sinapsis_csm
1
2
  sinapsis_elevenlabs
2
3
  sinapsis_f5_tts
3
4
  sinapsis_kokoro
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sinapsis-speech"
3
- version = "0.4.5"
3
+ version = "0.5.0"
4
4
  description = "Generate speech using various libraries."
5
5
  authors = [
6
6
  {name = "SinapsisAI", email = "dev@sinapsis.tech"},
@@ -28,6 +28,7 @@ all = [
28
28
  "sinapsis-zonos[all]",
29
29
  "sinapsis-parakeet-tdt[all]",
30
30
  "sinapsis-orpheus-cpp[all]",
31
+ "sinapsis-csm[all]",
31
32
 
32
33
  ]
33
34
  gradio-app = [
@@ -51,7 +52,7 @@ sinapsis-zonos = { workspace = true }
51
52
  sinapsis-speech = { workspace = true }
52
53
  sinapsis-parakeet-tdt = { workspace = true }
53
54
  sinapsis-orpheus-cpp = { workspace = true }
54
- sinapsis-chatterbox = { workspace = true }
55
+ sinapsis-csm = { workspace = true }
55
56
 
56
57
 
57
58
  [[tool.uv.index]]
File without changes