sinapsis-speech 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ from typing import Literal
3
+
4
+ import torch
5
+ from csm.generator import Generator
6
+ from csm.models import Model
7
+
8
+
9
+ class CSMGenerator:
10
+ """
11
+ Wrapper around the CSM model providing a simple interface
12
+ for text-to-speech generation
13
+ """
14
+
15
+ def __init__(self, device: Literal["cpu", "cuda"] = "cpu", sample_rate: int = 24000) -> None:
16
+ self.device: str = device
17
+ self.sample_rate: int = sample_rate
18
+ self.model: Model = Model.from_pretrained("sesame/csm-1b")
19
+ self.model.to(device=device)
20
+ self.model.sample_rate = sample_rate
21
+ self.generator = Generator(self.model)
22
+
23
+ def generate(
24
+ self, text: str, speaker: int = 0, context: list[str] | None = None, max_audio_length_ms: int = 10000
25
+ ) -> torch.Tensor:
26
+ if context is None:
27
+ context = []
28
+ return self.generator.generate(
29
+ text=text,
30
+ speaker=speaker,
31
+ context=context,
32
+ max_audio_length_ms=max_audio_length_ms,
33
+ )
34
+
35
+
36
def load_csm_1b(device: Literal["cpu", "cuda"] = "cpu", sample_rate: int = 24000) -> CSMGenerator:
    """
    Build a ready-to-use CSM text-to-speech wrapper.

    Args:
        device: Device handed to ``CSMGenerator``.
        sample_rate: Sample rate handed to ``CSMGenerator``.

    Returns:
        CSMGenerator: Wrapper exposing a ``generate`` method.
    """
    wrapper = CSMGenerator(device=device, sample_rate=sample_rate)
    return wrapper
@@ -0,0 +1,19 @@
1
+ import importlib
2
+ from typing import Callable
3
+ from sinapsis_csm.templates.csm_tts import CSMTTS
4
+
5
# Dotted path of the package that holds the template modules.
_root_lib_path = "sinapsis_csm.templates"

# Maps each public template name to the module that defines it.
_template_lookup = {"CSMTTS": f"{_root_lib_path}.csm_tts"}


def __getattr__(name: str) -> Callable:
    """
    Resolve a template by name, importing its module on demand.

    Python calls this hook only when ``name`` is not already bound in the
    module namespace.

    Args:
        name: Attribute requested from this package.

    Returns:
        The attribute called ``name`` from the module registered for it in
        ``_template_lookup``.

    Raises:
        AttributeError: If ``name`` is not a registered template.
    """
    module_path = _template_lookup.get(name)
    if module_path is None:
        raise AttributeError(f"Template `{name}` not found in `{_root_lib_path}`.")
    return getattr(importlib.import_module(module_path), name)


__all__ = ["CSMTTS"]
18
+
19
+
@@ -0,0 +1,88 @@
1
+ from typing import Literal
2
+ import torch
3
+ from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer
4
+ from sinapsis_core.template_base import Template
5
+ from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType
6
+ from sinapsis_csm.helpers.generator import load_csm_1b
7
+
8
+
9
class CSMTTS(Template):
    """
    Sinapsis template for converting text into speech using the CSM TTS model.

    For every text packet in the incoming container the text is synthesized
    and the resulting waveform is appended to the container as an audio packet.
    """

    class AttributesBaseModel(TemplateAttributes):  # type: ignore
        """
        Defines configurable attributes for the CSMTTS template.

        Attributes:
            speaker_id (int): Speaker index forwarded to the model as ``speaker``.
            max_audio_length_ms (int): Maximum audio length forwarded to the model.
            device (Literal["cuda", "cpu"]): Device the model is loaded on.
            context (list[str] | None): Optional context forwarded to the model;
                an unset or empty value is sent as an empty list.
            sample_rate_hz (int): Sample rate passed to the model loader and
                recorded on every generated audio packet.
        """

        speaker_id: int = 0
        max_audio_length_ms: int = 10000
        device: Literal["cuda", "cpu"] = "cpu"
        context: list[str] | None = None
        sample_rate_hz: int = 24000

    def __init__(self, attributes: TemplateAttributeType) -> None:
        """
        Initializes the template and loads the CSM model.

        Args:
            attributes (TemplateAttributeType): User-defined attributes from YAML configuration.
        """
        super().__init__(attributes)
        self.model = load_csm_1b(
            device=self.attributes.device,
            sample_rate=self.attributes.sample_rate_hz,
        )

    def generate_audio(self, text: str) -> torch.Tensor:
        """
        Converts input text to audio using the CSM model.

        Args:
            text (str): Input text string.

        Returns:
            torch.Tensor: Audio waveform tensor.
        """
        # ``None`` and an empty list are both sent to the model as ``[]``.
        context = self.attributes.context or []
        return self.model.generate(
            text=text,
            speaker=self.attributes.speaker_id,
            context=context,
            max_audio_length_ms=self.attributes.max_audio_length_ms,
        )

    def generate_audio_packet(self, audio: torch.Tensor, source_text: str) -> AudioPacket:
        """
        Wraps a raw audio tensor into a Sinapsis-compatible AudioPacket.

        Args:
            audio (torch.Tensor): Audio waveform.
            source_text (str): Original input text used for generation.

        Returns:
            AudioPacket: Encapsulated audio data with metadata.
        """
        # ``Tensor.numpy()`` refuses tensors that require grad, so detach
        # before moving to host memory; for tensors that do not track
        # gradients this is a no-op.
        audio_np = audio.detach().cpu().numpy()
        return AudioPacket(
            content=audio_np,
            # NOTE(review): this records the configured rate; confirm it
            # matches the rate the model actually produces.
            sample_rate=self.attributes.sample_rate_hz,
            generic_data={"source_text": source_text, "model": "CSM"},
        )

    def execute(self, container: DataContainer) -> DataContainer:
        """
        Main method executed by Sinapsis. Converts all text packets in the input container to audio.

        Args:
            container (DataContainer): Input container with text packets.

        Returns:
            DataContainer: The same container, with one audio packet appended
            per text packet.
        """
        for packet in container.texts:
            audio = self.generate_audio(packet.content)
            audio_packet = self.generate_audio_packet(audio, packet.content)
            # Tag the packet with the producing template instance.
            audio_packet.source = self.instance_name
            container.audios.append(audio_packet)
        return container
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.4.5
3
+ Version: 0.5.0
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -20,6 +20,7 @@ Requires-Dist: sinapsis-speech[gradio-app]; extra == "all"
20
20
  Requires-Dist: sinapsis-zonos[all]; extra == "all"
21
21
  Requires-Dist: sinapsis-parakeet-tdt[all]; extra == "all"
22
22
  Requires-Dist: sinapsis-orpheus-cpp[all]; extra == "all"
23
+ Requires-Dist: sinapsis-csm[all]; extra == "all"
23
24
  Provides-Extra: gradio-app
24
25
  Requires-Dist: sinapsis[webapp]>=0.2.3; extra == "gradio-app"
25
26
  Dynamic: license-file
@@ -61,6 +62,7 @@ This repo includes packages for performing speech synthesis using different tool
61
62
  * <code>sinapsis-zonos</code>
62
63
  * <code>sinapsis-orpheus-cpp</code>
63
64
  * <code>sinapsis-parakeet</code>
65
+ * <code>sinapsis-csm</code>
64
66
 
65
67
  Install using your preferred package manager. We strongly recommend using <code>uv</code>. To install <code>uv</code>, refer to the [official documentation](https://docs.astral.sh/uv/getting-started/installation/#installation-methods).
66
68
 
@@ -176,6 +178,17 @@ For specific instructions and further details, see the [README.md](https://githu
176
178
 
177
179
  </details>
178
180
 
181
+ <details>
182
+ <summary id="csm"><strong><span style="font-size: 1.4em;"> Sinapsis CSM</span></strong></summary>
183
+
184
+ This package provides a template for seamlessly integrating, configuring, and running **text-to-speech (TTS)** functionalities powered by [SesameAILabs CSM](https://github.com/SesameAILabs/csm/tree/main?tab=readme-ov-file).
185
+
186
+ - **CSMTTS**: Converts text into speech using the CSM model. This template processes text packets from the input container and adds the resulting audio packets to the container.
187
+
188
+ For specific instructions and further details, see the [README.md](https://github.com/Sinapsis-AI/sinapsis-speech/blob/main/packages/sinapsis_csm/README.md).
189
+
190
+ </details>
191
+
179
192
  <h2 id="webapp">🌐 Webapps</h2>
180
193
  The webapps included in this project showcase the modularity of the templates, in this case for speech generation tasks.
181
194
 
@@ -200,6 +213,9 @@ cd sinapsis-speech
200
213
  > [!IMPORTANT]
201
214
  > F5-TTS requires a reference audio file for voice cloning. Make sure you have a reference audio file in the artifacts directory.
202
215
 
216
+ > [!IMPORTANT]
217
+ > CSM requires an HF_TOKEN to run any inference. See the [official instructions](https://huggingface.co/docs/hub/security-tokens) and set it using <code>export HF_TOKEN="token-provided-by-hf"</code>
218
+
203
219
  > [!NOTE]
204
220
  > Agent configuration can be changed through the `AGENT_CONFIG_PATH` env var. You can check the available configurations in each package configs folder.
205
221
 
@@ -246,6 +262,11 @@ docker compose -f docker/compose_apps.yaml up -d sinapsis-orpheus-tts
246
262
  docker compose -f docker/compose_apps.yaml up -d sinapsis-parakeet
247
263
  ```
248
264
 
265
+ - For CSM:
266
+ ```bash
267
+ docker compose -f docker/compose_apps.yaml up -d sinapsis-csm
268
+ ```
269
+
249
270
  3. **Check the logs**
250
271
 
251
272
  - For ElevenLabs:
@@ -276,6 +297,11 @@ docker logs -f sinapsis-orpheus-tts
276
297
  docker logs -f sinapsis-parakeet
277
298
  ```
278
299
 
300
+ - For CSM:
301
+ ```bash
302
+ docker logs -f sinapsis-csm
303
+ ```
304
+
279
305
  4. **The logs will display the URL to access the webapp, e.g.:**
280
306
  ```bash
281
307
  Running on local URL: http://127.0.0.1:7860
@@ -335,6 +361,12 @@ uv run webapps/packet_tts_apps/kokoro_tts_app.py
335
361
  ```bash
336
362
  uv run webapps/generic_tts_apps/zonos_tts_app.py
337
363
  ```
364
+
365
+ - For CSM:
366
+ ```bash
367
+ uv run webapps/generic_tts_apps/csm_tts_app.py
368
+ ```
369
+
338
370
  4. **The terminal will display the URL to access the webapp (e.g.)**:
339
371
  ```bash
340
372
  Running on local URL: http://127.0.0.1:7860
@@ -1,3 +1,8 @@
1
+ sinapsis_csm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ sinapsis_csm/src/sinapsis_csm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ sinapsis_csm/src/sinapsis_csm/helpers/generator.py,sha256=YQkZOHqc6bHiriasRJpPNElTdV2W3r5egiSZtRTCEAs,1336
4
+ sinapsis_csm/src/sinapsis_csm/templates/__init__.py,sha256=ByEZu3rcqETfkVG0He91bTDzGWWUdY-Zn5b4i2MLHc4,485
5
+ sinapsis_csm/src/sinapsis_csm/templates/csm_tts.py,sha256=s4zj_QzSR9FehlPvz_3oLA9WLyoI4LYHjhiHb4-HfW0,3101
1
6
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
7
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
8
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py,sha256=j8J64iplBNaff1WvmfJ03eJozE1f5SdqtqQeldV2vPY,998
@@ -30,7 +35,7 @@ sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/__init__.py,sha256=47DEQ
30
35
  sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/tags.py,sha256=OKZbq4zIL6XWM7eG5WuQ3dWYkmYNWjuCnlseXmjR_j0,262
31
36
  sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/__init__.py,sha256=3LppgbS6v70Rmx__yXXQgnoZ2ZBHcXkXeWZYQQf6Zwg,504
32
37
  sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py,sha256=Tw9S8Nqf74lXwUxBodaLK_JaQvh9ITt8cWFQJ2QNP6s,10210
33
- sinapsis_speech-0.4.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
38
+ sinapsis_speech-0.5.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
34
39
  sinapsis_zonos/src/sinapsis_zonos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
40
  sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
41
  sinapsis_zonos/src/sinapsis_zonos/helpers/tags.py,sha256=Y7MKQNx1E2k7ebF6r_1l1nBeS5k8hO424yFTT9NI7Rg,244
@@ -38,7 +43,7 @@ sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py,sha256=m1GdOYfzP73JGmtxH
38
43
  sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py,sha256=bwu88wsJGzEqbssgb-wpS_7lFscJ74J8cgyca-hX_Qw,6422
39
44
  sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py,sha256=A-_F0K3hbEFqeWWAh4YftgU9CFX-WHrauSiCAww9yp8,482
40
45
  sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py,sha256=h5EToXoJgAgjqvz9WLDfSjhCsV5zgBwZrX5cTJ4VnhM,7679
41
- sinapsis_speech-0.4.5.dist-info/METADATA,sha256=ZBGpQgEu2_I7DDsO_t2MO690zMA0OtncYXMUmGTA6-M,12783
42
- sinapsis_speech-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- sinapsis_speech-0.4.5.dist-info/top_level.txt,sha256=KvdwXupt5wnqb_4XGRcuJaL9Glgdw-DBvRkNzhgl_Ds,110
44
- sinapsis_speech-0.4.5.dist-info/RECORD,,
46
+ sinapsis_speech-0.5.0.dist-info/METADATA,sha256=4GliLgI5CoPUwqQnYPnSdxDGI30WAciSCRNSpjNO9FQ,13987
47
+ sinapsis_speech-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
48
+ sinapsis_speech-0.5.0.dist-info/top_level.txt,sha256=V3zOrj7E7CvmLsN7sNeISyc_yJPwKaw_V-msOpmvK30,123
49
+ sinapsis_speech-0.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,3 +1,4 @@
1
+ sinapsis_csm
1
2
  sinapsis_elevenlabs
2
3
  sinapsis_f5_tts
3
4
  sinapsis_kokoro