dv-pipecat-ai 0.0.85.dev830__py3-none-any.whl → 0.0.85.dev831__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/METADATA +1 -1
- {dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/RECORD +6 -6
- pipecat/services/cartesia/tts.py +48 -10
- {dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.dev830.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -205,7 +205,7 @@ pipecat/services/azure/realtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
205
205
|
pipecat/services/azure/realtime/llm.py,sha256=MnDiw-YJP3kll1gbkta4z4vsWfWZ5oBprZCinMP9O0M,2385
|
|
206
206
|
pipecat/services/cartesia/__init__.py,sha256=vzh0jBnfPwWdxFfV-tu0x1HFoOTgr9s91GYmD-CJUtY,284
|
|
207
207
|
pipecat/services/cartesia/stt.py,sha256=00k9gQYo_xPKb-RRJ-RNV4LPFw-7xXiFU7ACFLYttWY,12388
|
|
208
|
-
pipecat/services/cartesia/tts.py,sha256=
|
|
208
|
+
pipecat/services/cartesia/tts.py,sha256=Fh6hm5AUj2rNX8J4UOjHA7uAPIGcie1Dyxv5WBvV1OY,26279
|
|
209
209
|
pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
|
|
210
210
|
pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
|
|
211
211
|
pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
|
|
@@ -415,7 +415,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
415
415
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
416
416
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
417
417
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
418
|
-
dv_pipecat_ai-0.0.85.dev830.dist-info/METADATA,sha256=
|
|
419
|
-
dv_pipecat_ai-0.0.85.dev830.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
420
|
-
dv_pipecat_ai-0.0.85.dev830.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
421
|
-
dv_pipecat_ai-0.0.85.dev830.dist-info/RECORD,,
|
|
418
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/METADATA,sha256=5ahEs864DAPIEEiiv7-7Oa-vRhRN1Ede341NuqED3Sw,32924
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/RECORD,,
|
pipecat/services/cartesia/tts.py
CHANGED
|
@@ -15,7 +15,6 @@ from typing import AsyncGenerator, List, Literal, Optional, Union
|
|
|
15
15
|
from loguru import logger
|
|
16
16
|
from pydantic import BaseModel, Field
|
|
17
17
|
|
|
18
|
-
|
|
19
18
|
from pipecat.frames.frames import (
|
|
20
19
|
CancelFrame,
|
|
21
20
|
EndFrame,
|
|
@@ -49,6 +48,26 @@ except ModuleNotFoundError as e:
|
|
|
49
48
|
raise Exception(f"Missing module: {e}")
|
|
50
49
|
|
|
51
50
|
|
|
51
|
+
class GenerationConfig(BaseModel):
|
|
52
|
+
"""Configuration for Cartesia Sonic-3 generation parameters.
|
|
53
|
+
|
|
54
|
+
Sonic-3 interprets these parameters as guidance to ensure natural speech.
|
|
55
|
+
Test against your content for best results.
|
|
56
|
+
|
|
57
|
+
Parameters:
|
|
58
|
+
volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
|
|
59
|
+
speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
|
|
60
|
+
emotion: Single emotion string to guide the emotional tone. Examples include neutral,
|
|
61
|
+
angry, excited, content, sad, scared. Over 60 emotions are supported. For best
|
|
62
|
+
results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
|
|
63
|
+
and Marian.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
volume: Optional[float] = None
|
|
67
|
+
speed: Optional[float] = None
|
|
68
|
+
emotion: Optional[str] = None
|
|
69
|
+
|
|
70
|
+
|
|
52
71
|
def language_to_cartesia_language(language: Language) -> Optional[str]:
|
|
53
72
|
"""Convert a Language enum to Cartesia language code.
|
|
54
73
|
|
|
@@ -102,16 +121,20 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
102
121
|
|
|
103
122
|
Parameters:
|
|
104
123
|
language: Language to use for synthesis.
|
|
105
|
-
speed: Voice speed control.
|
|
106
|
-
emotion: List of emotion controls.
|
|
124
|
+
speed: Voice speed control for non-Sonic-3 models (literal values).
|
|
125
|
+
emotion: List of emotion controls for non-Sonic-3 models.
|
|
107
126
|
|
|
108
127
|
.. deprecated:: 0.0.68
|
|
109
128
|
The `emotion` parameter is deprecated and will be removed in a future version.
|
|
129
|
+
|
|
130
|
+
generation_config: Generation configuration for Sonic-3 models. Includes volume,
|
|
131
|
+
speed (numeric), and emotion (string) parameters.
|
|
110
132
|
"""
|
|
111
133
|
|
|
112
134
|
language: Optional[Language] = Language.EN
|
|
113
135
|
speed: Optional[Literal["slow", "normal", "fast"]] = None
|
|
114
136
|
emotion: Optional[List[str]] = []
|
|
137
|
+
generation_config: Optional[GenerationConfig] = None
|
|
115
138
|
|
|
116
139
|
def __init__(
|
|
117
140
|
self,
|
|
@@ -120,7 +143,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
120
143
|
voice_id: str,
|
|
121
144
|
cartesia_version: str = "2025-04-16",
|
|
122
145
|
url: str = "wss://api.cartesia.ai/tts/websocket",
|
|
123
|
-
model: str = "sonic-
|
|
146
|
+
model: str = "sonic-3",
|
|
124
147
|
sample_rate: Optional[int] = None,
|
|
125
148
|
encoding: str = "pcm_s16le",
|
|
126
149
|
container: str = "raw",
|
|
@@ -136,7 +159,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
136
159
|
voice_id: ID of the voice to use for synthesis.
|
|
137
160
|
cartesia_version: API version string for Cartesia service.
|
|
138
161
|
url: WebSocket URL for Cartesia TTS API.
|
|
139
|
-
model: TTS model to use (e.g., "sonic-
|
|
162
|
+
model: TTS model to use (e.g., "sonic-3").
|
|
140
163
|
sample_rate: Audio sample rate. If None, uses default.
|
|
141
164
|
encoding: Audio encoding format.
|
|
142
165
|
container: Audio container format.
|
|
@@ -180,6 +203,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
180
203
|
else "en",
|
|
181
204
|
"speed": params.speed,
|
|
182
205
|
"emotion": params.emotion,
|
|
206
|
+
"generation_config": params.generation_config,
|
|
183
207
|
}
|
|
184
208
|
self.set_model_name(model)
|
|
185
209
|
self.set_voice(voice_id)
|
|
@@ -298,6 +322,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
298
322
|
if self._settings["speed"]:
|
|
299
323
|
msg["speed"] = self._settings["speed"]
|
|
300
324
|
|
|
325
|
+
if self._settings["generation_config"]:
|
|
326
|
+
msg["generation_config"] = self._settings["generation_config"].model_dump(
|
|
327
|
+
exclude_none=True
|
|
328
|
+
)
|
|
329
|
+
|
|
301
330
|
return json.dumps(msg)
|
|
302
331
|
|
|
303
332
|
async def start(self, frame: StartFrame):
|
|
@@ -419,7 +448,6 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
419
448
|
logger.error(f"{self} error: {msg}")
|
|
420
449
|
await self.push_frame(TTSStoppedFrame())
|
|
421
450
|
await self.stop_all_metrics()
|
|
422
|
-
|
|
423
451
|
await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
|
|
424
452
|
self._context_id = None
|
|
425
453
|
else:
|
|
@@ -484,23 +512,27 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
484
512
|
|
|
485
513
|
Parameters:
|
|
486
514
|
language: Language to use for synthesis.
|
|
487
|
-
speed: Voice speed control.
|
|
488
|
-
emotion: List of emotion controls.
|
|
515
|
+
speed: Voice speed control for non-Sonic-3 models (literal values).
|
|
516
|
+
emotion: List of emotion controls for non-Sonic-3 models.
|
|
489
517
|
|
|
490
518
|
.. deprecated:: 0.0.68
|
|
491
519
|
The `emotion` parameter is deprecated and will be removed in a future version.
|
|
520
|
+
|
|
521
|
+
generation_config: Generation configuration for Sonic-3 models. Includes volume,
|
|
522
|
+
speed (numeric), and emotion (string) parameters.
|
|
492
523
|
"""
|
|
493
524
|
|
|
494
525
|
language: Optional[Language] = Language.EN
|
|
495
526
|
speed: Optional[Literal["slow", "normal", "fast"]] = None
|
|
496
527
|
emotion: Optional[List[str]] = Field(default_factory=list)
|
|
528
|
+
generation_config: Optional[GenerationConfig] = None
|
|
497
529
|
|
|
498
530
|
def __init__(
|
|
499
531
|
self,
|
|
500
532
|
*,
|
|
501
533
|
api_key: str,
|
|
502
534
|
voice_id: str,
|
|
503
|
-
model: str = "sonic-
|
|
535
|
+
model: str = "sonic-3",
|
|
504
536
|
base_url: str = "https://api.cartesia.ai",
|
|
505
537
|
cartesia_version: str = "2024-11-13",
|
|
506
538
|
sample_rate: Optional[int] = None,
|
|
@@ -514,7 +546,7 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
514
546
|
Args:
|
|
515
547
|
api_key: Cartesia API key for authentication.
|
|
516
548
|
voice_id: ID of the voice to use for synthesis.
|
|
517
|
-
model: TTS model to use (e.g., "sonic-
|
|
549
|
+
model: TTS model to use (e.g., "sonic-3").
|
|
518
550
|
base_url: Base URL for Cartesia HTTP API.
|
|
519
551
|
cartesia_version: API version string for Cartesia service.
|
|
520
552
|
sample_rate: Audio sample rate. If None, uses default.
|
|
@@ -541,6 +573,7 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
541
573
|
else "en",
|
|
542
574
|
"speed": params.speed,
|
|
543
575
|
"emotion": params.emotion,
|
|
576
|
+
"generation_config": params.generation_config,
|
|
544
577
|
}
|
|
545
578
|
self.set_voice(voice_id)
|
|
546
579
|
self.set_model_name(model)
|
|
@@ -634,6 +667,11 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
634
667
|
if self._settings["speed"]:
|
|
635
668
|
payload["speed"] = self._settings["speed"]
|
|
636
669
|
|
|
670
|
+
if self._settings["generation_config"]:
|
|
671
|
+
payload["generation_config"] = self._settings["generation_config"].model_dump(
|
|
672
|
+
exclude_none=True
|
|
673
|
+
)
|
|
674
|
+
|
|
637
675
|
yield TTSStartedFrame()
|
|
638
676
|
|
|
639
677
|
session = await self._client._get_session()
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev830.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/top_level.txt
RENAMED
|
File without changes
|