livekit-plugins-cartesia 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,34 @@ TTSEncoding = Literal[
8
8
  # "pcm_alaw",
9
9
  ]
10
10
 
11
-
12
11
  TTSModels = Literal["sonic-english", "sonic-multilingual"]
13
12
  TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
14
13
  TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0"
14
+ TTSVoiceSpeed = Literal["fastest", "fast", "normal", "slow", "slowest"]
15
+ TTSVoiceEmotion = Literal[
16
+ "anger:lowest",
17
+ "anger:low",
18
+ "anger",
19
+ "anger:high",
20
+ "anger:highest",
21
+ "positivity:lowest",
22
+ "positivity:low",
23
+ "positivity",
24
+ "positivity:high",
25
+ "positivity:highest",
26
+ "surprise:lowest",
27
+ "surprise:low",
28
+ "surprise",
29
+ "surprise:high",
30
+ "surprise:highest",
31
+ "sadness:lowest",
32
+ "sadness:low",
33
+ "sadness",
34
+ "sadness:high",
35
+ "sadness:highest",
36
+ "curiosity:lowest",
37
+ "curiosity:low",
38
+ "curiosity",
39
+ "curiosity:high",
40
+ "curiosity:highest",
41
+ ]
@@ -25,7 +25,13 @@ import aiohttp
25
25
  from livekit.agents import tokenize, tts, utils
26
26
 
27
27
  from .log import logger
28
- from .models import TTSDefaultVoiceId, TTSEncoding, TTSModels
28
+ from .models import (
29
+ TTSDefaultVoiceId,
30
+ TTSEncoding,
31
+ TTSModels,
32
+ TTSVoiceEmotion,
33
+ TTSVoiceSpeed,
34
+ )
29
35
 
30
36
  API_AUTH_HEADER = "X-API-Key"
31
37
  API_VERSION_HEADER = "Cartesia-Version"
@@ -41,6 +47,8 @@ class _TTSOptions:
41
47
  encoding: TTSEncoding
42
48
  sample_rate: int
43
49
  voice: str | list[float]
50
+ speed: TTSVoiceSpeed | float | None
51
+ emotion: list[TTSVoiceEmotion | str] | None
44
52
  api_key: str
45
53
  language: str
46
54
 
@@ -53,10 +61,29 @@ class TTS(tts.TTS):
53
61
  language: str = "en",
54
62
  encoding: TTSEncoding = "pcm_s16le",
55
63
  voice: str | list[float] = TTSDefaultVoiceId,
64
+ speed: TTSVoiceSpeed | float | None = None,
65
+ emotion: list[TTSVoiceEmotion | str] | None = None,
56
66
  sample_rate: int = 24000,
57
67
  api_key: str | None = None,
58
68
  http_session: aiohttp.ClientSession | None = None,
59
69
  ) -> None:
70
+ """
71
+ Create a new instance of Cartesia TTS.
72
+
73
+ See https://docs.cartesia.ai/reference/web-socket/stream-speech/stream-speech for more details on the Cartesia API.
74
+
75
+ Args:
76
+ model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
77
+ language (str, optional): The language code for synthesis. Defaults to "en".
78
+ encoding (TTSEncoding, optional): The audio encoding format. Defaults to "pcm_s16le".
79
+ voice (str | list[float], optional): The voice ID or embedding array.
80
+ speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
81
+ emotion (list[TTSVoiceEmotion | str], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
82
+ sample_rate (int, optional): The audio sample rate in Hz. Defaults to 24000.
83
+ api_key (str, optional): The Cartesia API key. If not provided, it will be read from the CARTESIA_API_KEY environment variable.
84
+ http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
85
+ """
86
+
60
87
  super().__init__(
61
88
  capabilities=tts.TTSCapabilities(streaming=True),
62
89
  sample_rate=sample_rate,
@@ -73,6 +100,8 @@ class TTS(tts.TTS):
73
100
  encoding=encoding,
74
101
  sample_rate=sample_rate,
75
102
  voice=voice,
103
+ speed=speed,
104
+ emotion=emotion,
76
105
  api_key=api_key,
77
106
  )
78
107
  self._session = http_session
@@ -268,6 +297,15 @@ def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
268
297
  voice["mode"] = "embedding"
269
298
  voice["embedding"] = opts.voice
270
299
 
300
+ voice_controls: dict = {}
301
+ if opts.speed is not None:
302
+ voice_controls["speed"] = opts.speed
303
+ if opts.emotion is not None:
304
+ voice_controls["emotion"] = opts.emotion
305
+
306
+ if voice_controls:
307
+ voice["__experimental_controls"] = voice_controls
308
+
271
309
  return {
272
310
  "model_id": opts.model,
273
311
  "voice": voice,
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.4.0"
15
+ __version__ = "0.4.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-cartesia
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,10 @@
1
+ livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
2
+ livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
+ livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
4
+ livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/cartesia/tts.py,sha256=kUGIhsmHqIK2m_FV44_nwjHp0c7Zb2H7UG9VayNIae8,11341
6
+ livekit/plugins/cartesia/version.py,sha256=jabhjXzHcov1Cy2z9FGgyHFpSQ3hFKqu3vly20WQeTs,600
7
+ livekit_plugins_cartesia-0.4.2.dist-info/METADATA,sha256=w9ZGYOicE_fUFVTnhgvewGgWgwmaInoG9w6BGTiOu-8,1252
8
+ livekit_plugins_cartesia-0.4.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
9
+ livekit_plugins_cartesia-0.4.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_cartesia-0.4.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.2.0)
2
+ Generator: setuptools (74.1.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
2
- livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
- livekit/plugins/cartesia/models.py,sha256=ZoSyV2ap_LqAIgvBvkmukkPxQR9DfKb3Z3oHtWxMiVg,335
4
- livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/cartesia/tts.py,sha256=sdiiWinOZR5EBkQFwa3GZAGrkgzXY1-aSRiDZ34K8ww,9527
6
- livekit/plugins/cartesia/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
7
- livekit_plugins_cartesia-0.4.0.dist-info/METADATA,sha256=BGgicrqKsylOpTbUcRG0B4DZF2qnaERI9q7qwIRLN7s,1252
8
- livekit_plugins_cartesia-0.4.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
9
- livekit_plugins_cartesia-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_cartesia-0.4.0.dist-info/RECORD,,