livekit-plugins-cartesia 1.0.0.dev5__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ TTSEncoding = Literal[
8
8
  # "pcm_alaw",
9
9
  ]
10
10
 
11
- TTSModels = Literal["sonic-english", "sonic-multilingual"]
11
+ TTSModels = Literal["sonic", "sonic-2", "sonic-lite", "sonic-preview", "sonic-turbo"]
12
12
  TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
13
13
  TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238"
14
14
  TTSVoiceSpeed = Literal["fastest", "fast", "normal", "slow", "slowest"]
@@ -33,9 +33,21 @@ from livekit.agents import (
33
33
  tts,
34
34
  utils,
35
35
  )
36
+ from livekit.agents.types import (
37
+ DEFAULT_API_CONNECT_OPTIONS,
38
+ NOT_GIVEN,
39
+ NotGivenOr,
40
+ )
41
+ from livekit.agents.utils import is_given
36
42
 
37
43
  from .log import logger
38
- from .models import TTSDefaultVoiceId, TTSEncoding, TTSModels, TTSVoiceEmotion, TTSVoiceSpeed
44
+ from .models import (
45
+ TTSDefaultVoiceId,
46
+ TTSEncoding,
47
+ TTSModels,
48
+ TTSVoiceEmotion,
49
+ TTSVoiceSpeed,
50
+ )
39
51
 
40
52
  API_AUTH_HEADER = "X-API-Key"
41
53
  API_VERSION_HEADER = "Cartesia-Version"
@@ -51,8 +63,8 @@ class _TTSOptions:
51
63
  encoding: TTSEncoding
52
64
  sample_rate: int
53
65
  voice: str | list[float]
54
- speed: TTSVoiceSpeed | float | None
55
- emotion: list[TTSVoiceEmotion | str] | None
66
+ speed: NotGivenOr[TTSVoiceSpeed | float]
67
+ emotion: NotGivenOr[list[TTSVoiceEmotion | str]]
56
68
  api_key: str
57
69
  language: str
58
70
  base_url: str
@@ -68,14 +80,14 @@ class TTS(tts.TTS):
68
80
  def __init__(
69
81
  self,
70
82
  *,
71
- model: TTSModels | str = "sonic",
83
+ model: TTSModels | str = "sonic-2",
72
84
  language: str = "en",
73
85
  encoding: TTSEncoding = "pcm_s16le",
74
86
  voice: str | list[float] = TTSDefaultVoiceId,
75
- speed: TTSVoiceSpeed | float | None = None,
76
- emotion: list[TTSVoiceEmotion | str] | None = None,
87
+ speed: NotGivenOr[TTSVoiceSpeed | float] = NOT_GIVEN,
88
+ emotion: NotGivenOr[list[TTSVoiceEmotion | str]] = NOT_GIVEN,
77
89
  sample_rate: int = 24000,
78
- api_key: str | None = None,
90
+ api_key: NotGivenOr[str] = NOT_GIVEN,
79
91
  http_session: aiohttp.ClientSession | None = None,
80
92
  base_url: str = "https://api.cartesia.ai",
81
93
  ) -> None:
@@ -85,7 +97,7 @@ class TTS(tts.TTS):
85
97
  See https://docs.cartesia.ai/reference/web-socket/stream-speech/stream-speech for more details on the Cartesia API.
86
98
 
87
99
  Args:
88
- model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
100
+ model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
89
101
  language (str, optional): The language code for synthesis. Defaults to "en".
90
102
  encoding (TTSEncoding, optional): The audio encoding format. Defaults to "pcm_s16le".
91
103
  voice (str | list[float], optional): The voice ID or embedding array.
@@ -95,16 +107,15 @@ class TTS(tts.TTS):
95
107
  api_key (str, optional): The Cartesia API key. If not provided, it will be read from the CARTESIA_API_KEY environment variable.
96
108
  http_session (aiohttp.ClientSession | None, optional): An existing aiohttp ClientSession to use. If not provided, a new session will be created.
97
109
  base_url (str, optional): The base URL for the Cartesia API. Defaults to "https://api.cartesia.ai".
98
- """
110
+ """ # noqa: E501
99
111
 
100
112
  super().__init__(
101
113
  capabilities=tts.TTSCapabilities(streaming=True),
102
114
  sample_rate=sample_rate,
103
115
  num_channels=NUM_CHANNELS,
104
116
  )
105
-
106
- api_key = api_key or os.environ.get("CARTESIA_API_KEY")
107
- if not api_key:
117
+ cartesia_api_key = api_key if is_given(api_key) else os.environ.get("CARTESIA_API_KEY")
118
+ if not cartesia_api_key:
108
119
  raise ValueError("CARTESIA_API_KEY must be set")
109
120
 
110
121
  self._opts = _TTSOptions(
@@ -115,7 +126,7 @@ class TTS(tts.TTS):
115
126
  voice=voice,
116
127
  speed=speed,
117
128
  emotion=emotion,
118
- api_key=api_key,
129
+ api_key=cartesia_api_key,
119
130
  base_url=base_url,
120
131
  )
121
132
  self._session = http_session
@@ -149,11 +160,11 @@ class TTS(tts.TTS):
149
160
  def update_options(
150
161
  self,
151
162
  *,
152
- model: TTSModels | None = None,
153
- language: str | None = None,
154
- voice: str | list[float] | None = None,
155
- speed: TTSVoiceSpeed | float | None = None,
156
- emotion: list[TTSVoiceEmotion | str] | None = None,
163
+ model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
164
+ language: NotGivenOr[str] = NOT_GIVEN,
165
+ voice: NotGivenOr[str | list[float]] = NOT_GIVEN,
166
+ speed: NotGivenOr[TTSVoiceSpeed | float] = NOT_GIVEN,
167
+ emotion: NotGivenOr[list[TTSVoiceEmotion | str]] = NOT_GIVEN,
157
168
  ) -> None:
158
169
  """
159
170
  Update the Text-to-Speech (TTS) configuration options.
@@ -162,24 +173,28 @@ class TTS(tts.TTS):
162
173
  and emotion. If any parameter is not provided, the existing value will be retained.
163
174
 
164
175
  Args:
165
- model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
176
+ model (TTSModels, optional): The Cartesia TTS model to use. If not given, the current model is retained.
166
177
  language (str, optional): The language code for synthesis. If not given, the current language is retained.
167
178
  voice (str | list[float], optional): The voice ID or embedding array.
168
179
  speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
169
180
  emotion (list[TTSVoiceEmotion], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
170
181
  """
171
- self._opts.model = model or self._opts.model
172
- self._opts.language = language or self._opts.language
173
- self._opts.voice = voice or self._opts.voice
174
- self._opts.speed = speed or self._opts.speed
175
- if emotion is not None:
182
+ if is_given(model):
183
+ self._opts.model = model
184
+ if is_given(language):
185
+ self._opts.language = language
186
+ if is_given(voice):
187
+ self._opts.voice = voice
188
+ if is_given(speed):
189
+ self._opts.speed = speed
190
+ if is_given(emotion):
176
191
  self._opts.emotion = emotion
177
192
 
178
193
  def synthesize(
179
194
  self,
180
195
  text: str,
181
196
  *,
182
- conn_options: APIConnectOptions | None = None,
197
+ conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
183
198
  ) -> ChunkedStream:
184
199
  return ChunkedStream(
185
200
  tts=self,
@@ -189,7 +204,9 @@ class TTS(tts.TTS):
189
204
  session=self._ensure_session(),
190
205
  )
191
206
 
192
- def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
207
+ def stream(
208
+ self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
209
+ ) -> SynthesizeStream:
193
210
  return SynthesizeStream(
194
211
  tts=self,
195
212
  pool=self._pool,
@@ -214,7 +231,7 @@ class ChunkedStream(tts.ChunkedStream):
214
231
  input_text: str,
215
232
  opts: _TTSOptions,
216
233
  session: aiohttp.ClientSession,
217
- conn_options: APIConnectOptions | None = None,
234
+ conn_options: APIConnectOptions,
218
235
  ) -> None:
219
236
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
220
237
  self._opts, self._session = opts, session
@@ -368,17 +385,18 @@ class SynthesizeStream(tts.SynthesizeStream):
368
385
 
369
386
  def _to_cartesia_options(opts: _TTSOptions) -> dict[str, Any]:
370
387
  voice: dict[str, Any] = {}
371
- if isinstance(opts.voice, str):
372
- voice["mode"] = "id"
373
- voice["id"] = opts.voice
374
- else:
375
- voice["mode"] = "embedding"
376
- voice["embedding"] = opts.voice
388
+ if is_given(opts.voice):
389
+ if isinstance(opts.voice, str):
390
+ voice["mode"] = "id"
391
+ voice["id"] = opts.voice
392
+ else:
393
+ voice["mode"] = "embedding"
394
+ voice["embedding"] = opts.voice
377
395
 
378
396
  voice_controls: dict = {}
379
- if opts.speed is not None:
397
+ if is_given(opts.speed):
380
398
  voice_controls["speed"] = opts.speed
381
- if opts.emotion is not None:
399
+ if is_given(opts.emotion):
382
400
  voice_controls["emotion"] = opts.emotion
383
401
 
384
402
  if voice_controls:
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.0.dev5"
15
+ __version__ = '1.0.0.rc2'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-cartesia
3
- Version: 1.0.0.dev5
3
+ Version: 1.0.0rc2
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
18
18
  Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Requires-Dist: livekit-agents>=1.0.0.dev5
21
+ Requires-Dist: livekit-agents>=1.0.0.rc2
22
22
  Description-Content-Type: text/markdown
23
23
 
24
24
  # LiveKit Plugins Cartesia
@@ -0,0 +1,9 @@
1
+ livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
2
+ livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
+ livekit/plugins/cartesia/models.py,sha256=KGY-r2luJuUNY6a3nnB0Rx-5Td12hikk-GtYLnqvysE,977
4
+ livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/cartesia/tts.py,sha256=rHDiypiCdn0sKykNvYcl5Cf6SZE2XmPRlnLGQA4m7Ks,14443
6
+ livekit/plugins/cartesia/version.py,sha256=AHsNOknrNG9prN-fv_7X-KI5-O8ZMrUnTDyl9ObQIzY,604
7
+ livekit_plugins_cartesia-1.0.0rc2.dist-info/METADATA,sha256=oy_PfwJoIM-pnM94BvVFtb-C7idxqZzXbXHjApy86WA,1262
8
+ livekit_plugins_cartesia-1.0.0rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_cartesia-1.0.0rc2.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
2
- livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
- livekit/plugins/cartesia/models.py,sha256=56CJgo7my-w-vpedir_ImV_aqKASeLihE5DbcCCgGJI,950
4
- livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/cartesia/tts.py,sha256=Zub4MXXVXQgV0t6al_uidDWH3BTVaYftyVbAFbkTU-U,13999
6
- livekit/plugins/cartesia/version.py,sha256=pXgCpV03nQI-5Kk-74NFyAdw1htj2cx6unwQHipEcfE,605
7
- livekit_plugins_cartesia-1.0.0.dev5.dist-info/METADATA,sha256=1Pjqf3rcfp0L7L93TlgNocQq96krnWo2WFP4rTkCV90,1265
8
- livekit_plugins_cartesia-1.0.0.dev5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- livekit_plugins_cartesia-1.0.0.dev5.dist-info/RECORD,,