livekit-plugins-cartesia 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ TTSEncoding = Literal[
8
8
  # "pcm_alaw",
9
9
  ]
10
10
 
11
- TTSModels = Literal["sonic-english", "sonic-multilingual"]
11
+ TTSModels = Literal["sonic", "sonic-2", "sonic-lite", "sonic-preview", "sonic-turbo"]
12
12
  TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
13
13
  TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238"
14
14
  TTSVoiceSpeed = Literal["fastest", "fast", "normal", "slow", "slowest"]
@@ -23,7 +23,6 @@ from dataclasses import dataclass
23
23
  from typing import Any, Optional
24
24
 
25
25
  import aiohttp
26
- from livekit import rtc
27
26
  from livekit.agents import (
28
27
  APIConnectionError,
29
28
  APIConnectOptions,
@@ -74,7 +73,7 @@ class TTS(tts.TTS):
74
73
  def __init__(
75
74
  self,
76
75
  *,
77
- model: TTSModels | str = "sonic",
76
+ model: TTSModels | str = "sonic-2",
78
77
  language: str = "en",
79
78
  encoding: TTSEncoding = "pcm_s16le",
80
79
  voice: str | list[float] = TTSDefaultVoiceId,
@@ -91,7 +90,7 @@ class TTS(tts.TTS):
91
90
  See https://docs.cartesia.ai/reference/web-socket/stream-speech/stream-speech for more details on the Cartesia API.
92
91
 
93
92
  Args:
94
- model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
93
+ model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
95
94
  language (str, optional): The language code for synthesis. Defaults to "en".
96
95
  encoding (TTSEncoding, optional): The audio encoding format. Defaults to "pcm_s16le".
97
96
  voice (str | list[float], optional): The voice ID or embedding array.
@@ -128,6 +127,8 @@ class TTS(tts.TTS):
128
127
  self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
129
128
  connect_cb=self._connect_ws,
130
129
  close_cb=self._close_ws,
130
+ max_session_duration=300,
131
+ mark_refreshed_on_get=True,
131
132
  )
132
133
  self._streams = weakref.WeakSet[SynthesizeStream]()
133
134
 
@@ -149,6 +150,9 @@ class TTS(tts.TTS):
149
150
 
150
151
  return self._session
151
152
 
153
+ def prewarm(self) -> None:
154
+ self._pool.prewarm()
155
+
152
156
  def update_options(
153
157
  self,
154
158
  *,
@@ -165,7 +169,7 @@ class TTS(tts.TTS):
165
169
  and emotion. If any parameter is not provided, the existing value will be retained.
166
170
 
167
171
  Args:
168
- model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
172
+ model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-2".
169
173
  language (str, optional): The language code for synthesis. Defaults to "en".
170
174
  voice (str | list[float], optional): The voice ID or embedding array.
171
175
  speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
@@ -251,19 +255,17 @@ class ChunkedStream(tts.ChunkedStream):
251
255
  ),
252
256
  ) as resp:
253
257
  resp.raise_for_status()
258
+ emitter = tts.SynthesizedAudioEmitter(
259
+ event_ch=self._event_ch,
260
+ request_id=request_id,
261
+ )
254
262
  async for data, _ in resp.content.iter_chunks():
255
263
  for frame in bstream.write(data):
256
- self._event_ch.send_nowait(
257
- tts.SynthesizedAudio(
258
- request_id=request_id,
259
- frame=frame,
260
- )
261
- )
264
+ emitter.push(frame)
262
265
 
263
266
  for frame in bstream.flush():
264
- self._event_ch.send_nowait(
265
- tts.SynthesizedAudio(request_id=request_id, frame=frame)
266
- )
267
+ emitter.push(frame)
268
+ emitter.flush()
267
269
  except asyncio.TimeoutError as e:
268
270
  raise APITimeoutError() from e
269
271
  except aiohttp.ClientResponseError as e:
@@ -323,22 +325,10 @@ class SynthesizeStream(tts.SynthesizeStream):
323
325
  sample_rate=self._opts.sample_rate,
324
326
  num_channels=NUM_CHANNELS,
325
327
  )
326
-
327
- last_frame: rtc.AudioFrame | None = None
328
-
329
- def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
330
- nonlocal last_frame
331
- if last_frame is not None:
332
- self._event_ch.send_nowait(
333
- tts.SynthesizedAudio(
334
- request_id=request_id,
335
- segment_id=segment_id,
336
- frame=last_frame,
337
- is_final=is_final,
338
- )
339
- )
340
-
341
- last_frame = None
328
+ emitter = tts.SynthesizedAudioEmitter(
329
+ event_ch=self._event_ch,
330
+ request_id=request_id,
331
+ )
342
332
 
343
333
  while True:
344
334
  msg = await ws.receive()
@@ -358,18 +348,16 @@ class SynthesizeStream(tts.SynthesizeStream):
358
348
 
359
349
  data = json.loads(msg.data)
360
350
  segment_id = data.get("context_id")
351
+ emitter._segment_id = segment_id
361
352
 
362
353
  if data.get("data"):
363
354
  b64data = base64.b64decode(data["data"])
364
355
  for frame in audio_bstream.write(b64data):
365
- _send_last_frame(segment_id=segment_id, is_final=False)
366
- last_frame = frame
356
+ emitter.push(frame)
367
357
  elif data.get("done"):
368
358
  for frame in audio_bstream.flush():
369
- _send_last_frame(segment_id=segment_id, is_final=False)
370
- last_frame = frame
371
-
372
- _send_last_frame(segment_id=segment_id, is_final=True)
359
+ emitter.push(frame)
360
+ emitter.flush()
373
361
  if segment_id == request_id:
374
362
  # we're not going to receive more frames, end stream
375
363
  break
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.4.8"
15
+ __version__ = "0.4.10"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: livekit-plugins-cartesia
3
- Version: 0.4.8
3
+ Version: 0.4.10
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents>=0.12.11
22
+ Requires-Dist: livekit-agents<1.0.0,>=0.12.16
23
23
  Dynamic: classifier
24
24
  Dynamic: description
25
25
  Dynamic: description-content-type
@@ -0,0 +1,10 @@
1
+ livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
2
+ livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
+ livekit/plugins/cartesia/models.py,sha256=KGY-r2luJuUNY6a3nnB0Rx-5Td12hikk-GtYLnqvysE,977
4
+ livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/cartesia/tts.py,sha256=cOoNFXNlw2NFN5o6PgLTccu_-y_W0MTAwNciNDtxdd8,14128
6
+ livekit/plugins/cartesia/version.py,sha256=EAXwrHdOWRivmdK-RTQl1YBemh0E8ui_JHvG9dT490M,601
7
+ livekit_plugins_cartesia-0.4.10.dist-info/METADATA,sha256=TXT6xGvQ3of6Gl9PyCCYLrurnkDdfyiOjzyrXC0gga4,1471
8
+ livekit_plugins_cartesia-0.4.10.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
9
+ livekit_plugins_cartesia-0.4.10.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_cartesia-0.4.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
2
- livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
- livekit/plugins/cartesia/models.py,sha256=56CJgo7my-w-vpedir_ImV_aqKASeLihE5DbcCCgGJI,950
4
- livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/cartesia/tts.py,sha256=UUnGrqizmnyHhA-rmbtVD6-pYCMEWozbZxlVRe0HFw4,14747
6
- livekit/plugins/cartesia/version.py,sha256=1PUjLiYcL1-nwBUG1EXqZsTfF_WQI8KxpzCeCIZHeLc,600
7
- livekit_plugins_cartesia-0.4.8.dist-info/METADATA,sha256=sJRr9rLGGOUcOonnwQX6xxbKu1Ag-DFWrxjBf4uhDtw,1463
8
- livekit_plugins_cartesia-0.4.8.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
9
- livekit_plugins_cartesia-0.4.8.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_cartesia-0.4.8.dist-info/RECORD,,