livekit-plugins-cartesia 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,3 +28,12 @@ class CartesiaPlugin(Plugin):
28
28
 
29
29
 
30
30
  Plugin.register_plugin(CartesiaPlugin())
31
+
32
+ # Cleanup docs of unexported modules
33
+ _module = dir()
34
+ NOT_IN_ALL = [m for m in _module if m not in __all__]
35
+
36
+ __pdoc__ = {}
37
+
38
+ for n in NOT_IN_ALL:
39
+ __pdoc__[n] = False
@@ -22,7 +22,15 @@ from dataclasses import dataclass
22
22
  from typing import Any
23
23
 
24
24
  import aiohttp
25
- from livekit.agents import tokenize, tts, utils
25
+ from livekit import rtc
26
+ from livekit.agents import (
27
+ APIConnectionError,
28
+ APIStatusError,
29
+ APITimeoutError,
30
+ tokenize,
31
+ tts,
32
+ utils,
33
+ )
26
34
 
27
35
  from .log import logger
28
36
  from .models import (
@@ -43,7 +51,7 @@ BUFFERED_WORDS_COUNT = 8
43
51
 
44
52
  @dataclass
45
53
  class _TTSOptions:
46
- model: TTSModels
54
+ model: TTSModels | str
47
55
  encoding: TTSEncoding
48
56
  sample_rate: int
49
57
  voice: str | list[float]
@@ -57,7 +65,7 @@ class TTS(tts.TTS):
57
65
  def __init__(
58
66
  self,
59
67
  *,
60
- model: TTSModels = "sonic-english",
68
+ model: TTSModels | str = "sonic-english",
61
69
  language: str = "en",
62
70
  encoding: TTSEncoding = "pcm_s16le",
63
71
  voice: str | list[float] = TTSDefaultVoiceId,
@@ -112,63 +120,106 @@ class TTS(tts.TTS):
112
120
 
113
121
  return self._session
114
122
 
123
+ def update_options(
124
+ self,
125
+ *,
126
+ model: TTSModels | None = None,
127
+ language: str | None = None,
128
+ voice: str | list[float] | None = None,
129
+ speed: TTSVoiceSpeed | float | None = None,
130
+ emotion: list[TTSVoiceEmotion | str] | None = None,
131
+ ) -> None:
132
+ """
133
+ Update the Text-to-Speech (TTS) configuration options.
134
+
135
+ This method allows updating the TTS settings, including model type, language, voice, speed,
136
+ and emotion. If any parameter is not provided, the existing value will be retained.
137
+
138
+ Args:
139
+ model (TTSModels, optional): The Cartesia TTS model to use. Defaults to "sonic-english".
140
+ language (str, optional): The language code for synthesis. Defaults to "en".
141
+ voice (str | list[float], optional): The voice ID or embedding array.
142
+ speed (TTSVoiceSpeed | float, optional): Voice Control - Speed (https://docs.cartesia.ai/user-guides/voice-control)
143
+ emotion (list[TTSVoiceEmotion], optional): Voice Control - Emotion (https://docs.cartesia.ai/user-guides/voice-control)
144
+ """
145
+ self._opts.model = model or self._opts.model
146
+ self._opts.language = language or self._opts.language
147
+ self._opts.voice = voice or self._opts.voice
148
+ self._opts.speed = speed or self._opts.speed
149
+ if emotion is not None:
150
+ self._opts.emotion = emotion
151
+
115
152
  def synthesize(self, text: str) -> "ChunkedStream":
116
- return ChunkedStream(text, self._opts, self._ensure_session())
153
+ return ChunkedStream(self, text, self._opts, self._ensure_session())
117
154
 
118
155
  def stream(self) -> "SynthesizeStream":
119
- return SynthesizeStream(self._opts, self._ensure_session())
156
+ return SynthesizeStream(self, self._opts, self._ensure_session())
120
157
 
121
158
 
122
159
  class ChunkedStream(tts.ChunkedStream):
123
160
  """Synthesize chunked text using the bytes endpoint"""
124
161
 
125
162
  def __init__(
126
- self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
163
+ self, tts: TTS, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
127
164
  ) -> None:
128
- super().__init__()
129
- self._text, self._opts, self._session = text, opts, session
165
+ super().__init__(tts, text)
166
+ self._opts, self._session = opts, session
130
167
 
131
- @utils.log_exceptions(logger=logger)
132
- async def _main_task(self):
168
+ async def _main_task(self) -> None:
169
+ request_id = utils.shortuuid()
133
170
  bstream = utils.audio.AudioByteStream(
134
171
  sample_rate=self._opts.sample_rate, num_channels=NUM_CHANNELS
135
172
  )
136
- request_id, segment_id = utils.shortuuid(), utils.shortuuid()
137
-
138
- data = _to_cartesia_options(self._opts)
139
- data["transcript"] = self._text
140
-
141
- async with self._session.post(
142
- "https://api.cartesia.ai/tts/bytes",
143
- headers={
144
- API_AUTH_HEADER: self._opts.api_key,
145
- API_VERSION_HEADER: API_VERSION,
146
- },
147
- json=data,
148
- ) as resp:
149
- async for data, _ in resp.content.iter_chunks():
150
- for frame in bstream.write(data):
151
- self._event_ch.send_nowait(
152
- tts.SynthesizedAudio(
153
- request_id=request_id, segment_id=segment_id, frame=frame
173
+
174
+ json = _to_cartesia_options(self._opts)
175
+ json["transcript"] = self._input_text
176
+
177
+ headers = {
178
+ API_AUTH_HEADER: self._opts.api_key,
179
+ API_VERSION_HEADER: API_VERSION,
180
+ }
181
+
182
+ try:
183
+ async with self._session.post(
184
+ "https://api.cartesia.ai/tts/bytes",
185
+ headers=headers,
186
+ json=json,
187
+ ) as resp:
188
+ resp.raise_for_status()
189
+ async for data, _ in resp.content.iter_chunks():
190
+ for frame in bstream.write(data):
191
+ self._event_ch.send_nowait(
192
+ tts.SynthesizedAudio(
193
+ request_id=request_id,
194
+ frame=frame,
195
+ )
154
196
  )
155
- )
156
197
 
157
- for frame in bstream.flush():
158
- self._event_ch.send_nowait(
159
- tts.SynthesizedAudio(
160
- request_id=request_id, segment_id=segment_id, frame=frame
198
+ for frame in bstream.flush():
199
+ self._event_ch.send_nowait(
200
+ tts.SynthesizedAudio(request_id=request_id, frame=frame)
161
201
  )
162
- )
202
+ except asyncio.TimeoutError as e:
203
+ raise APITimeoutError() from e
204
+ except aiohttp.ClientResponseError as e:
205
+ raise APIStatusError(
206
+ message=e.message,
207
+ status_code=e.status,
208
+ request_id=None,
209
+ body=None,
210
+ ) from e
211
+ except Exception as e:
212
+ raise APIConnectionError() from e
163
213
 
164
214
 
165
215
  class SynthesizeStream(tts.SynthesizeStream):
166
216
  def __init__(
167
217
  self,
218
+ tts: TTS,
168
219
  opts: _TTSOptions,
169
220
  session: aiohttp.ClientSession,
170
221
  ):
171
- super().__init__()
222
+ super().__init__(tts)
172
223
  self._opts, self._session = opts, session
173
224
  self._sent_tokenizer_stream = tokenize.basic.SentenceTokenizer(
174
225
  min_sentence_len=BUFFERED_WORDS_COUNT
@@ -233,6 +284,22 @@ class SynthesizeStream(tts.SynthesizeStream):
233
284
  num_channels=NUM_CHANNELS,
234
285
  )
235
286
 
287
+ last_frame: rtc.AudioFrame | None = None
288
+
289
+ def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
290
+ nonlocal last_frame
291
+ if last_frame is not None:
292
+ self._event_ch.send_nowait(
293
+ tts.SynthesizedAudio(
294
+ request_id=request_id,
295
+ segment_id=segment_id,
296
+ frame=last_frame,
297
+ is_final=is_final,
298
+ )
299
+ )
300
+
301
+ last_frame = None
302
+
236
303
  while True:
237
304
  msg = await ws.receive()
238
305
  if msg.type in (
@@ -248,26 +315,18 @@ class SynthesizeStream(tts.SynthesizeStream):
248
315
 
249
316
  data = json.loads(msg.data)
250
317
  segment_id = data.get("context_id")
251
- # Once we receive audio for a segment, we can start a new segment
318
+
252
319
  if data.get("data"):
253
320
  b64data = base64.b64decode(data["data"])
254
321
  for frame in audio_bstream.write(b64data):
255
- self._event_ch.send_nowait(
256
- tts.SynthesizedAudio(
257
- request_id=request_id,
258
- segment_id=segment_id,
259
- frame=frame,
260
- )
261
- )
322
+ _send_last_frame(segment_id=segment_id, is_final=False)
323
+ last_frame = frame
262
324
  elif data.get("done"):
263
325
  for frame in audio_bstream.flush():
264
- self._event_ch.send_nowait(
265
- tts.SynthesizedAudio(
266
- request_id=request_id,
267
- segment_id=segment_id,
268
- frame=frame,
269
- )
270
- )
326
+ _send_last_frame(segment_id=segment_id, is_final=False)
327
+ last_frame = frame
328
+
329
+ _send_last_frame(segment_id=segment_id, is_final=True)
271
330
 
272
331
  if segment_id == request_id:
273
332
  # we're not going to receive more frames, close the connection
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.4.2"
15
+ __version__ = "0.4.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-cartesia
3
- Version: 0.4.2
3
+ Version: 0.4.3
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents >=0.8.0.dev0
22
+ Requires-Dist: livekit-agents >=0.11
23
23
 
24
24
  # LiveKit Plugins Cartesia
25
25
 
@@ -0,0 +1,10 @@
1
+ livekit/plugins/cartesia/__init__.py,sha256=UTa6Q7IxhRBCwPftowHEUDvmBg99J_UjGS_yxTzKD7g,1095
2
+ livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
+ livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
4
+ livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/cartesia/tts.py,sha256=2xwWOIjwLDOF4TbHlDibrZpUju9If8WrNpHQ2JMuBC0,13533
6
+ livekit/plugins/cartesia/version.py,sha256=u7PSD5TBbPRIhE8vJkBVJzq_eGqYfg6RP5c3VKNlKGk,600
7
+ livekit_plugins_cartesia-0.4.3.dist-info/METADATA,sha256=w5q0oz6rdHDL5cxAyT5hWbHqhZnOPnZYGl3aUKsr3z4,1246
8
+ livekit_plugins_cartesia-0.4.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
9
+ livekit_plugins_cartesia-0.4.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_cartesia-0.4.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.2)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- livekit/plugins/cartesia/__init__.py,sha256=BUfWY_evL5dUHn9hBDQVor6ssctDKQfbQfZy5SWndN8,926
2
- livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
- livekit/plugins/cartesia/models.py,sha256=fOO276Vzw3OkDUWUVcw7PH95ctFy38rj3q9I6_mYQ7M,950
4
- livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/cartesia/tts.py,sha256=kUGIhsmHqIK2m_FV44_nwjHp0c7Zb2H7UG9VayNIae8,11341
6
- livekit/plugins/cartesia/version.py,sha256=jabhjXzHcov1Cy2z9FGgyHFpSQ3hFKqu3vly20WQeTs,600
7
- livekit_plugins_cartesia-0.4.2.dist-info/METADATA,sha256=w9ZGYOicE_fUFVTnhgvewGgWgwmaInoG9w6BGTiOu-8,1252
8
- livekit_plugins_cartesia-0.4.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
9
- livekit_plugins_cartesia-0.4.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
- livekit_plugins_cartesia-0.4.2.dist-info/RECORD,,