livekit-plugins-resemble 1.0.22__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-resemble might be problematic. See the linked release details for more information.

@@ -19,13 +19,14 @@ import base64
19
19
  import json
20
20
  import os
21
21
  import weakref
22
- from dataclasses import dataclass
22
+ from dataclasses import dataclass, replace
23
23
 
24
24
  import aiohttp
25
25
 
26
26
  from livekit.agents import (
27
27
  APIConnectionError,
28
28
  APIConnectOptions,
29
+ APIError,
29
30
  APIStatusError,
30
31
  APITimeoutError,
31
32
  tokenize,
@@ -38,7 +39,6 @@ from .log import logger
38
39
 
39
40
  RESEMBLE_WEBSOCKET_URL = "wss://websocket.cluster.resemble.ai/stream"
40
41
  RESEMBLE_REST_API_URL = "https://f.cluster.resemble.ai/synthesize"
41
- NUM_CHANNELS = 1
42
42
  DEFAULT_VOICE_UUID = "55592656"
43
43
  BUFFERED_WORDS_COUNT = 3
44
44
 
@@ -77,7 +77,7 @@ class TTS(tts.TTS):
77
77
  super().__init__(
78
78
  capabilities=tts.TTSCapabilities(streaming=use_streaming),
79
79
  sample_rate=sample_rate,
80
- num_channels=NUM_CHANNELS,
80
+ num_channels=1,
81
81
  )
82
82
 
83
83
  api_key = api_key or os.environ.get("RESEMBLE_API_KEY")
@@ -107,18 +107,16 @@ class TTS(tts.TTS):
107
107
  close_cb=self._close_ws,
108
108
  )
109
109
 
110
- async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
111
- session = self._ensure_session()
112
-
110
+ async def _connect_ws(self, timeout: float) -> aiohttp.ClientWebSocketResponse:
113
111
  return await asyncio.wait_for(
114
- session.ws_connect(
112
+ self._ensure_session().ws_connect(
115
113
  RESEMBLE_WEBSOCKET_URL,
116
114
  headers={"Authorization": f"Bearer {self._api_key}"},
117
115
  ),
118
- self._conn_options.timeout,
116
+ timeout,
119
117
  )
120
118
 
121
- async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse):
119
+ async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None:
122
120
  await ws.close()
123
121
 
124
122
  def _ensure_session(self) -> aiohttp.ClientSession:
@@ -134,153 +132,95 @@ class TTS(tts.TTS):
134
132
  self,
135
133
  *,
136
134
  voice_uuid: str | None = None,
137
- sample_rate: int | None = None,
138
135
  ) -> None:
139
136
  """
140
137
  Update the Text-to-Speech (TTS) configuration options.
141
138
 
142
139
  Args:
143
140
  voice_uuid (str, optional): The voice UUID for the desired voice.
144
- sample_rate (int, optional): The audio sample rate in Hz.
145
141
  """ # noqa: E501
146
142
  self._opts.voice_uuid = voice_uuid or self._opts.voice_uuid
147
- self._opts.sample_rate = sample_rate or self._opts.sample_rate
148
143
 
149
144
  def synthesize(
150
- self,
151
- text: str,
152
- *,
153
- conn_options: APIConnectOptions | None = None,
145
+ self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
154
146
  ) -> ChunkedStream:
155
- return ChunkedStream(
156
- tts=self,
157
- input_text=text,
158
- conn_options=conn_options or DEFAULT_API_CONNECT_OPTIONS,
159
- opts=self._opts,
160
- api_key=self._api_key,
161
- session=self._ensure_session(),
162
- )
147
+ return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
163
148
 
164
- def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
165
- stream = SynthesizeStream(
166
- tts=self,
167
- pool=self._pool,
168
- opts=self._opts,
169
- api_key=self._api_key,
170
- )
149
+ def stream(
150
+ self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
151
+ ) -> SynthesizeStream:
152
+ stream = SynthesizeStream(tts=self, conn_options=conn_options)
171
153
  self._streams.add(stream)
172
154
  return stream
173
155
 
174
156
  async def aclose(self) -> None:
175
157
  for stream in list(self._streams):
176
158
  await stream.aclose()
159
+
177
160
  self._streams.clear()
178
161
  await self._pool.aclose()
179
- await super().aclose()
180
162
 
181
163
 
182
164
  class ChunkedStream(tts.ChunkedStream):
183
165
  """Synthesize text into speech in one go using Resemble AI's REST API."""
184
166
 
185
- def __init__(
186
- self,
187
- *,
188
- tts: TTS,
189
- input_text: str,
190
- opts: _TTSOptions,
191
- conn_options: APIConnectOptions,
192
- api_key: str,
193
- session: aiohttp.ClientSession,
194
- ) -> None:
167
+ def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
195
168
  super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
196
- self._opts, self._session, self._api_key = opts, session, api_key
197
-
198
- async def _run(self) -> None:
199
- request_id = utils.shortuuid()
200
-
201
- # Create request headers
202
- headers = {
203
- "Authorization": f"Bearer {self._api_key}",
204
- "Content-Type": "application/json",
205
- "Accept": "application/json", # Expect JSON response
206
- }
207
-
208
- # Create request payload
209
- payload = {
210
- "voice_uuid": self._opts.voice_uuid,
211
- "data": self._input_text,
212
- "sample_rate": self._opts.sample_rate,
213
- "precision": "PCM_16",
214
- }
215
- decoder = utils.codecs.AudioStreamDecoder(
216
- sample_rate=self._opts.sample_rate,
217
- num_channels=NUM_CHANNELS,
218
- )
169
+ self._tts: TTS = tts
170
+ self._opts = replace(tts._opts)
219
171
 
172
+ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
220
173
  try:
221
- async with self._session.post(
174
+ async with self._tts._ensure_session().post(
222
175
  RESEMBLE_REST_API_URL,
223
- headers=headers,
224
- json=payload,
176
+ headers={
177
+ "Authorization": f"Bearer {self._tts._api_key}",
178
+ "Content-Type": "application/json",
179
+ "Accept": "application/json",
180
+ },
181
+ json={
182
+ "voice_uuid": self._opts.voice_uuid,
183
+ "data": self._input_text,
184
+ "sample_rate": self._opts.sample_rate,
185
+ "precision": "PCM_16",
186
+ },
225
187
  timeout=aiohttp.ClientTimeout(
226
188
  total=30,
227
189
  sock_connect=self._conn_options.timeout,
228
190
  ),
229
- ) as response:
230
- response.raise_for_status()
231
- response_json = await response.json()
191
+ ) as resp:
192
+ resp.raise_for_status()
193
+ response_json = await resp.json()
232
194
 
233
- # Check for success
234
195
  if not response_json.get("success", False):
235
196
  issues = response_json.get("issues", ["Unknown error"])
236
197
  error_msg = "; ".join(issues)
237
- raise APIStatusError(
198
+ raise APIError(
238
199
  message=f"Resemble API returned failure: {error_msg}",
239
- status_code=response.status,
240
- request_id=request_id,
241
200
  body=json.dumps(response_json),
242
201
  )
243
202
 
244
- # Extract base64-encoded audio content
245
- audio_content_b64 = response_json.get("audio_content")
246
- if not audio_content_b64:
247
- raise APIStatusError(
248
- message="No audio content in response",
249
- status_code=response.status,
250
- request_id=request_id,
251
- body=json.dumps(response_json),
252
- )
203
+ output_emitter.initialize(
204
+ request_id=utils.shortuuid(),
205
+ sample_rate=self._opts.sample_rate,
206
+ num_channels=1,
207
+ mime_type="audio/wav",
208
+ )
253
209
 
254
- # Decode base64 to get raw audio bytes
255
- audio_bytes = base64.b64decode(audio_content_b64)
256
- decoder.push(audio_bytes)
257
- decoder.end_input()
210
+ audio_b64 = response_json["audio_content"]
211
+ audio_bytes = base64.b64decode(audio_b64)
258
212
 
259
- emitter = tts.SynthesizedAudioEmitter(
260
- event_ch=self._event_ch,
261
- request_id=request_id,
262
- )
263
- async for frame in decoder:
264
- emitter.push(frame)
265
- emitter.flush()
213
+ output_emitter.push(audio_bytes)
214
+ output_emitter.flush()
266
215
 
216
+ except asyncio.TimeoutError:
217
+ raise APITimeoutError() from None
267
218
  except aiohttp.ClientResponseError as e:
268
219
  raise APIStatusError(
269
- message=e.message,
270
- status_code=e.status,
271
- request_id=request_id,
272
- body=f"resemble api error: {str(e)}",
273
- ) from e
274
- except asyncio.TimeoutError as e:
275
- raise APITimeoutError() from e
276
- except aiohttp.ClientError as e:
277
- raise APIConnectionError(
278
- message=f"Resemble API connection error: {str(e)}",
279
- ) from e
220
+ message=e.message, status_code=e.status, request_id=None, body=None
221
+ ) from None
280
222
  except Exception as e:
281
- raise APIConnectionError(f"Error during synthesis: {str(e)}") from e
282
- finally:
283
- await decoder.aclose()
223
+ raise APIConnectionError() from e
284
224
 
285
225
 
286
226
  class SynthesizeStream(tts.SynthesizeStream):
@@ -291,44 +231,45 @@ class SynthesizeStream(tts.SynthesizeStream):
291
231
  synthesis. Note that this requires a Business plan subscription with Resemble AI.
292
232
  """
293
233
 
294
- def __init__(
295
- self,
296
- *,
297
- tts: TTS,
298
- opts: _TTSOptions,
299
- pool: utils.ConnectionPool[aiohttp.ClientWebSocketResponse],
300
- api_key: str,
301
- ):
302
- super().__init__(tts=tts)
303
- self._opts, self._pool, self._api_key = opts, pool, api_key
304
-
305
- async def _run(self) -> None:
306
- request_id = utils.shortuuid()
234
+ def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
235
+ super().__init__(tts=tts, conn_options=conn_options)
236
+ self._tts: TTS = tts
237
+ self._opts = replace(tts._opts)
307
238
  self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
308
239
 
309
- @utils.log_exceptions(logger=logger)
310
- async def _tokenize_input():
240
+ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
241
+ request_id = utils.shortuuid()
242
+ output_emitter.initialize(
243
+ request_id=request_id,
244
+ sample_rate=self._opts.sample_rate,
245
+ num_channels=1,
246
+ stream=True,
247
+ mime_type="audio/mp3",
248
+ )
249
+
250
+ async def _tokenize_input() -> None:
311
251
  """tokenize text from the input_ch to words"""
312
252
  input_stream = None
313
- async for input in self._input_ch:
314
- if isinstance(input, str):
253
+ async for text in self._input_ch:
254
+ if isinstance(text, str):
315
255
  if input_stream is None:
316
256
  # new segment (after flush for e.g)
317
257
  input_stream = self._opts.tokenizer.stream()
318
258
  self._segments_ch.send_nowait(input_stream)
319
- input_stream.push_text(input)
320
- elif isinstance(input, self._FlushSentinel):
259
+ input_stream.push_text(text)
260
+ elif isinstance(text, self._FlushSentinel):
321
261
  if input_stream is not None:
322
262
  input_stream.end_input()
323
263
  input_stream = None
264
+
324
265
  if input_stream is not None:
325
266
  input_stream.end_input()
267
+
326
268
  self._segments_ch.close()
327
269
 
328
- @utils.log_exceptions(logger=logger)
329
- async def _process_segments():
270
+ async def _process_segments() -> None:
330
271
  async for input_stream in self._segments_ch:
331
- await self._run_ws(input_stream)
272
+ await self._run_ws(input_stream, output_emitter)
332
273
 
333
274
  tasks = [
334
275
  asyncio.create_task(_tokenize_input()),
@@ -336,117 +277,77 @@ class SynthesizeStream(tts.SynthesizeStream):
336
277
  ]
337
278
  try:
338
279
  await asyncio.gather(*tasks)
339
- except asyncio.TimeoutError as e:
340
- raise APITimeoutError() from e
280
+ except asyncio.TimeoutError:
281
+ raise APITimeoutError() from None
341
282
  except aiohttp.ClientResponseError as e:
342
283
  raise APIStatusError(
343
- message=e.message,
344
- status_code=e.status,
345
- request_id=request_id,
346
- body=None,
347
- ) from e
284
+ message=e.message, status_code=e.status, request_id=request_id, body=None
285
+ ) from None
348
286
  except Exception as e:
349
287
  raise APIConnectionError() from e
350
288
  finally:
351
289
  await utils.aio.gracefully_cancel(*tasks)
352
290
 
353
291
  async def _run_ws(
354
- self,
355
- input_stream: tokenize.SentenceStream,
292
+ self, input_stream: tokenize.SentenceStream, output_emitter: tts.AudioEmitter
356
293
  ) -> None:
357
- async with self._pool.connection() as ws:
358
- segment_id = utils.shortuuid()
359
- decoder = utils.codecs.AudioStreamDecoder(
360
- sample_rate=self._opts.sample_rate,
361
- num_channels=NUM_CHANNELS,
362
- )
363
- index_lock = asyncio.Lock()
364
- current_index = 0
365
- pending_requests = set()
366
-
367
- @utils.log_exceptions(logger=logger)
368
- async def _send_task(ws: aiohttp.ClientWebSocketResponse):
369
- nonlocal current_index
370
- index = 0
371
- async for data in input_stream:
372
- payload = {
373
- "voice_uuid": self._opts.voice_uuid,
374
- "data": data.token,
375
- "request_id": index,
376
- "sample_rate": self._opts.sample_rate,
377
- "precision": "PCM_16",
378
- "output_format": "mp3",
379
- }
380
- async with index_lock:
381
- pending_requests.add(index)
382
- index += 1
383
- current_index = index
384
- await ws.send_str(json.dumps(payload))
385
-
386
- @utils.log_exceptions(logger=logger)
387
- async def _emit_task():
388
- emitter = tts.SynthesizedAudioEmitter(
389
- event_ch=self._event_ch,
390
- request_id=str(current_index),
391
- segment_id=segment_id,
392
- )
393
- async for frame in decoder:
394
- emitter.push(frame)
395
- emitter.flush()
396
-
397
- @utils.log_exceptions(logger=logger)
398
- async def _recv_task(ws: aiohttp.ClientWebSocketResponse):
399
- while True:
400
- msg = await ws.receive()
401
- if msg.type in (
402
- aiohttp.WSMsgType.CLOSED,
403
- aiohttp.WSMsgType.CLOSE,
404
- aiohttp.WSMsgType.CLOSING,
405
- ):
406
- raise APIStatusError(
407
- "Resemble connection closed unexpectedly",
408
- request_id=str(current_index),
409
- )
410
-
411
- if msg.type != aiohttp.WSMsgType.TEXT:
412
- logger.warning("Unexpected Resemble message type %s", msg.type)
413
- continue
414
-
415
- data = json.loads(msg.data)
416
-
417
- if data.get("type") == "audio":
418
- if data.get("audio_content", None):
419
- b64data = base64.b64decode(data["audio_content"])
420
- decoder.push(b64data)
421
-
422
- elif data.get("type") == "audio_end":
423
- async with index_lock:
424
- index = data["request_id"]
425
- pending_requests.remove(index)
426
- if not pending_requests:
427
- decoder.end_input()
428
- break # we are not going to receive any more audio
429
- else:
430
- logger.error("Unexpected Resemble message %s", data)
431
-
294
+ segment_id = utils.shortuuid()
295
+ output_emitter.start_segment(segment_id=segment_id)
296
+
297
+ last_index = 0
298
+ input_ended = False
299
+
300
+ async def _send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
301
+ nonlocal input_ended, last_index
302
+ async for data in input_stream:
303
+ last_index += 1
304
+ payload = {
305
+ "voice_uuid": self._opts.voice_uuid,
306
+ "data": data.token,
307
+ "request_id": last_index,
308
+ "sample_rate": self._opts.sample_rate,
309
+ "precision": "PCM_16",
310
+ "output_format": "mp3",
311
+ }
312
+ self._mark_started()
313
+ await ws.send_str(json.dumps(payload))
314
+
315
+ input_ended = True
316
+
317
+ async def _recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
318
+ while True:
319
+ msg = await ws.receive()
320
+ if msg.type in (
321
+ aiohttp.WSMsgType.CLOSED,
322
+ aiohttp.WSMsgType.CLOSE,
323
+ aiohttp.WSMsgType.CLOSING,
324
+ ):
325
+ raise APIStatusError("Resemble connection closed unexpectedly")
326
+
327
+ if msg.type != aiohttp.WSMsgType.TEXT:
328
+ logger.warning("Unexpected Resemble message type %s", msg.type)
329
+ continue
330
+
331
+ data = json.loads(msg.data)
332
+ if data.get("type") == "audio":
333
+ if data.get("audio_content", None):
334
+ b64data = base64.b64decode(data["audio_content"])
335
+ output_emitter.push(b64data)
336
+
337
+ elif data.get("type") == "audio_end":
338
+ index = data["request_id"]
339
+ if index == last_index and input_ended:
340
+ output_emitter.end_segment()
341
+ break
342
+ else:
343
+ logger.error("Unexpected Resemble message %s", data)
344
+
345
+ async with self._tts._pool.connection(timeout=self._conn_options.timeout) as ws:
432
346
  tasks = [
433
347
  asyncio.create_task(_send_task(ws)),
434
348
  asyncio.create_task(_recv_task(ws)),
435
- asyncio.create_task(_emit_task()),
436
349
  ]
437
-
438
350
  try:
439
351
  await asyncio.gather(*tasks)
440
- except asyncio.TimeoutError as e:
441
- raise APITimeoutError() from e
442
- except aiohttp.ClientResponseError as e:
443
- raise APIStatusError(
444
- message=e.message,
445
- status_code=e.status,
446
- request_id=str(current_index),
447
- body=None,
448
- ) from e
449
- except Exception as e:
450
- raise APIConnectionError() from e
451
352
  finally:
452
353
  await utils.aio.gracefully_cancel(*tasks)
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.22"
15
+ __version__ = "1.1.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-resemble
3
- Version: 1.0.22
3
+ Version: 1.1.0
4
4
  Summary: LiveKit Agents Plugin for Resemble AI
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
18
18
  Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Requires-Dist: livekit-agents>=1.0.22
21
+ Requires-Dist: livekit-agents>=1.1.0
22
22
  Description-Content-Type: text/markdown
23
23
 
24
24
  # Resemble plugin for LiveKit Agents
@@ -0,0 +1,9 @@
1
+ livekit/plugins/resemble/__init__.py,sha256=bu6APAMiEaeKgD-hfMgoSv3EeH8_OQE5aJ_qPRv6pZ8,1236
2
+ livekit/plugins/resemble/log.py,sha256=Pgg3yqt4OUcjrnnF8SKfH7G-Dk7jFI0yIhDa5hjTW5k,71
3
+ livekit/plugins/resemble/models.py,sha256=nK29wOCKkS29KjbiDaTpb7mlmUQSad9U_0bTD8yRcwk,74
4
+ livekit/plugins/resemble/py.typed,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
5
+ livekit/plugins/resemble/tts.py,sha256=FWxE6pPRgHVi_mDQgBA9qaoMwsX2lLK2fJqwPemdzGc,12838
6
+ livekit/plugins/resemble/version.py,sha256=7SjyflIFTjH0djSotKGIRoRykPCqMpVYetIlvHMFuh0,600
7
+ livekit_plugins_resemble-1.1.0.dist-info/METADATA,sha256=5kKSonWfHWA5jPc7lMo-d-a1uMeIrSpQbjCxbPuSWNo,4935
8
+ livekit_plugins_resemble-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_resemble-1.1.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- livekit/plugins/resemble/__init__.py,sha256=bu6APAMiEaeKgD-hfMgoSv3EeH8_OQE5aJ_qPRv6pZ8,1236
2
- livekit/plugins/resemble/log.py,sha256=Pgg3yqt4OUcjrnnF8SKfH7G-Dk7jFI0yIhDa5hjTW5k,71
3
- livekit/plugins/resemble/models.py,sha256=nK29wOCKkS29KjbiDaTpb7mlmUQSad9U_0bTD8yRcwk,74
4
- livekit/plugins/resemble/py.typed,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
5
- livekit/plugins/resemble/tts.py,sha256=N8T0NrYh_nW77DN9yzI4OiNmSxzY7h9fISD2xHYfI8A,16169
6
- livekit/plugins/resemble/version.py,sha256=-8dkOE2vDSF9WN8VoBrSwU2sb5YBGFuwPnSQXQ-uaYM,601
7
- livekit_plugins_resemble-1.0.22.dist-info/METADATA,sha256=TFC2j4BIUqNZQZplxGMZF0mM4nc4fyFYYkJG2Ua0w6M,4937
8
- livekit_plugins_resemble-1.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- livekit_plugins_resemble-1.0.22.dist-info/RECORD,,