livekit-plugins-elevenlabs 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -63,6 +63,8 @@ class TTSOptions:
63
63
  voice: Voice
64
64
  model_id: TTSModels
65
65
  base_url: str
66
+ sample_rate: int
67
+ latency: int
66
68
 
67
69
 
68
70
  class TTS(tts.TTS):
@@ -73,6 +75,8 @@ class TTS(tts.TTS):
73
75
  model_id: TTSModels = "eleven_multilingual_v2",
74
76
  api_key: Optional[str] = None,
75
77
  base_url: Optional[str] = None,
78
+ sample_rate: int = 24000,
79
+ latency: int = 2,
76
80
  ) -> None:
77
81
  super().__init__(streaming_supported=True)
78
82
  api_key = api_key or os.environ.get("ELEVEN_API_KEY")
@@ -85,6 +89,8 @@ class TTS(tts.TTS):
85
89
  model_id=model_id,
86
90
  api_key=api_key,
87
91
  base_url=base_url or API_BASE_URL_V1,
92
+ sample_rate=sample_rate,
93
+ latency=latency,
88
94
  )
89
95
 
90
96
  async def list_voices(self) -> List[Voice]:
@@ -134,11 +140,9 @@ class SynthesizeStream(tts.SynthesizeStream):
134
140
  self,
135
141
  session: aiohttp.ClientSession,
136
142
  config: TTSOptions,
137
- latency: int = 2, # [1-4] the higher the more optimized for streaming latency
138
143
  ):
139
144
  self._config = config
140
145
  self._session = session
141
- self._latency = latency
142
146
 
143
147
  self._queue = asyncio.Queue[str]()
144
148
  self._event_queue = asyncio.Queue[tts.SynthesisEvent]()
@@ -157,7 +161,7 @@ class SynthesizeStream(tts.SynthesizeStream):
157
161
  base_url = self._config.base_url
158
162
  voice_id = self._config.voice.id
159
163
  model_id = self._config.model_id
160
- return f"{base_url}/text-to-speech/{voice_id}/stream-input?model_id={model_id}&output_format=pcm_44100&optimize_streaming_latency={self._latency}"
164
+ return f"{base_url}/text-to-speech/{voice_id}/stream-input?model_id={model_id}&output_format=pcm_{self._config.sample_rate}&optimize_streaming_latency={self._config.latency}"
161
165
 
162
166
  def push_text(self, token: str) -> None:
163
167
  if self._closed:
@@ -180,6 +184,7 @@ class SynthesizeStream(tts.SynthesizeStream):
180
184
  retry_count = 0
181
185
  listen_task: Optional[asyncio.Task] = None
182
186
  ws: Optional[aiohttp.ClientWebSocketResponse] = None
187
+ retry_text_queue: asyncio.Queue[str] = asyncio.Queue()
183
188
  while True:
184
189
  try:
185
190
  ws = await self._try_connect()
@@ -190,7 +195,13 @@ class SynthesizeStream(tts.SynthesizeStream):
190
195
  # forward queued text to 11labs
191
196
  started = False
192
197
  while not ws.closed:
193
- text = await self._queue.get()
198
+ text = None
199
+ if retry_text_queue.empty():
200
+ text = await retry_text_queue.get()
201
+ retry_text_queue.task_done()
202
+ else:
203
+ text = await self._queue.get()
204
+
194
205
  if not started:
195
206
  self._event_queue.put_nowait(
196
207
  tts.SynthesisEvent(type=tts.SynthesisEventType.STARTED)
@@ -200,7 +211,19 @@ class SynthesizeStream(tts.SynthesizeStream):
200
211
  text=text,
201
212
  try_trigger_generation=True,
202
213
  )
203
- await ws.send_str(json.dumps(text_packet))
214
+
215
+ # This case can happen in normal operation because 11labs will not
216
+ # keep connections open indefinitely if we are not sending data.
217
+ try:
218
+ await ws.send_str(json.dumps(text_packet))
219
+ except Exception:
220
+ await retry_text_queue.put(text)
221
+ break
222
+
223
+ # We call self._queue.task_done() even if we are retrying the text because
224
+ # all text has gone through self._queue. An exception may have short-circuited
225
+ # out of the loop so task_done() will not have already been called on text that
226
+ # is being retried.
204
227
  self._queue.task_done()
205
228
  if text == STREAM_EOS:
206
229
  await listen_task
@@ -265,7 +288,7 @@ class SynthesizeStream(tts.SynthesizeStream):
265
288
  data = base64.b64decode(msg["audio"])
266
289
  audio_frame = rtc.AudioFrame(
267
290
  data=data,
268
- sample_rate=44100,
291
+ sample_rate=self._config.sample_rate,
269
292
  num_channels=1,
270
293
  samples_per_channel=len(data) // 2,
271
294
  )
@@ -277,6 +300,8 @@ class SynthesizeStream(tts.SynthesizeStream):
277
300
  )
278
301
  elif msg.get("isFinal"):
279
302
  break
303
+ else:
304
+ logging.error(f"Unhandled message from ElevenLabs: {msg}")
280
305
 
281
306
  async def flush(self) -> None:
282
307
  self._queue.put_nowait(self._text + " ")
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.1.2"
15
+ __version__ = "0.1.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -0,0 +1,8 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=-FQ-hnTqqbaVFa0sEu8CwInVp9vzkt-nRWkd34ruFFk,977
2
+ livekit/plugins/elevenlabs/models.py,sha256=g46mCMMHP3x3qtHmybHHMcid1UwmjKCcF0T4IWjMjWE,163
3
+ livekit/plugins/elevenlabs/tts.py,sha256=K3dg8En-GX6-pKxioSeEP1jTsIOuPaDAfgYvC_itE4k,11110
4
+ livekit/plugins/elevenlabs/version.py,sha256=JisuVeJTYHFXsPWF9pf3j_4J8bsnqlKqN-xXzq_6vhI,600
5
+ livekit_plugins_elevenlabs-0.1.4.dist-info/METADATA,sha256=wZTATw_9t4d1x-UsahhpwFITfX_J6c1kxvaU5Izq8kM,1360
6
+ livekit_plugins_elevenlabs-0.1.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
7
+ livekit_plugins_elevenlabs-0.1.4.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
8
+ livekit_plugins_elevenlabs-0.1.4.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=-FQ-hnTqqbaVFa0sEu8CwInVp9vzkt-nRWkd34ruFFk,977
2
- livekit/plugins/elevenlabs/models.py,sha256=g46mCMMHP3x3qtHmybHHMcid1UwmjKCcF0T4IWjMjWE,163
3
- livekit/plugins/elevenlabs/tts.py,sha256=kGFh5yCdAxss97wf-Z3mfQtxs7V8wXJmmnKlsOGTe30,9975
4
- livekit/plugins/elevenlabs/version.py,sha256=S3xxF-H96nScSv_7l7IUvROJ0avu9oz5Gm6j673md_Y,600
5
- livekit_plugins_elevenlabs-0.1.2.dist-info/METADATA,sha256=1R3mr6tpE6KWEyzseGy80VOifmjSdcv9vasuIKDk-Hs,1360
6
- livekit_plugins_elevenlabs-0.1.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
7
- livekit_plugins_elevenlabs-0.1.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
8
- livekit_plugins_elevenlabs-0.1.2.dist-info/RECORD,,