livekit-plugins-elevenlabs 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,10 +13,12 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .models import TTSEncoding, TTSModels
16
+ from .stt import STT
16
17
  from .tts import DEFAULT_VOICE_ID, TTS, Voice, VoiceSettings
17
18
  from .version import __version__
18
19
 
19
20
  __all__ = [
21
+ "STT",
20
22
  "TTS",
21
23
  "Voice",
22
24
  "VoiceSettings",
@@ -0,0 +1,127 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import os
19
+ from dataclasses import dataclass
20
+
21
+ import aiohttp
22
+
23
+ from livekit import rtc
24
+ from livekit.agents import (
25
+ DEFAULT_API_CONNECT_OPTIONS,
26
+ APIConnectionError,
27
+ APIConnectOptions,
28
+ APIStatusError,
29
+ APITimeoutError,
30
+ stt,
31
+ )
32
+ from livekit.agents.stt import SpeechEventType, STTCapabilities
33
+ from livekit.agents.types import NOT_GIVEN, NotGivenOr
34
+ from livekit.agents.utils import AudioBuffer, http_context, is_given
35
+
36
+ API_BASE_URL_V1 = "https://api.elevenlabs.io/v1"
37
+ AUTHORIZATION_HEADER = "xi-api-key"
38
+
39
+
40
+ @dataclass
41
+ class _STTOptions:
42
+ api_key: str
43
+ base_url: str
44
+ language_code: str = "en"
45
+
46
+
47
class STT(stt.STT):
    """Non-streaming speech-to-text backed by the ElevenLabs ``speech-to-text`` endpoint.

    Each recognition call uploads the whole audio buffer as one WAV file and
    returns a single final transcript.
    """

    def __init__(
        self,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        http_session: aiohttp.ClientSession | None = None,
        language_code: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of ElevenLabs STT.

        Args:
            api_key (NotGivenOr[str]): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            language_code (NotGivenOr[str]): Language code for the STT model. Optional.

        Raises:
            ValueError: If no API key is passed and `ELEVEN_API_KEY` is unset or empty.
        """  # noqa: E501
        super().__init__(capabilities=STTCapabilities(streaming=False, interim_results=True))

        elevenlabs_api_key = api_key if is_given(api_key) else os.environ.get("ELEVEN_API_KEY")
        if not elevenlabs_api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or "
                "set ELEVEN_API_KEY environmental variable"
            )
        self._opts = _STTOptions(
            api_key=elevenlabs_api_key,
            base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
        )
        # Only override the dataclass default when a language was actually
        # supplied; otherwise the NOT_GIVEN sentinel would be stored and later
        # sent as the form's ``language_code`` value.
        if is_given(language_code):
            self._opts.language_code = language_code
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching one from ``http_context`` on first use."""
        if not self._session:
            self._session = http_context.http_session()

        return self._session

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Transcribe ``buffer`` with a single multipart upload.

        Args:
            buffer: Audio frames to transcribe; they are merged and encoded as WAV.
            language: Language code for this request. When given it is also
                stored on the instance options, so it stays in effect for
                later calls.
            conn_options: Accepted for interface compatibility; this method
                does not read it.

        Returns:
            A ``FINAL_TRANSCRIPT`` event holding the recognized text.

        Raises:
            APITimeoutError: The request timed out.
            APIStatusError: The server answered with an HTTP error status.
            APIConnectionError: Any other failure while sending the request
                or reading the response.
        """
        if is_given(language):
            self._opts.language_code = language

        wav_bytes = rtc.combine_audio_frames(buffer).to_wav_bytes()
        form = aiohttp.FormData()
        form.add_field("file", wav_bytes, filename="audio.wav", content_type="audio/x-wav")
        form.add_field("model_id", "scribe_v1")
        form.add_field("language_code", self._opts.language_code)

        try:
            async with self._ensure_session().post(
                # Honor the configured base URL rather than the built-in default.
                f"{self._opts.base_url}/speech-to-text",
                data=form,
                headers={AUTHORIZATION_HEADER: self._opts.api_key},
            ) as response:
                # Without this check an HTTP error response would be parsed as
                # a successful one and never reach the APIStatusError branch.
                response.raise_for_status()
                response_json = await response.json()
                # Fall back to an empty transcript when "text" is missing or null.
                extracted_text = response_json.get("text") or ""
        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from e
        except Exception as e:
            raise APIConnectionError() from e

        return self._transcription_to_speech_event(text=extracted_text)

    def _transcription_to_speech_event(self, text: str) -> stt.SpeechEvent:
        """Wrap ``text`` in a final-transcript event tagged with the current language code."""
        return stt.SpeechEvent(
            type=SpeechEventType.FINAL_TRANSCRIPT,
            alternatives=[stt.SpeechData(text=text, language=self._opts.language_code)],
        )
@@ -418,20 +418,25 @@ class SynthesizeStream(tts.SynthesizeStream):
418
418
  xml_content = []
419
419
  async for data in word_stream:
420
420
  text = data.token
421
- # send the xml phoneme in one go
421
+ # send xml tags fully formed
422
+ xml_start_tokens = ["<phoneme", "<break"]
423
+ xml_end_tokens = ["</phoneme>", "/>"]
424
+
422
425
  if (
423
426
  self._opts.enable_ssml_parsing
424
- and data.token.startswith("<phoneme")
427
+ and any(data.token.startswith(start) for start in xml_start_tokens)
425
428
  or xml_content
426
429
  ):
427
430
  xml_content.append(text)
428
- if data.token.find("</phoneme>") > -1:
431
+
432
+ if any(data.token.find(end) > -1 for end in xml_end_tokens):
429
433
  text = self._opts.word_tokenizer.format_words(xml_content)
430
434
  xml_content = []
431
435
  else:
432
436
  continue
433
437
 
434
438
  data_pkt = {"text": f"{text} "} # must always end with a space
439
+
435
440
  self._mark_started()
436
441
  await ws_conn.send_str(json.dumps(data_pkt))
437
442
  if xml_content:
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.17"
15
+ __version__ = "1.0.19"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-elevenlabs
3
- Version: 1.0.17
3
+ Version: 1.0.19
4
4
  Summary: Agent Framework plugin for voice synthesis with ElevenLabs' API.
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
18
18
  Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Requires-Dist: livekit-agents[codecs]>=1.0.17
21
+ Requires-Dist: livekit-agents[codecs]>=1.0.19
22
22
  Description-Content-Type: text/markdown
23
23
 
24
24
  # LiveKit Plugins Elevenlabs
@@ -0,0 +1,10 @@
1
+ livekit/plugins/elevenlabs/__init__.py,sha256=h6pPP9kT348Gfgzo9Wj3LpkoiW0JfIu3POL1CDWK3PE,1287
2
+ livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
+ livekit/plugins/elevenlabs/models.py,sha256=p_wHEz15bdsNEqwzN831ysm70PNWQ-xeN__BKvGPZxA,401
4
+ livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/elevenlabs/stt.py,sha256=1B8c7t_52GIbnPSFLq44Fkm0gnnFUZs7xX9nIWbsAQM,4528
6
+ livekit/plugins/elevenlabs/tts.py,sha256=5QHq9Yds7dclJZgyPyfHtnkMXb_eSzGBQvOIEh_S3oA,20305
7
+ livekit/plugins/elevenlabs/version.py,sha256=UDC8ahmGgRkv-qMQUY3QibuuVevGMQ9Fd4yIhcQBZwA,601
8
+ livekit_plugins_elevenlabs-1.0.19.dist-info/METADATA,sha256=GzvJZv865rY5NCclqmKVlKtc4xpjYpNZzOXT7NkEqeY,1314
9
+ livekit_plugins_elevenlabs-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ livekit_plugins_elevenlabs-1.0.19.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- livekit/plugins/elevenlabs/__init__.py,sha256=Va24UYTuuosmRuTcuzd_DIHYQOgV-wSYKJIXmOSB2Go,1255
2
- livekit/plugins/elevenlabs/log.py,sha256=hIuXqDsEB5GBa7rQY3z4Uqi1oCqc_lRmCHZEmXz0LHw,73
3
- livekit/plugins/elevenlabs/models.py,sha256=p_wHEz15bdsNEqwzN831ysm70PNWQ-xeN__BKvGPZxA,401
4
- livekit/plugins/elevenlabs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/elevenlabs/tts.py,sha256=KPFf8845VCNG9z5mmwFocjgTilKk-uhA3uvRAnlAd8c,20142
6
- livekit/plugins/elevenlabs/version.py,sha256=GOfJB-DKZur-i3hrjFbzgpC2NHE96dnWhGLziW1e0_E,601
7
- livekit_plugins_elevenlabs-1.0.17.dist-info/METADATA,sha256=PIPfmDNCJMVfkaBMmCOMhFTe-nTB65zs61yAdo6tdT8,1314
8
- livekit_plugins_elevenlabs-1.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- livekit_plugins_elevenlabs-1.0.17.dist-info/RECORD,,