livekit-plugins-google 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/__init__.py +9 -0
- livekit/plugins/google/stt.py +33 -10
- livekit/plugins/google/tts.py +117 -47
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.7.0.dist-info → livekit_plugins_google-0.7.2.dist-info}/METADATA +2 -2
- livekit_plugins_google-0.7.2.dist-info/RECORD +11 -0
- {livekit_plugins_google-0.7.0.dist-info → livekit_plugins_google-0.7.2.dist-info}/WHEEL +1 -1
- livekit_plugins_google-0.7.0.dist-info/RECORD +0 -11
- {livekit_plugins_google-0.7.0.dist-info → livekit_plugins_google-0.7.2.dist-info}/top_level.txt +0 -0
@@ -29,3 +29,12 @@ class GooglePlugin(Plugin):
|
|
29
29
|
|
30
30
|
|
31
31
|
Plugin.register_plugin(GooglePlugin())
|
32
|
+
|
33
|
+
# Cleanup docs of unexported modules
|
34
|
+
_module = dir()
|
35
|
+
NOT_IN_ALL = [m for m in _module if m not in __all__]
|
36
|
+
|
37
|
+
__pdoc__ = {}
|
38
|
+
|
39
|
+
for n in NOT_IN_ALL:
|
40
|
+
__pdoc__[n] = False
|
livekit/plugins/google/stt.py
CHANGED
@@ -20,8 +20,15 @@ from dataclasses import dataclass
|
|
20
20
|
from typing import AsyncIterable, List, Union
|
21
21
|
|
22
22
|
from livekit import agents, rtc
|
23
|
-
from livekit.agents import
|
24
|
-
|
23
|
+
from livekit.agents import (
|
24
|
+
APIConnectionError,
|
25
|
+
APIStatusError,
|
26
|
+
APITimeoutError,
|
27
|
+
stt,
|
28
|
+
utils,
|
29
|
+
)
|
30
|
+
|
31
|
+
from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
|
25
32
|
from google.auth import default as gauth_default
|
26
33
|
from google.auth.exceptions import DefaultCredentialsError
|
27
34
|
from google.cloud.speech_v2 import SpeechAsyncClient
|
@@ -141,7 +148,7 @@ class STT(stt.STT):
|
|
141
148
|
|
142
149
|
return config
|
143
150
|
|
144
|
-
async def
|
151
|
+
async def _recognize_impl(
|
145
152
|
self,
|
146
153
|
buffer: utils.AudioBuffer,
|
147
154
|
*,
|
@@ -165,23 +172,39 @@ class STT(stt.STT):
|
|
165
172
|
language_codes=config.languages,
|
166
173
|
)
|
167
174
|
|
168
|
-
|
169
|
-
|
170
|
-
|
175
|
+
try:
|
176
|
+
raw = await self._ensure_client().recognize(
|
177
|
+
cloud_speech.RecognizeRequest(
|
178
|
+
recognizer=self._recognizer,
|
179
|
+
config=config,
|
180
|
+
content=frame.data.tobytes(),
|
181
|
+
)
|
182
|
+
)
|
183
|
+
|
184
|
+
return _recognize_response_to_speech_event(raw)
|
185
|
+
except DeadlineExceeded:
|
186
|
+
raise APITimeoutError()
|
187
|
+
except GoogleAPICallError as e:
|
188
|
+
raise APIStatusError(
|
189
|
+
e.message,
|
190
|
+
status_code=e.code or -1,
|
191
|
+
request_id=None,
|
192
|
+
body=None,
|
171
193
|
)
|
172
|
-
|
173
|
-
|
194
|
+
except Exception as e:
|
195
|
+
raise APIConnectionError() from e
|
174
196
|
|
175
197
|
def stream(
|
176
198
|
self, *, language: SpeechLanguages | str | None = None
|
177
199
|
) -> "SpeechStream":
|
178
200
|
config = self._sanitize_options(language=language)
|
179
|
-
return SpeechStream(self._ensure_client(), self._recognizer, config)
|
201
|
+
return SpeechStream(self, self._ensure_client(), self._recognizer, config)
|
180
202
|
|
181
203
|
|
182
204
|
class SpeechStream(stt.SpeechStream):
|
183
205
|
def __init__(
|
184
206
|
self,
|
207
|
+
stt: STT,
|
185
208
|
client: SpeechAsyncClient,
|
186
209
|
recognizer: str,
|
187
210
|
config: STTOptions,
|
@@ -189,7 +212,7 @@ class SpeechStream(stt.SpeechStream):
|
|
189
212
|
num_channels: int = 1,
|
190
213
|
max_retry: int = 32,
|
191
214
|
) -> None:
|
192
|
-
super().__init__()
|
215
|
+
super().__init__(stt)
|
193
216
|
|
194
217
|
self._client = client
|
195
218
|
self._recognizer = recognizer
|
livekit/plugins/google/tts.py
CHANGED
@@ -15,21 +15,22 @@
|
|
15
15
|
from __future__ import annotations
|
16
16
|
|
17
17
|
from dataclasses import dataclass
|
18
|
-
from typing import Union
|
19
18
|
|
20
19
|
from livekit import rtc
|
21
|
-
from livekit.agents import
|
22
|
-
|
20
|
+
from livekit.agents import (
|
21
|
+
APIConnectionError,
|
22
|
+
APIStatusError,
|
23
|
+
APITimeoutError,
|
24
|
+
tts,
|
25
|
+
utils,
|
26
|
+
)
|
27
|
+
|
28
|
+
from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
|
23
29
|
from google.cloud import texttospeech
|
24
30
|
from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
|
25
31
|
|
26
|
-
from .log import logger
|
27
32
|
from .models import AudioEncoding, Gender, SpeechLanguages
|
28
33
|
|
29
|
-
LgType = Union[SpeechLanguages, str]
|
30
|
-
GenderType = Union[Gender, str]
|
31
|
-
AudioEncodingType = Union[AudioEncoding, str]
|
32
|
-
|
33
34
|
|
34
35
|
@dataclass
|
35
36
|
class _TTSOptions:
|
@@ -41,11 +42,13 @@ class TTS(tts.TTS):
|
|
41
42
|
def __init__(
|
42
43
|
self,
|
43
44
|
*,
|
44
|
-
language:
|
45
|
-
gender:
|
45
|
+
language: SpeechLanguages | str = "en-US",
|
46
|
+
gender: Gender | str = "neutral",
|
46
47
|
voice_name: str = "", # Not required
|
47
|
-
encoding:
|
48
|
+
encoding: AudioEncoding | str = "linear16",
|
48
49
|
sample_rate: int = 24000,
|
50
|
+
pitch: int = 0,
|
51
|
+
effects_profile_id: str = "",
|
49
52
|
speaking_rate: float = 1.0,
|
50
53
|
credentials_info: dict | None = None,
|
51
54
|
credentials_file: str | None = None,
|
@@ -56,6 +59,18 @@ class TTS(tts.TTS):
|
|
56
59
|
Credentials must be provided, either by using the ``credentials_info`` dict, or reading
|
57
60
|
from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
|
58
61
|
environmental variable.
|
62
|
+
|
63
|
+
Args:
|
64
|
+
language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
|
65
|
+
gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
|
66
|
+
voice_name (str, optional): Specific voice name. Default is an empty string.
|
67
|
+
encoding (AudioEncoding | str, optional): Audio encoding format (e.g., "linear16"). Default is "linear16".
|
68
|
+
sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
|
69
|
+
pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
|
70
|
+
effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
|
71
|
+
speaking_rate (float, optional): Speed of speech. Default is 1.0.
|
72
|
+
credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
|
73
|
+
credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
|
59
74
|
"""
|
60
75
|
|
61
76
|
super().__init__(
|
@@ -70,14 +85,10 @@ class TTS(tts.TTS):
|
|
70
85
|
self._credentials_info = credentials_info
|
71
86
|
self._credentials_file = credentials_file
|
72
87
|
|
73
|
-
ssml_gender = SsmlVoiceGender.NEUTRAL
|
74
|
-
if gender == "male":
|
75
|
-
ssml_gender = SsmlVoiceGender.MALE
|
76
|
-
elif gender == "female":
|
77
|
-
ssml_gender = SsmlVoiceGender.FEMALE
|
78
|
-
|
79
88
|
voice = texttospeech.VoiceSelectionParams(
|
80
|
-
name=voice_name,
|
89
|
+
name=voice_name,
|
90
|
+
language_code=language,
|
91
|
+
ssml_gender=_gender_from_str(gender),
|
81
92
|
)
|
82
93
|
|
83
94
|
if encoding == "linear16" or encoding == "wav":
|
@@ -92,10 +103,36 @@ class TTS(tts.TTS):
|
|
92
103
|
audio_config=texttospeech.AudioConfig(
|
93
104
|
audio_encoding=_audio_encoding,
|
94
105
|
sample_rate_hertz=sample_rate,
|
106
|
+
pitch=pitch,
|
107
|
+
effects_profile_id=effects_profile_id,
|
95
108
|
speaking_rate=speaking_rate,
|
96
109
|
),
|
97
110
|
)
|
98
111
|
|
112
|
+
def update_options(
|
113
|
+
self,
|
114
|
+
*,
|
115
|
+
language: SpeechLanguages | str = "en-US",
|
116
|
+
gender: Gender | str = "neutral",
|
117
|
+
voice_name: str = "", # Not required
|
118
|
+
speaking_rate: float = 1.0,
|
119
|
+
) -> None:
|
120
|
+
"""
|
121
|
+
Update the TTS options.
|
122
|
+
|
123
|
+
Args:
|
124
|
+
language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
|
125
|
+
gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
|
126
|
+
voice_name (str, optional): Specific voice name. Default is an empty string.
|
127
|
+
speaking_rate (float, optional): Speed of speech. Default is 1.0.
|
128
|
+
"""
|
129
|
+
self._opts.voice = texttospeech.VoiceSelectionParams(
|
130
|
+
name=voice_name,
|
131
|
+
language_code=language,
|
132
|
+
ssml_gender=_gender_from_str(gender),
|
133
|
+
)
|
134
|
+
self._opts.audio_config.speaking_rate = speaking_rate
|
135
|
+
|
99
136
|
def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
|
100
137
|
if not self._client:
|
101
138
|
if self._credentials_info:
|
@@ -118,46 +155,79 @@ class TTS(tts.TTS):
|
|
118
155
|
return self._client
|
119
156
|
|
120
157
|
def synthesize(self, text: str) -> "ChunkedStream":
|
121
|
-
return ChunkedStream(text, self._opts, self._ensure_client())
|
158
|
+
return ChunkedStream(self, text, self._opts, self._ensure_client())
|
122
159
|
|
123
160
|
|
124
161
|
class ChunkedStream(tts.ChunkedStream):
|
125
162
|
def __init__(
|
126
|
-
self,
|
163
|
+
self,
|
164
|
+
tts: TTS,
|
165
|
+
text: str,
|
166
|
+
opts: _TTSOptions,
|
167
|
+
client: texttospeech.TextToSpeechAsyncClient,
|
127
168
|
) -> None:
|
128
|
-
super().__init__()
|
129
|
-
self.
|
169
|
+
super().__init__(tts, text)
|
170
|
+
self._opts, self._client = opts, client
|
130
171
|
|
131
|
-
@utils.log_exceptions(logger=logger)
|
132
172
|
async def _main_task(self) -> None:
|
133
173
|
request_id = utils.shortuuid()
|
134
|
-
segment_id = utils.shortuuid()
|
135
|
-
response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
|
136
|
-
input=texttospeech.SynthesisInput(text=self._text),
|
137
|
-
voice=self._opts.voice,
|
138
|
-
audio_config=self._opts.audio_config,
|
139
|
-
)
|
140
174
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
175
|
+
try:
|
176
|
+
response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
|
177
|
+
input=texttospeech.SynthesisInput(text=self._input_text),
|
178
|
+
voice=self._opts.voice,
|
179
|
+
audio_config=self._opts.audio_config,
|
180
|
+
)
|
181
|
+
|
182
|
+
data = response.audio_content
|
183
|
+
if self._opts.audio_config.audio_encoding == "mp3":
|
184
|
+
decoder = utils.codecs.Mp3StreamDecoder()
|
185
|
+
bstream = utils.audio.AudioByteStream(
|
186
|
+
sample_rate=self._opts.audio_config.sample_rate_hertz,
|
187
|
+
num_channels=1,
|
188
|
+
)
|
189
|
+
for frame in decoder.decode_chunk(data):
|
190
|
+
for frame in bstream.write(frame.data.tobytes()):
|
191
|
+
self._event_ch.send_nowait(
|
192
|
+
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
193
|
+
)
|
194
|
+
|
195
|
+
for frame in bstream.flush():
|
196
|
+
self._event_ch.send_nowait(
|
197
|
+
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
198
|
+
)
|
199
|
+
else:
|
200
|
+
data = data[44:] # skip WAV header
|
145
201
|
self._event_ch.send_nowait(
|
146
202
|
tts.SynthesizedAudio(
|
147
|
-
request_id=request_id,
|
203
|
+
request_id=request_id,
|
204
|
+
frame=rtc.AudioFrame(
|
205
|
+
data=data,
|
206
|
+
sample_rate=self._opts.audio_config.sample_rate_hertz,
|
207
|
+
num_channels=1,
|
208
|
+
samples_per_channel=len(data) // 2, # 16-bit
|
209
|
+
),
|
148
210
|
)
|
149
211
|
)
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
num_channels=1,
|
160
|
-
samples_per_channel=len(data) // 2, # 16-bit
|
161
|
-
),
|
162
|
-
)
|
212
|
+
|
213
|
+
except DeadlineExceeded:
|
214
|
+
raise APITimeoutError()
|
215
|
+
except GoogleAPICallError as e:
|
216
|
+
raise APIStatusError(
|
217
|
+
e.message,
|
218
|
+
status_code=e.code or -1,
|
219
|
+
request_id=None,
|
220
|
+
body=None,
|
163
221
|
)
|
222
|
+
except Exception as e:
|
223
|
+
raise APIConnectionError() from e
|
224
|
+
|
225
|
+
|
226
|
+
def _gender_from_str(gender: str) -> SsmlVoiceGender:
|
227
|
+
ssml_gender = SsmlVoiceGender.NEUTRAL
|
228
|
+
if gender == "male":
|
229
|
+
ssml_gender = SsmlVoiceGender.MALE
|
230
|
+
elif gender == "female":
|
231
|
+
ssml_gender = SsmlVoiceGender.FEMALE
|
232
|
+
|
233
|
+
return ssml_gender # type: ignore
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.7.0
|
3
|
+
Version: 0.7.2
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
|
|
22
22
|
Requires-Dist: google-auth <3,>=2
|
23
23
|
Requires-Dist: google-cloud-speech <3,>=2
|
24
24
|
Requires-Dist: google-cloud-texttospeech <3,>=2
|
25
|
-
Requires-Dist: livekit-agents >=0.
|
25
|
+
Requires-Dist: livekit-agents >=0.11
|
26
26
|
|
27
27
|
# LiveKit Plugins Google
|
28
28
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=rqV6C5mFNDFlrA2IcGJrsebr2VxQwMzoDUjY1JhMBZM,1117
|
2
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
3
|
+
livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
|
4
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/google/stt.py,sha256=XNU9G0DKc-joOMdhgfJJ2u6IZ3JJ33Wi-XmdqX426fg,14198
|
6
|
+
livekit/plugins/google/tts.py,sha256=hRN8ul1lDXU8LPVEfbTszgBiRYsifZXCPMwk-Pv2KeA,8793
|
7
|
+
livekit/plugins/google/version.py,sha256=wNTnO8L3jrMdUjS-xAEFoMTKPaPYiFY9Kxnvzm4hTBc,600
|
8
|
+
livekit_plugins_google-0.7.2.dist-info/METADATA,sha256=ohgXDVPUSOXfZ8AA7PQhC5RU5huOaZF9dq9GDDRO0-E,1647
|
9
|
+
livekit_plugins_google-0.7.2.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
10
|
+
livekit_plugins_google-0.7.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
11
|
+
livekit_plugins_google-0.7.2.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=CYbSmm5fEw71F_r_4pEApGaWQ_r15Y3ZEocH88a4yc8,948
|
2
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
3
|
-
livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
|
4
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/google/stt.py,sha256=XXDOISg-8U1MzVu543xLEB3-mr_NFKJp9qo1-ya2-Hc,13569
|
6
|
-
livekit/plugins/google/tts.py,sha256=LQttOY3rI8TQ7w3FT-nBv-PDg5oXwITvFeBZtjwrwJE,5692
|
7
|
-
livekit/plugins/google/version.py,sha256=G63knoeV7ai0fH-1DCHqI3a7eSI4LlHqjV64n4GbCGg,600
|
8
|
-
livekit_plugins_google-0.7.0.dist-info/METADATA,sha256=cHccq2kH8vnXd7qNkADTDbmH1dOd7haUonMGNvdMbmo,1653
|
9
|
-
livekit_plugins_google-0.7.0.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
10
|
-
livekit_plugins_google-0.7.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
11
|
-
livekit_plugins_google-0.7.0.dist-info/RECORD,,
|
{livekit_plugins_google-0.7.0.dist-info → livekit_plugins_google-0.7.2.dist-info}/top_level.txt
RENAMED
File without changes
|