livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/tts.py +142 -8
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/METADATA +2 -2
- {livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/RECORD +5 -5
- {livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/WHEEL +0 -0
livekit/plugins/google/tts.py
CHANGED
@@ -14,6 +14,8 @@
|
|
14
14
|
|
15
15
|
from __future__ import annotations
|
16
16
|
|
17
|
+
import asyncio
|
18
|
+
import weakref
|
17
19
|
from dataclasses import dataclass
|
18
20
|
|
19
21
|
from google.api_core.client_options import ClientOptions
|
@@ -25,6 +27,7 @@ from livekit.agents import (
|
|
25
27
|
APIConnectOptions,
|
26
28
|
APIStatusError,
|
27
29
|
APITimeoutError,
|
30
|
+
tokenize,
|
28
31
|
tts,
|
29
32
|
utils,
|
30
33
|
)
|
@@ -35,13 +38,21 @@ from livekit.agents.types import (
|
|
35
38
|
)
|
36
39
|
from livekit.agents.utils import is_given
|
37
40
|
|
41
|
+
from .log import logger
|
38
42
|
from .models import Gender, SpeechLanguages
|
39
43
|
|
44
|
+
BUFFERED_WORDS_COUNT = 8
|
45
|
+
NUM_CHANNELS = 1
|
46
|
+
DEFAULT_VOICE_NAME = "en-US-Chirp3-HD-Charon"
|
47
|
+
DEFAULT_LANGUAGE = "en-US"
|
48
|
+
DEFAULT_GENDER = "neutral"
|
49
|
+
|
40
50
|
|
41
51
|
@dataclass
|
42
52
|
class _TTSOptions:
|
43
53
|
voice: texttospeech.VoiceSelectionParams
|
44
54
|
audio_config: texttospeech.AudioConfig
|
55
|
+
tokenizer: tokenize.SentenceTokenizer
|
45
56
|
|
46
57
|
|
47
58
|
class TTS(tts.TTS):
|
@@ -59,6 +70,8 @@ class TTS(tts.TTS):
|
|
59
70
|
audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
|
60
71
|
credentials_info: NotGivenOr[dict] = NOT_GIVEN,
|
61
72
|
credentials_file: NotGivenOr[str] = NOT_GIVEN,
|
73
|
+
tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
|
74
|
+
use_streaming: NotGivenOr[bool] = NOT_GIVEN,
|
62
75
|
) -> None:
|
63
76
|
"""
|
64
77
|
Create a new instance of Google TTS.
|
@@ -78,12 +91,14 @@ class TTS(tts.TTS):
|
|
78
91
|
speaking_rate (float, optional): Speed of speech. Default is 1.0.
|
79
92
|
credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
|
80
93
|
credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
|
94
|
+
tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
|
95
|
+
use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
|
81
96
|
""" # noqa: E501
|
97
|
+
if not is_given(use_streaming):
|
98
|
+
use_streaming = True
|
82
99
|
|
83
100
|
super().__init__(
|
84
|
-
capabilities=tts.TTSCapabilities(
|
85
|
-
streaming=False,
|
86
|
-
),
|
101
|
+
capabilities=tts.TTSCapabilities(streaming=use_streaming),
|
87
102
|
sample_rate=sample_rate,
|
88
103
|
num_channels=1,
|
89
104
|
)
|
@@ -93,15 +108,17 @@ class TTS(tts.TTS):
|
|
93
108
|
self._credentials_file = credentials_file
|
94
109
|
self._location = location
|
95
110
|
|
96
|
-
lang = language if is_given(language) else
|
97
|
-
ssml_gender = _gender_from_str(
|
98
|
-
name =
|
111
|
+
lang = language if is_given(language) else DEFAULT_LANGUAGE
|
112
|
+
ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
|
113
|
+
name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name
|
99
114
|
|
100
115
|
voice_params = texttospeech.VoiceSelectionParams(
|
101
116
|
name=name,
|
102
117
|
language_code=lang,
|
103
118
|
ssml_gender=ssml_gender,
|
104
119
|
)
|
120
|
+
if not is_given(tokenizer):
|
121
|
+
tokenizer = tokenize.basic.SentenceTokenizer(min_sentence_len=BUFFERED_WORDS_COUNT)
|
105
122
|
|
106
123
|
self._opts = _TTSOptions(
|
107
124
|
voice=voice_params,
|
@@ -112,7 +129,9 @@ class TTS(tts.TTS):
|
|
112
129
|
effects_profile_id=effects_profile_id,
|
113
130
|
speaking_rate=speaking_rate,
|
114
131
|
),
|
132
|
+
tokenizer=tokenizer,
|
115
133
|
)
|
134
|
+
self._streams = weakref.WeakSet[SynthesizeStream]()
|
116
135
|
|
117
136
|
def update_options(
|
118
137
|
self,
|
@@ -168,6 +187,18 @@ class TTS(tts.TTS):
|
|
168
187
|
assert self._client is not None
|
169
188
|
return self._client
|
170
189
|
|
190
|
+
def stream(
|
191
|
+
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
192
|
+
) -> SynthesizeStream:
|
193
|
+
stream = SynthesizeStream(
|
194
|
+
tts=self,
|
195
|
+
opts=self._opts,
|
196
|
+
client=self._ensure_client(),
|
197
|
+
conn_options=conn_options,
|
198
|
+
)
|
199
|
+
self._streams.add(stream)
|
200
|
+
return stream
|
201
|
+
|
171
202
|
def synthesize(
|
172
203
|
self,
|
173
204
|
text: str,
|
@@ -182,6 +213,12 @@ class TTS(tts.TTS):
|
|
182
213
|
client=self._ensure_client(),
|
183
214
|
)
|
184
215
|
|
216
|
+
async def aclose(self) -> None:
|
217
|
+
for stream in list(self._streams):
|
218
|
+
await stream.aclose()
|
219
|
+
self._streams.clear()
|
220
|
+
await super().aclose()
|
221
|
+
|
185
222
|
|
186
223
|
class ChunkedStream(tts.ChunkedStream):
|
187
224
|
def __init__(
|
@@ -230,8 +267,105 @@ class ChunkedStream(tts.ChunkedStream):
|
|
230
267
|
raise APITimeoutError() from None
|
231
268
|
except GoogleAPICallError as e:
|
232
269
|
raise APIStatusError(
|
233
|
-
e.message, status_code=e.code or -1, request_id=None, body=None
|
234
|
-
) from
|
270
|
+
f"{e.message} {e.details}", status_code=e.code or -1, request_id=None, body=None
|
271
|
+
) from e
|
272
|
+
except Exception as e:
|
273
|
+
raise APIConnectionError() from e
|
274
|
+
|
275
|
+
|
276
|
+
class SynthesizeStream(tts.SynthesizeStream):
|
277
|
+
def __init__(
|
278
|
+
self,
|
279
|
+
*,
|
280
|
+
tts: TTS,
|
281
|
+
opts: _TTSOptions,
|
282
|
+
client: texttospeech.TextToSpeechAsyncClient,
|
283
|
+
conn_options: APIConnectOptions,
|
284
|
+
):
|
285
|
+
super().__init__(tts=tts, conn_options=conn_options)
|
286
|
+
self._opts, self._client = opts, client
|
287
|
+
self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
|
288
|
+
|
289
|
+
async def _run(self) -> None:
|
290
|
+
request_id = utils.shortuuid()
|
291
|
+
|
292
|
+
@utils.log_exceptions(logger=logger)
|
293
|
+
async def _tokenize_input():
|
294
|
+
input_stream = None
|
295
|
+
async for input in self._input_ch:
|
296
|
+
if isinstance(input, str):
|
297
|
+
if input_stream is None:
|
298
|
+
input_stream = self._opts.tokenizer.stream()
|
299
|
+
self._segments_ch.send_nowait(input_stream)
|
300
|
+
input_stream.push_text(input)
|
301
|
+
elif isinstance(input, self._FlushSentinel):
|
302
|
+
if input_stream:
|
303
|
+
input_stream.end_input()
|
304
|
+
input_stream = None
|
305
|
+
self._segments_ch.close()
|
306
|
+
|
307
|
+
@utils.log_exceptions(logger=logger)
|
308
|
+
async def _run_segments():
|
309
|
+
async for input_stream in self._segments_ch:
|
310
|
+
await self._run_stream(input_stream, request_id)
|
311
|
+
|
312
|
+
tasks = [
|
313
|
+
asyncio.create_task(_tokenize_input()),
|
314
|
+
asyncio.create_task(_run_segments()),
|
315
|
+
]
|
316
|
+
try:
|
317
|
+
await asyncio.gather(*tasks)
|
318
|
+
except Exception as e:
|
319
|
+
raise APIConnectionError() from e
|
320
|
+
|
321
|
+
async def _run_stream(self, input_stream, request_id):
|
322
|
+
streaming_config = texttospeech.StreamingSynthesizeConfig(
|
323
|
+
voice=self._opts.voice,
|
324
|
+
streaming_audio_config=texttospeech.StreamingAudioConfig(
|
325
|
+
audio_encoding=texttospeech.AudioEncoding.PCM
|
326
|
+
),
|
327
|
+
)
|
328
|
+
emitter = tts.SynthesizedAudioEmitter(event_ch=self._event_ch, request_id=request_id)
|
329
|
+
audio_bstream = utils.audio.AudioByteStream(
|
330
|
+
sample_rate=self._opts.audio_config.sample_rate_hertz,
|
331
|
+
num_channels=NUM_CHANNELS,
|
332
|
+
)
|
333
|
+
|
334
|
+
@utils.log_exceptions(logger=logger)
|
335
|
+
async def input_generator():
|
336
|
+
try:
|
337
|
+
yield texttospeech.StreamingSynthesizeRequest(streaming_config=streaming_config)
|
338
|
+
async for input in input_stream:
|
339
|
+
self._mark_started()
|
340
|
+
yield texttospeech.StreamingSynthesizeRequest(
|
341
|
+
input=texttospeech.StreamingSynthesisInput(text=input.token)
|
342
|
+
)
|
343
|
+
|
344
|
+
except Exception:
|
345
|
+
logger.exception("an error occurred while streaming input to google TTS")
|
346
|
+
|
347
|
+
try:
|
348
|
+
stream = await self._client.streaming_synthesize(
|
349
|
+
input_generator(),
|
350
|
+
timeout=self._conn_options.timeout,
|
351
|
+
)
|
352
|
+
async for resp in stream:
|
353
|
+
for frame in audio_bstream.write(resp.audio_content):
|
354
|
+
emitter.push(frame)
|
355
|
+
|
356
|
+
for frame in audio_bstream.flush():
|
357
|
+
emitter.push(frame)
|
358
|
+
emitter.flush()
|
359
|
+
except DeadlineExceeded as e:
|
360
|
+
logger.debug(f"google tts deadline exceeded: {e}")
|
361
|
+
pass
|
362
|
+
except GoogleAPICallError as e:
|
363
|
+
raise APIStatusError(
|
364
|
+
f"{e.message} {e.details}",
|
365
|
+
status_code=e.code or -1,
|
366
|
+
request_id=request_id,
|
367
|
+
body=None,
|
368
|
+
) from e
|
235
369
|
except Exception as e:
|
236
370
|
raise APIConnectionError() from e
|
237
371
|
|
{livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 1.0.21
|
3
|
+
Version: 1.0.22
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
|
|
22
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
23
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2.24
|
24
24
|
Requires-Dist: google-genai>=1.14.0
|
25
|
-
Requires-Dist: livekit-agents>=1.0.21
|
25
|
+
Requires-Dist: livekit-agents>=1.0.22
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
|
28
28
|
# Google AI plugin for LiveKit Agents
|
@@ -4,13 +4,13 @@ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA
|
|
4
4
|
livekit/plugins/google/models.py,sha256=maGlEM3hK4-5hMnH9UQMJewA7BZMrnStsFLBNoNVySg,1531
|
5
5
|
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
|
7
|
-
livekit/plugins/google/tts.py,sha256=
|
7
|
+
livekit/plugins/google/tts.py,sha256=FfhNfGtW8drmYDDfLLZDjaIp2GvNiIdoovgtZq4t_l8,14211
|
8
8
|
livekit/plugins/google/utils.py,sha256=UBAbddYk7G8Nojg6bSC7_xN2pdl9qhs86HGhKYFuf9M,10509
|
9
|
-
livekit/plugins/google/version.py,sha256
|
9
|
+
livekit/plugins/google/version.py,sha256=-8dkOE2vDSF9WN8VoBrSwU2sb5YBGFuwPnSQXQ-uaYM,601
|
10
10
|
livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
|
11
11
|
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
12
|
livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
|
13
13
|
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yYB5fKXl_aaMH_ZSpfUlfOTUg4eRqqRENLTZhZMfBMc,36253
|
14
|
-
livekit_plugins_google-1.0.
|
15
|
-
livekit_plugins_google-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
-
livekit_plugins_google-1.0.21.dist-info/RECORD,,
|
14
|
+
livekit_plugins_google-1.0.22.dist-info/METADATA,sha256=S4bQZr4NhWrAI6vyJi299sh5lsD5eVMNfxvN9__xAMY,1908
|
15
|
+
livekit_plugins_google-1.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
livekit_plugins_google-1.0.22.dist-info/RECORD,,
|
File without changes
|