livekit-plugins-google 0.10.5__py3-none-any.whl → 0.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/_utils.py +12 -15
- livekit/plugins/google/beta/realtime/realtime_api.py +4 -4
- livekit/plugins/google/beta/realtime/transcriber.py +1 -1
- livekit/plugins/google/llm.py +7 -6
- livekit/plugins/google/models.py +2 -0
- livekit/plugins/google/stt.py +97 -83
- livekit/plugins/google/tts.py +3 -3
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.10.5.dist-info → livekit_plugins_google-0.10.6.dist-info}/METADATA +2 -2
- livekit_plugins_google-0.10.6.dist-info/RECORD +18 -0
- {livekit_plugins_google-0.10.5.dist-info → livekit_plugins_google-0.10.6.dist-info}/WHEEL +1 -1
- livekit_plugins_google-0.10.5.dist-info/RECORD +0 -18
- {livekit_plugins_google-0.10.5.dist-info → livekit_plugins_google-0.10.6.dist-info}/top_level.txt +0 -0
livekit/plugins/google/_utils.py
CHANGED
@@ -10,14 +10,15 @@ from livekit.agents import llm, utils
 from livekit.agents.llm.function_context import _is_optional_type
 
 from google.genai import types
-
-
-
-
-
-
-
-
+from google.genai.types import Type as GenaiType
+
+JSON_SCHEMA_TYPE_MAP: dict[type, GenaiType] = {
+    str: GenaiType.STRING,
+    int: GenaiType.INTEGER,
+    float: GenaiType.NUMBER,
+    bool: GenaiType.BOOLEAN,
+    dict: GenaiType.OBJECT,
+    list: GenaiType.ARRAY,
 }
 
 __all__ = ["_build_gemini_ctx", "_build_tools"]
@@ -38,7 +39,7 @@ def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
             item_type = get_args(py_type)[0]
             if item_type not in JSON_SCHEMA_TYPE_MAP:
                 raise ValueError(f"Unsupported type: {item_type}")
-            prop.type =
+            prop.type = GenaiType.ARRAY
             prop.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[item_type])
 
         if arg_info.choices:
@@ -62,7 +63,7 @@ def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
             required.append(arg_name)
 
     if properties:
-        parameters = types.Schema(type=
+        parameters = types.Schema(type=GenaiType.OBJECT, properties=properties)
        if required:
            parameters.required = required
 
@@ -119,7 +120,6 @@ def _build_gemini_ctx(
             parts.append(
                 types.Part(
                     function_call=types.FunctionCall(
-                        id=fnc.tool_call_id,
                         name=fnc.function_info.name,
                         args=fnc.arguments,
                     )
@@ -132,7 +132,6 @@ def _build_gemini_ctx(
             parts.append(
                 types.Part(
                     function_response=types.FunctionResponse(
-                        id=msg.tool_call_id,
                         name=msg.name,
                         response=msg.content,
                     )
@@ -142,7 +141,6 @@ def _build_gemini_ctx(
             parts.append(
                 types.Part(
                     function_response=types.FunctionResponse(
-                        id=msg.tool_call_id,
                         name=msg.name,
                         response={"result": msg.content},
                     )
@@ -193,8 +191,7 @@ def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part
             height=image.inference_height,
             strategy="scale_aspect_fit",
         )
-
-        image._cache[cache_key] = base64.b64encode(encoded_data).decode("utf-8")
+        image._cache[cache_key] = utils.images.encode(image.image, opts)
 
     return types.Part.from_bytes(
         data=image._cache[cache_key], mime_type="image/jpeg"
     )
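The map above now keys directly into the `google.genai.types.Type` enum instead of raw values, and `_build_gemini_ctx` stops forwarding `tool_call_id` into `FunctionCall`/`FunctionResponse` parts, presumably to track the google-genai 1.3.0 schema. A minimal sketch of how the enum map drives `types.Schema` construction; the helper name `build_schema` is illustrative, not the plugin's `_build_parameters`:

```python
from google.genai import types
from google.genai.types import Type as GenaiType

JSON_SCHEMA_TYPE_MAP: dict[type, GenaiType] = {
    str: GenaiType.STRING,
    int: GenaiType.INTEGER,
    float: GenaiType.NUMBER,
    bool: GenaiType.BOOLEAN,
    dict: GenaiType.OBJECT,
    list: GenaiType.ARRAY,
}

def build_schema(args: dict[str, type]) -> types.Schema:
    # One Schema property per argument, typed via the enum map above;
    # every argument is treated as required for simplicity.
    properties = {
        name: types.Schema(type=JSON_SCHEMA_TYPE_MAP[py_type])
        for name, py_type in args.items()
    }
    return types.Schema(
        type=GenaiType.OBJECT,
        properties=properties,
        required=list(args),
    )

# build_schema({"city": str, "days": int}) -> an OBJECT schema with two properties
```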
livekit/plugins/google/beta/realtime/realtime_api.py
CHANGED
@@ -11,12 +11,12 @@ from livekit.agents import llm, utils
 from livekit.agents.llm.function_context import _create_ai_function_info
 
 from google import genai
-from google.genai._api_client import HttpOptions
 from google.genai.types import (
     Blob,
     Content,
     FunctionResponse,
     GenerationConfig,
+    HttpOptions,
     LiveClientContent,
     LiveClientRealtimeInput,
     LiveClientToolResponse,
@@ -107,7 +107,7 @@ class RealtimeModel:
         model: LiveAPIModels | str = "gemini-2.0-flash-exp",
         api_key: str | None = None,
         voice: Voice | str = "Puck",
-        modalities: list[Modality] = [
+        modalities: list[Modality] = [Modality.AUDIO],
         enable_user_audio_transcription: bool = True,
         enable_agent_audio_transcription: bool = True,
         vertexai: bool = False,
@@ -479,12 +479,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
             logger.warning(
                 "function call cancelled",
                 extra={
-                    "function_call_ids": response.tool_call_cancellation.
+                    "function_call_ids": response.tool_call_cancellation.ids,
                 },
             )
             self.emit(
                 "function_calls_cancelled",
-                response.tool_call_cancellation.
+                response.tool_call_cancellation.ids,
             )
 
         async with self._client.aio.live.connect(
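Two small API migrations here: `HttpOptions` moves from the private `google.genai._api_client` module to the public types import, and the default output modality becomes a typed enum member. A sketch of the new import surface:

```python
# HttpOptions is now re-exported from the public types module, so the
# private _api_client import path is no longer needed:
from google.genai.types import HttpOptions, Modality

# Typed default instead of a bare value (mirrors the new signature above):
default_modalities: list[Modality] = [Modality.AUDIO]
```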
livekit/plugins/google/beta/realtime/transcriber.py
CHANGED
@@ -55,7 +55,7 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
             parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
         )
         self._config = types.LiveConnectConfig(
-            response_modalities=[
+            response_modalities=[types.Modality.TEXT],
             system_instruction=system_instructions,
             generation_config=types.GenerationConfig(temperature=0.0),
         )
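The transcriber makes the same enum switch for its text-only session. A self-contained sketch of the resulting config, with the system instruction omitted:

```python
from google.genai import types

# Text-only transcription session config, mirroring the hunk above;
# temperature 0.0 keeps the transcript deterministic.
config = types.LiveConnectConfig(
    response_modalities=[types.Modality.TEXT],
    generation_config=types.GenerationConfig(temperature=0.0),
)
```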
livekit/plugins/google/llm.py
CHANGED
@@ -240,7 +240,7 @@ class LLMStream(llm.LLMStream):
             # specific function
             tool_config = types.ToolConfig(
                 function_calling_config=types.FunctionCallingConfig(
-                    mode=
+                    mode=types.FunctionCallingConfigMode.ANY,
                     allowed_function_names=[self._tool_choice.name],
                 )
             )
@@ -248,7 +248,7 @@ class LLMStream(llm.LLMStream):
             # model must call any function
             tool_config = types.ToolConfig(
                 function_calling_config=types.FunctionCallingConfig(
-                    mode=
+                    mode=types.FunctionCallingConfigMode.ANY,
                     allowed_function_names=[
                         fnc.name
                         for fnc in self._fnc_ctx.ai_functions.values()
@@ -259,14 +259,14 @@ class LLMStream(llm.LLMStream):
             # model can call any function
             tool_config = types.ToolConfig(
                 function_calling_config=types.FunctionCallingConfig(
-                    mode=
+                    mode=types.FunctionCallingConfigMode.AUTO
                 )
             )
         elif self._tool_choice == "none":
             # model cannot call any function
             tool_config = types.ToolConfig(
                 function_calling_config=types.FunctionCallingConfig(
-                    mode=
+                    mode=types.FunctionCallingConfigMode.NONE,
                 )
             )
         opts["tool_config"] = tool_config
@@ -282,11 +282,12 @@ class LLMStream(llm.LLMStream):
             system_instruction=system_instruction,
             **opts,
         )
-
+        stream = await self._client.aio.models.generate_content_stream(
             model=self._model,
             contents=cast(types.ContentListUnion, turns),
             config=config,
-        )
+        )
+        async for response in stream:  # type: ignore
             if response.prompt_feedback:
                 raise APIStatusError(
                     response.prompt_feedback.json(),
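The four branches above reduce to a mapping from the tool-choice value onto `FunctionCallingConfigMode`: AUTO lets the model decide, ANY forces a call (optionally restricted via `allowed_function_names`), NONE disables tools. A condensed sketch; the helper name and string keys are illustrative, not the plugin's API:

```python
from google.genai import types

def tool_config_for(choice: str) -> types.ToolConfig:
    # "auto": the model may call functions; "required": it must call one
    # (ANY mode); "none": function calling is disabled entirely.
    mode = {
        "auto": types.FunctionCallingConfigMode.AUTO,
        "required": types.FunctionCallingConfigMode.ANY,
        "none": types.FunctionCallingConfigMode.NONE,
    }[choice]
    return types.ToolConfig(
        function_calling_config=types.FunctionCallingConfig(mode=mode)
    )
```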
livekit/plugins/google/models.py
CHANGED
livekit/plugins/google/stt.py
CHANGED
@@ -19,7 +19,7 @@ import dataclasses
 import time
 import weakref
 from dataclasses import dataclass
-from typing import List, Union
+from typing import Callable, List, Union
 
 from livekit import rtc
 from livekit.agents import (
@@ -61,7 +61,7 @@ class STTOptions:
     interim_results: bool
     punctuate: bool
     spoken_punctuation: bool
-    model: SpeechModels
+    model: SpeechModels | str
     sample_rate: int
     keywords: List[tuple[str, float]] | None
 
@@ -93,7 +93,7 @@ class STT(stt.STT):
         interim_results: bool = True,
         punctuate: bool = True,
         spoken_punctuation: bool = False,
-        model: SpeechModels = "
+        model: SpeechModels | str = "latest_long",
         location: str = "us-central1",
         sample_rate: int = 16000,
         credentials_info: dict | None = None,
@@ -106,12 +106,24 @@ class STT(stt.STT):
         Credentials must be provided, either by using the ``credentials_info`` dict, or reading
         from the file specified in ``credentials_file`` or via Application Default Credentials as
         described in https://cloud.google.com/docs/authentication/application-default-credentials
+
+        args:
+            languages(LanguageCode): list of language codes to recognize (default: "en-US")
+            detect_language(bool): whether to detect the language of the audio (default: True)
+            interim_results(bool): whether to return interim results (default: True)
+            punctuate(bool): whether to punctuate the audio (default: True)
+            spoken_punctuation(bool): whether to use spoken punctuation (default: False)
+            model(SpeechModels): the model to use for recognition default: "latest_long"
+            location(str): the location to use for recognition default: "us-central1"
+            sample_rate(int): the sample rate of the audio default: 16000
+            credentials_info(dict): the credentials info to use for recognition (default: None)
+            credentials_file(str): the credentials file to use for recognition (default: None)
+            keywords(List[tuple[str, float]]): list of keywords to recognize (default: None)
         """
         super().__init__(
             capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
         )
 
-        self._client: SpeechAsyncClient | None = None
         self._location = location
         self._credentials_info = credentials_info
         self._credentials_file = credentials_file
@@ -140,40 +152,44 @@ class STT(stt.STT):
             keywords=keywords,
         )
         self._streams = weakref.WeakSet[SpeechStream]()
+        self._pool = utils.ConnectionPool[SpeechAsyncClient](
+            max_session_duration=_max_session_duration,
+            connect_cb=self._create_client,
+        )
 
-    def
+    async def _create_client(self) -> SpeechAsyncClient:
         # Add support for passing a specific location that matches recognizer
         # see: https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
         client_options = None
+        client: SpeechAsyncClient | None = None
         if self._location != "global":
             client_options = ClientOptions(
                 api_endpoint=f"{self._location}-speech.googleapis.com"
             )
         if self._credentials_info:
-
+            client = SpeechAsyncClient.from_service_account_info(
                 self._credentials_info,
                 client_options=client_options,
             )
         elif self._credentials_file:
-
+            client = SpeechAsyncClient.from_service_account_file(
                 self._credentials_file,
                 client_options=client_options,
             )
         else:
-
+            client = SpeechAsyncClient(
                 client_options=client_options,
             )
-        assert
-        return
+        assert client is not None
+        return client
 
-
-    def _recognizer(self) -> str:
+    def _get_recognizer(self, client: SpeechAsyncClient) -> str:
         # TODO(theomonnom): should we use recognizers?
         # recognizers may improve latency https://cloud.google.com/speech-to-text/v2/docs/recognizers#understand_recognizers
 
         # TODO(theomonnom): find a better way to access the project_id
         try:
-            project_id =
+            project_id = client.transport._credentials.project_id  # type: ignore
         except AttributeError:
             from google.auth import default as ga_default
 
@@ -224,16 +240,17 @@ class STT(stt.STT):
         )
 
         try:
-
-
-
-
-
-
-
-
+            async with self._pool.connection() as client:
+                raw = await client.recognize(
+                    cloud_speech.RecognizeRequest(
+                        recognizer=self._get_recognizer(client),
+                        config=config,
+                        content=frame.data.tobytes(),
+                    ),
+                    timeout=conn_options.timeout,
+                )
 
-
+            return _recognize_response_to_speech_event(raw)
         except DeadlineExceeded:
             raise APITimeoutError()
         except GoogleAPICallError as e:
@@ -253,8 +270,8 @@ class STT(stt.STT):
         config = self._sanitize_options(language=language)
         stream = SpeechStream(
             stt=self,
-
-
+            pool=self._pool,
+            recognizer_cb=self._get_recognizer,
             config=config,
             conn_options=conn_options,
         )
@@ -287,13 +304,10 @@ class STT(stt.STT):
             self._config.spoken_punctuation = spoken_punctuation
         if model is not None:
             self._config.model = model
-        client = None
-        recognizer = None
         if location is not None:
             self._location = location
             # if location is changed, fetch a new client and recognizer as per the new location
-
-            recognizer = self._recognizer
+            self._pool.invalidate()
         if keywords is not None:
             self._config.keywords = keywords
 
@@ -306,8 +320,6 @@ class STT(stt.STT):
             spoken_punctuation=spoken_punctuation,
             model=model,
             keywords=keywords,
-            client=client,
-            recognizer=recognizer,
         )
 
 
@@ -317,16 +329,16 @@ class SpeechStream(stt.SpeechStream):
         *,
         stt: STT,
         conn_options: APIConnectOptions,
-
-
+        pool: utils.ConnectionPool[SpeechAsyncClient],
+        recognizer_cb: Callable[[SpeechAsyncClient], str],
         config: STTOptions,
     ) -> None:
         super().__init__(
             stt=stt, conn_options=conn_options, sample_rate=config.sample_rate
         )
 
-        self.
-        self.
+        self._pool = pool
+        self._recognizer_cb = recognizer_cb
         self._config = config
         self._reconnect_event = asyncio.Event()
         self._session_connected_at: float = 0
@@ -341,8 +353,6 @@ class SpeechStream(stt.SpeechStream):
         spoken_punctuation: bool | None = None,
         model: SpeechModels | None = None,
         keywords: List[tuple[str, float]] | None = None,
-        client: SpeechAsyncClient | None = None,
-        recognizer: str | None = None,
     ):
         if languages is not None:
             if isinstance(languages, str):
@@ -360,21 +370,19 @@ class SpeechStream(stt.SpeechStream):
             self._config.model = model
         if keywords is not None:
             self._config.keywords = keywords
-        if client is not None:
-            self._client = client
-        if recognizer is not None:
-            self._recognizer = recognizer
 
         self._reconnect_event.set()
 
     async def _run(self) -> None:
         # google requires a async generator when calling streaming_recognize
         # this function basically convert the queue into a async generator
-        async def input_generator(
+        async def input_generator(
+            client: SpeechAsyncClient, should_stop: asyncio.Event
+        ):
             try:
                 # first request should contain the config
                 yield cloud_speech.StreamingRecognizeRequest(
-                    recognizer=self.
+                    recognizer=self._recognizer_cb(client),
                     streaming_config=self._streaming_config,
                 )
 
@@ -395,7 +403,7 @@ class SpeechStream(stt.SpeechStream):
                     "an error occurred while streaming input to google STT"
                 )
 
-        async def process_stream(stream):
+        async def process_stream(client: SpeechAsyncClient, stream):
             has_started = False
             async for resp in stream:
                 if (
@@ -437,6 +445,7 @@ class SpeechStream(stt.SpeechStream):
                         logger.debug(
                             "Google STT maximum connection time reached. Reconnecting..."
                         )
+                        self._pool.remove(client)
                         if has_started:
                             self._event_ch.send_nowait(
                                 stt.SpeechEvent(
@@ -458,52 +467,57 @@ class SpeechStream(stt.SpeechStream):
 
         while True:
             try:
-                self.
-
-
-
-
-
+                async with self._pool.connection() as client:
+                    self._streaming_config = cloud_speech.StreamingRecognitionConfig(
+                        config=cloud_speech.RecognitionConfig(
+                            explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
+                                encoding=cloud_speech.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
+                                sample_rate_hertz=self._config.sample_rate,
+                                audio_channel_count=1,
+                            ),
+                            adaptation=self._config.build_adaptation(),
+                            language_codes=self._config.languages,
+                            model=self._config.model,
+                            features=cloud_speech.RecognitionFeatures(
+                                enable_automatic_punctuation=self._config.punctuate,
+                                enable_word_time_offsets=True,
+                            ),
                         ),
-
-
-
-                    features=cloud_speech.RecognitionFeatures(
-                        enable_automatic_punctuation=self._config.punctuate,
-                        enable_word_time_offsets=True,
+                        streaming_features=cloud_speech.StreamingRecognitionFeatures(
+                            enable_voice_activity_events=True,
+                            interim_results=self._config.interim_results,
                         ),
-                )
-                streaming_features=cloud_speech.StreamingRecognitionFeatures(
-                    enable_voice_activity_events=True,
-                    interim_results=self._config.interim_results,
-                ),
-                )
-
-                should_stop = asyncio.Event()
-                stream = await self._client.streaming_recognize(
-                    requests=input_generator(should_stop),
-                )
-                self._session_connected_at = time.time()
+                    )
 
-
-
+                    should_stop = asyncio.Event()
+                    stream = await client.streaming_recognize(
+                        requests=input_generator(client, should_stop),
+                    )
+                    self._session_connected_at = time.time()
 
-
-
-                    [process_stream_task, wait_reconnect_task],
-                    return_when=asyncio.FIRST_COMPLETED,
+                    process_stream_task = asyncio.create_task(
+                        process_stream(client, stream)
                     )
-
-
-                    task.result()
-                if wait_reconnect_task not in done:
-                    break
-                self._reconnect_event.clear()
-            finally:
-                await utils.aio.gracefully_cancel(
-                    process_stream_task, wait_reconnect_task
+                    wait_reconnect_task = asyncio.create_task(
+                        self._reconnect_event.wait()
                     )
-
+
+                    try:
+                        done, _ = await asyncio.wait(
+                            [process_stream_task, wait_reconnect_task],
+                            return_when=asyncio.FIRST_COMPLETED,
+                        )
+                        for task in done:
+                            if task != wait_reconnect_task:
+                                task.result()
+                        if wait_reconnect_task not in done:
+                            break
+                        self._reconnect_event.clear()
+                    finally:
+                        await utils.aio.gracefully_cancel(
+                            process_stream_task, wait_reconnect_task
+                        )
+                        should_stop.set()
             except DeadlineExceeded:
                 raise APITimeoutError()
             except GoogleAPICallError as e:
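The core of this change replaces the single cached `SpeechAsyncClient` with a `utils.ConnectionPool` that recreates clients after `_max_session_duration`, retires a client early on reconnect via `remove()`, and discards everything via `invalidate()` when the endpoint location changes. A self-contained sketch of the pattern under stated assumptions: the 240-second duration and the `recognize_once` helper are illustrative, and the factory omits the credentials and location handling done by `STT._create_client` above:

```python
from google.cloud.speech_v2 import SpeechAsyncClient
from livekit.agents import utils

async def create_client() -> SpeechAsyncClient:
    # Stand-in for STT._create_client; credentials/endpoint options omitted.
    return SpeechAsyncClient()

pool = utils.ConnectionPool[SpeechAsyncClient](
    max_session_duration=240,  # seconds; assumed value for this sketch
    connect_cb=create_client,
)

async def recognize_once(request) -> object:
    # Borrow a pooled client; the pool hands out a fresh one once the
    # previous session exceeds max_session_duration.
    async with pool.connection() as client:
        return await client.recognize(request)

# pool.invalidate() drops every cached client (used after a location change);
# pool.remove(client) retires one client early (used when reconnecting).
```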
livekit/plugins/google/tts.py
CHANGED
@@ -15,10 +15,10 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from typing import Optional
 
 from livekit import rtc
 from livekit.agents import (
-    DEFAULT_API_CONNECT_OPTIONS,
     APIConnectionError,
     APIConnectOptions,
     APIStatusError,
@@ -160,7 +160,7 @@ class TTS(tts.TTS):
         self,
         text: str,
         *,
-        conn_options: APIConnectOptions =
+        conn_options: Optional[APIConnectOptions] = None,
     ) -> "ChunkedStream":
         return ChunkedStream(
             tts=self,
@@ -177,9 +177,9 @@ class ChunkedStream(tts.ChunkedStream):
         *,
         tts: TTS,
         input_text: str,
-        conn_options: APIConnectOptions,
         opts: _TTSOptions,
         client: texttospeech.TextToSpeechAsyncClient,
+        conn_options: Optional[APIConnectOptions] = None,
     ) -> None:
         super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
         self._opts, self._client = opts, client
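`synthesize()` and `ChunkedStream` now take `conn_options=None` rather than baking `DEFAULT_API_CONNECT_OPTIONS` into their public signatures. A sketch of the pattern, assuming the fallback is still applied downstream (e.g. in the base `ChunkedStream`); the helper name is illustrative:

```python
from typing import Optional

from livekit.agents import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions

def resolve_conn_options(
    conn_options: Optional[APIConnectOptions] = None,
) -> APIConnectOptions:
    # None means "use the framework default", resolved at call time
    # instead of being frozen into the signature at import time.
    return conn_options if conn_options is not None else DEFAULT_API_CONNECT_OPTIONS
```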
{livekit_plugins_google-0.10.5.dist-info → livekit_plugins_google-0.10.6.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: livekit-plugins-google
-Version: 0.10.5
+Version: 0.10.6
 Summary: Agent Framework plugin for services from Google Cloud
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: google-genai==
+Requires-Dist: google-genai==1.3.0
 Requires-Dist: livekit-agents>=0.12.11
 Dynamic: classifier
 Dynamic: description
livekit_plugins_google-0.10.6.dist-info/RECORD
ADDED
@@ -0,0 +1,18 @@
+livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
+livekit/plugins/google/_utils.py,sha256=FG1_26nlWGcI6onPleQQcmGBMfb4QNYgis1B5BMJxWA,7131
+livekit/plugins/google/llm.py,sha256=LZaHsrkjfboRZLWm7L2G0mw62q2sXBNj4YeeV2Sk2uU,16717
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=8Ysqkb0pOSSr_S9XHYxLz5nofDTt8RtfbsTIWoptOQU,1532
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=0-4mVD5IydvsWp9OzYyVmXe6pz6FDvPutRLF169y674,22752
+livekit/plugins/google/tts.py,sha256=w4EMk9rPfyAzPyWFwE_5sPo9UY7DNFa2g83K56AUk9I,9228
+livekit/plugins/google/version.py,sha256=B7ZiVTsE24YmkTGl3227ZHjutNpXQp27028_w5-LuRA,601
+livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
+livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
+livekit/plugins/google/beta/realtime/api_proto.py,sha256=ralrRZqIbE71oyuLKRYaXHvm6tcHMwBJueKvSO8Xfus,658
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=SU_uQvZMBwbVgexZqkAjGmJVUW80ObJ4LP53rV7xqko,21228
+livekit/plugins/google/beta/realtime/transcriber.py,sha256=rjXO0cSPr3HATxrSfv1MX7IbrjmiTvnLPF280BfRBL8,9809
+livekit_plugins_google-0.10.6.dist-info/METADATA,sha256=cvkHdPcsrRpbSjW8oowAgN392NWQmoUD429U6zYSeKk,2058
+livekit_plugins_google-0.10.6.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+livekit_plugins_google-0.10.6.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_google-0.10.6.dist-info/RECORD,,
@@ -1,18 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
-
livekit/plugins/google/_utils.py,sha256=mjsqblhGMgAZ2MNPisAVkNsqq4gfO6vvprEKzAGoVwE,7248
|
3
|
-
livekit/plugins/google/llm.py,sha256=TVTerAabIf10AKVZr-Kn13eajhQ9RV7K4xaVD771yHU,16547
|
4
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
5
|
-
livekit/plugins/google/models.py,sha256=Q47z_tIwLCufxhJyJHH7_1bo4xdBYZBSkkvMeycuItg,1493
|
6
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
livekit/plugins/google/stt.py,sha256=QcpKAcg8ltFlQnLGSdtRS2H12pFEPs1ZzLojKHB8bpY,21376
|
8
|
-
livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
|
9
|
-
livekit/plugins/google/version.py,sha256=na7fXYRLcWIgCRi4QSAbV4DZGA7YDgOWcE0O21jDlAo,601
|
10
|
-
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
-
livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=ralrRZqIbE71oyuLKRYaXHvm6tcHMwBJueKvSO8Xfus,658
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=RPGYAJXelYPo16YyR2qccjUjxUJCkJBU2N5rNTpKxyo,21281
|
14
|
-
livekit/plugins/google/beta/realtime/transcriber.py,sha256=ZpKA3F8dqOtJPDlPiAgjw0AUDBIuhQiBVnvSYL4cdBg,9796
|
15
|
-
livekit_plugins_google-0.10.5.dist-info/METADATA,sha256=AHhTVMBNVlOnqMnLPjncTO_iIqkDS-ExCm_5ubD9Mdg,2058
|
16
|
-
livekit_plugins_google-0.10.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
17
|
-
livekit_plugins_google-0.10.5.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
18
|
-
livekit_plugins_google-0.10.5.dist-info/RECORD,,
|
{livekit_plugins_google-0.10.5.dist-info → livekit_plugins_google-0.10.6.dist-info}/top_level.txt
RENAMED
File without changes