livekit-plugins-google 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/_utils.py +12 -15
- livekit/plugins/google/beta/realtime/realtime_api.py +4 -4
- livekit/plugins/google/beta/realtime/transcriber.py +1 -1
- livekit/plugins/google/llm.py +7 -6
- livekit/plugins/google/models.py +2 -0
- livekit/plugins/google/stt.py +102 -83
- livekit/plugins/google/tts.py +3 -3
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.10.4.dist-info → livekit_plugins_google-0.10.6.dist-info}/METADATA +2 -2
- livekit_plugins_google-0.10.6.dist-info/RECORD +18 -0
- {livekit_plugins_google-0.10.4.dist-info → livekit_plugins_google-0.10.6.dist-info}/WHEEL +1 -1
- livekit_plugins_google-0.10.4.dist-info/RECORD +0 -18
- {livekit_plugins_google-0.10.4.dist-info → livekit_plugins_google-0.10.6.dist-info}/top_level.txt +0 -0
livekit/plugins/google/_utils.py
CHANGED
@@ -10,14 +10,15 @@ from livekit.agents import llm, utils
|
|
10
10
|
from livekit.agents.llm.function_context import _is_optional_type
|
11
11
|
|
12
12
|
from google.genai import types
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
13
|
+
from google.genai.types import Type as GenaiType
|
14
|
+
|
15
|
+
JSON_SCHEMA_TYPE_MAP: dict[type, GenaiType] = {
|
16
|
+
str: GenaiType.STRING,
|
17
|
+
int: GenaiType.INTEGER,
|
18
|
+
float: GenaiType.NUMBER,
|
19
|
+
bool: GenaiType.BOOLEAN,
|
20
|
+
dict: GenaiType.OBJECT,
|
21
|
+
list: GenaiType.ARRAY,
|
21
22
|
}
|
22
23
|
|
23
24
|
__all__ = ["_build_gemini_ctx", "_build_tools"]
|
@@ -38,7 +39,7 @@ def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
|
|
38
39
|
item_type = get_args(py_type)[0]
|
39
40
|
if item_type not in JSON_SCHEMA_TYPE_MAP:
|
40
41
|
raise ValueError(f"Unsupported type: {item_type}")
|
41
|
-
prop.type =
|
42
|
+
prop.type = GenaiType.ARRAY
|
42
43
|
prop.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[item_type])
|
43
44
|
|
44
45
|
if arg_info.choices:
|
@@ -62,7 +63,7 @@ def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
|
|
62
63
|
required.append(arg_name)
|
63
64
|
|
64
65
|
if properties:
|
65
|
-
parameters = types.Schema(type=
|
66
|
+
parameters = types.Schema(type=GenaiType.OBJECT, properties=properties)
|
66
67
|
if required:
|
67
68
|
parameters.required = required
|
68
69
|
|
@@ -119,7 +120,6 @@ def _build_gemini_ctx(
|
|
119
120
|
parts.append(
|
120
121
|
types.Part(
|
121
122
|
function_call=types.FunctionCall(
|
122
|
-
id=fnc.tool_call_id,
|
123
123
|
name=fnc.function_info.name,
|
124
124
|
args=fnc.arguments,
|
125
125
|
)
|
@@ -132,7 +132,6 @@ def _build_gemini_ctx(
|
|
132
132
|
parts.append(
|
133
133
|
types.Part(
|
134
134
|
function_response=types.FunctionResponse(
|
135
|
-
id=msg.tool_call_id,
|
136
135
|
name=msg.name,
|
137
136
|
response=msg.content,
|
138
137
|
)
|
@@ -142,7 +141,6 @@ def _build_gemini_ctx(
|
|
142
141
|
parts.append(
|
143
142
|
types.Part(
|
144
143
|
function_response=types.FunctionResponse(
|
145
|
-
id=msg.tool_call_id,
|
146
144
|
name=msg.name,
|
147
145
|
response={"result": msg.content},
|
148
146
|
)
|
@@ -193,8 +191,7 @@ def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part
|
|
193
191
|
height=image.inference_height,
|
194
192
|
strategy="scale_aspect_fit",
|
195
193
|
)
|
196
|
-
|
197
|
-
image._cache[cache_key] = base64.b64encode(encoded_data).decode("utf-8")
|
194
|
+
image._cache[cache_key] = utils.images.encode(image.image, opts)
|
198
195
|
|
199
196
|
return types.Part.from_bytes(
|
200
197
|
data=image._cache[cache_key], mime_type="image/jpeg"
|
@@ -11,12 +11,12 @@ from livekit.agents import llm, utils
|
|
11
11
|
from livekit.agents.llm.function_context import _create_ai_function_info
|
12
12
|
|
13
13
|
from google import genai
|
14
|
-
from google.genai._api_client import HttpOptions
|
15
14
|
from google.genai.types import (
|
16
15
|
Blob,
|
17
16
|
Content,
|
18
17
|
FunctionResponse,
|
19
18
|
GenerationConfig,
|
19
|
+
HttpOptions,
|
20
20
|
LiveClientContent,
|
21
21
|
LiveClientRealtimeInput,
|
22
22
|
LiveClientToolResponse,
|
@@ -107,7 +107,7 @@ class RealtimeModel:
|
|
107
107
|
model: LiveAPIModels | str = "gemini-2.0-flash-exp",
|
108
108
|
api_key: str | None = None,
|
109
109
|
voice: Voice | str = "Puck",
|
110
|
-
modalities: list[Modality] = [
|
110
|
+
modalities: list[Modality] = [Modality.AUDIO],
|
111
111
|
enable_user_audio_transcription: bool = True,
|
112
112
|
enable_agent_audio_transcription: bool = True,
|
113
113
|
vertexai: bool = False,
|
@@ -479,12 +479,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
479
479
|
logger.warning(
|
480
480
|
"function call cancelled",
|
481
481
|
extra={
|
482
|
-
"function_call_ids": response.tool_call_cancellation.
|
482
|
+
"function_call_ids": response.tool_call_cancellation.ids,
|
483
483
|
},
|
484
484
|
)
|
485
485
|
self.emit(
|
486
486
|
"function_calls_cancelled",
|
487
|
-
response.tool_call_cancellation.
|
487
|
+
response.tool_call_cancellation.ids,
|
488
488
|
)
|
489
489
|
|
490
490
|
async with self._client.aio.live.connect(
|
@@ -55,7 +55,7 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
55
55
|
parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
|
56
56
|
)
|
57
57
|
self._config = types.LiveConnectConfig(
|
58
|
-
response_modalities=[
|
58
|
+
response_modalities=[types.Modality.TEXT],
|
59
59
|
system_instruction=system_instructions,
|
60
60
|
generation_config=types.GenerationConfig(temperature=0.0),
|
61
61
|
)
|
livekit/plugins/google/llm.py
CHANGED
@@ -240,7 +240,7 @@ class LLMStream(llm.LLMStream):
|
|
240
240
|
# specific function
|
241
241
|
tool_config = types.ToolConfig(
|
242
242
|
function_calling_config=types.FunctionCallingConfig(
|
243
|
-
mode=
|
243
|
+
mode=types.FunctionCallingConfigMode.ANY,
|
244
244
|
allowed_function_names=[self._tool_choice.name],
|
245
245
|
)
|
246
246
|
)
|
@@ -248,7 +248,7 @@ class LLMStream(llm.LLMStream):
|
|
248
248
|
# model must call any function
|
249
249
|
tool_config = types.ToolConfig(
|
250
250
|
function_calling_config=types.FunctionCallingConfig(
|
251
|
-
mode=
|
251
|
+
mode=types.FunctionCallingConfigMode.ANY,
|
252
252
|
allowed_function_names=[
|
253
253
|
fnc.name
|
254
254
|
for fnc in self._fnc_ctx.ai_functions.values()
|
@@ -259,14 +259,14 @@ class LLMStream(llm.LLMStream):
|
|
259
259
|
# model can call any function
|
260
260
|
tool_config = types.ToolConfig(
|
261
261
|
function_calling_config=types.FunctionCallingConfig(
|
262
|
-
mode=
|
262
|
+
mode=types.FunctionCallingConfigMode.AUTO
|
263
263
|
)
|
264
264
|
)
|
265
265
|
elif self._tool_choice == "none":
|
266
266
|
# model cannot call any function
|
267
267
|
tool_config = types.ToolConfig(
|
268
268
|
function_calling_config=types.FunctionCallingConfig(
|
269
|
-
mode=
|
269
|
+
mode=types.FunctionCallingConfigMode.NONE,
|
270
270
|
)
|
271
271
|
)
|
272
272
|
opts["tool_config"] = tool_config
|
@@ -282,11 +282,12 @@ class LLMStream(llm.LLMStream):
|
|
282
282
|
system_instruction=system_instruction,
|
283
283
|
**opts,
|
284
284
|
)
|
285
|
-
|
285
|
+
stream = await self._client.aio.models.generate_content_stream(
|
286
286
|
model=self._model,
|
287
287
|
contents=cast(types.ContentListUnion, turns),
|
288
288
|
config=config,
|
289
|
-
)
|
289
|
+
)
|
290
|
+
async for response in stream: # type: ignore
|
290
291
|
if response.prompt_feedback:
|
291
292
|
raise APIStatusError(
|
292
293
|
response.prompt_feedback.json(),
|
livekit/plugins/google/models.py
CHANGED
livekit/plugins/google/stt.py
CHANGED
@@ -19,7 +19,7 @@ import dataclasses
|
|
19
19
|
import time
|
20
20
|
import weakref
|
21
21
|
from dataclasses import dataclass
|
22
|
-
from typing import List, Union
|
22
|
+
from typing import Callable, List, Union
|
23
23
|
|
24
24
|
from livekit import rtc
|
25
25
|
from livekit.agents import (
|
@@ -49,6 +49,9 @@ LanguageCode = Union[LgType, List[LgType]]
|
|
49
49
|
# before that timeout is reached
|
50
50
|
_max_session_duration = 240
|
51
51
|
|
52
|
+
# Google is very sensitive to background noise, so we'll ignore results with low confidence
|
53
|
+
_min_confidence = 0.65
|
54
|
+
|
52
55
|
|
53
56
|
# This class is only be used internally to encapsulate the options
|
54
57
|
@dataclass
|
@@ -58,7 +61,7 @@ class STTOptions:
|
|
58
61
|
interim_results: bool
|
59
62
|
punctuate: bool
|
60
63
|
spoken_punctuation: bool
|
61
|
-
model: SpeechModels
|
64
|
+
model: SpeechModels | str
|
62
65
|
sample_rate: int
|
63
66
|
keywords: List[tuple[str, float]] | None
|
64
67
|
|
@@ -90,7 +93,7 @@ class STT(stt.STT):
|
|
90
93
|
interim_results: bool = True,
|
91
94
|
punctuate: bool = True,
|
92
95
|
spoken_punctuation: bool = False,
|
93
|
-
model: SpeechModels = "
|
96
|
+
model: SpeechModels | str = "latest_long",
|
94
97
|
location: str = "us-central1",
|
95
98
|
sample_rate: int = 16000,
|
96
99
|
credentials_info: dict | None = None,
|
@@ -103,12 +106,24 @@ class STT(stt.STT):
|
|
103
106
|
Credentials must be provided, either by using the ``credentials_info`` dict, or reading
|
104
107
|
from the file specified in ``credentials_file`` or via Application Default Credentials as
|
105
108
|
described in https://cloud.google.com/docs/authentication/application-default-credentials
|
109
|
+
|
110
|
+
args:
|
111
|
+
languages(LanguageCode): list of language codes to recognize (default: "en-US")
|
112
|
+
detect_language(bool): whether to detect the language of the audio (default: True)
|
113
|
+
interim_results(bool): whether to return interim results (default: True)
|
114
|
+
punctuate(bool): whether to punctuate the audio (default: True)
|
115
|
+
spoken_punctuation(bool): whether to use spoken punctuation (default: False)
|
116
|
+
model(SpeechModels): the model to use for recognition default: "latest_long"
|
117
|
+
location(str): the location to use for recognition default: "us-central1"
|
118
|
+
sample_rate(int): the sample rate of the audio default: 16000
|
119
|
+
credentials_info(dict): the credentials info to use for recognition (default: None)
|
120
|
+
credentials_file(str): the credentials file to use for recognition (default: None)
|
121
|
+
keywords(List[tuple[str, float]]): list of keywords to recognize (default: None)
|
106
122
|
"""
|
107
123
|
super().__init__(
|
108
124
|
capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
|
109
125
|
)
|
110
126
|
|
111
|
-
self._client: SpeechAsyncClient | None = None
|
112
127
|
self._location = location
|
113
128
|
self._credentials_info = credentials_info
|
114
129
|
self._credentials_file = credentials_file
|
@@ -137,40 +152,44 @@ class STT(stt.STT):
|
|
137
152
|
keywords=keywords,
|
138
153
|
)
|
139
154
|
self._streams = weakref.WeakSet[SpeechStream]()
|
155
|
+
self._pool = utils.ConnectionPool[SpeechAsyncClient](
|
156
|
+
max_session_duration=_max_session_duration,
|
157
|
+
connect_cb=self._create_client,
|
158
|
+
)
|
140
159
|
|
141
|
-
def
|
160
|
+
async def _create_client(self) -> SpeechAsyncClient:
|
142
161
|
# Add support for passing a specific location that matches recognizer
|
143
162
|
# see: https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
|
144
163
|
client_options = None
|
164
|
+
client: SpeechAsyncClient | None = None
|
145
165
|
if self._location != "global":
|
146
166
|
client_options = ClientOptions(
|
147
167
|
api_endpoint=f"{self._location}-speech.googleapis.com"
|
148
168
|
)
|
149
169
|
if self._credentials_info:
|
150
|
-
|
170
|
+
client = SpeechAsyncClient.from_service_account_info(
|
151
171
|
self._credentials_info,
|
152
172
|
client_options=client_options,
|
153
173
|
)
|
154
174
|
elif self._credentials_file:
|
155
|
-
|
175
|
+
client = SpeechAsyncClient.from_service_account_file(
|
156
176
|
self._credentials_file,
|
157
177
|
client_options=client_options,
|
158
178
|
)
|
159
179
|
else:
|
160
|
-
|
180
|
+
client = SpeechAsyncClient(
|
161
181
|
client_options=client_options,
|
162
182
|
)
|
163
|
-
assert
|
164
|
-
return
|
183
|
+
assert client is not None
|
184
|
+
return client
|
165
185
|
|
166
|
-
|
167
|
-
def _recognizer(self) -> str:
|
186
|
+
def _get_recognizer(self, client: SpeechAsyncClient) -> str:
|
168
187
|
# TODO(theomonnom): should we use recognizers?
|
169
188
|
# recognizers may improve latency https://cloud.google.com/speech-to-text/v2/docs/recognizers#understand_recognizers
|
170
189
|
|
171
190
|
# TODO(theomonnom): find a better way to access the project_id
|
172
191
|
try:
|
173
|
-
project_id =
|
192
|
+
project_id = client.transport._credentials.project_id # type: ignore
|
174
193
|
except AttributeError:
|
175
194
|
from google.auth import default as ga_default
|
176
195
|
|
@@ -221,16 +240,17 @@ class STT(stt.STT):
|
|
221
240
|
)
|
222
241
|
|
223
242
|
try:
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
243
|
+
async with self._pool.connection() as client:
|
244
|
+
raw = await client.recognize(
|
245
|
+
cloud_speech.RecognizeRequest(
|
246
|
+
recognizer=self._get_recognizer(client),
|
247
|
+
config=config,
|
248
|
+
content=frame.data.tobytes(),
|
249
|
+
),
|
250
|
+
timeout=conn_options.timeout,
|
251
|
+
)
|
232
252
|
|
233
|
-
|
253
|
+
return _recognize_response_to_speech_event(raw)
|
234
254
|
except DeadlineExceeded:
|
235
255
|
raise APITimeoutError()
|
236
256
|
except GoogleAPICallError as e:
|
@@ -250,8 +270,8 @@ class STT(stt.STT):
|
|
250
270
|
config = self._sanitize_options(language=language)
|
251
271
|
stream = SpeechStream(
|
252
272
|
stt=self,
|
253
|
-
|
254
|
-
|
273
|
+
pool=self._pool,
|
274
|
+
recognizer_cb=self._get_recognizer,
|
255
275
|
config=config,
|
256
276
|
conn_options=conn_options,
|
257
277
|
)
|
@@ -284,13 +304,10 @@ class STT(stt.STT):
|
|
284
304
|
self._config.spoken_punctuation = spoken_punctuation
|
285
305
|
if model is not None:
|
286
306
|
self._config.model = model
|
287
|
-
client = None
|
288
|
-
recognizer = None
|
289
307
|
if location is not None:
|
290
308
|
self._location = location
|
291
309
|
# if location is changed, fetch a new client and recognizer as per the new location
|
292
|
-
|
293
|
-
recognizer = self._recognizer
|
310
|
+
self._pool.invalidate()
|
294
311
|
if keywords is not None:
|
295
312
|
self._config.keywords = keywords
|
296
313
|
|
@@ -303,8 +320,6 @@ class STT(stt.STT):
|
|
303
320
|
spoken_punctuation=spoken_punctuation,
|
304
321
|
model=model,
|
305
322
|
keywords=keywords,
|
306
|
-
client=client,
|
307
|
-
recognizer=recognizer,
|
308
323
|
)
|
309
324
|
|
310
325
|
|
@@ -314,16 +329,16 @@ class SpeechStream(stt.SpeechStream):
|
|
314
329
|
*,
|
315
330
|
stt: STT,
|
316
331
|
conn_options: APIConnectOptions,
|
317
|
-
|
318
|
-
|
332
|
+
pool: utils.ConnectionPool[SpeechAsyncClient],
|
333
|
+
recognizer_cb: Callable[[SpeechAsyncClient], str],
|
319
334
|
config: STTOptions,
|
320
335
|
) -> None:
|
321
336
|
super().__init__(
|
322
337
|
stt=stt, conn_options=conn_options, sample_rate=config.sample_rate
|
323
338
|
)
|
324
339
|
|
325
|
-
self.
|
326
|
-
self.
|
340
|
+
self._pool = pool
|
341
|
+
self._recognizer_cb = recognizer_cb
|
327
342
|
self._config = config
|
328
343
|
self._reconnect_event = asyncio.Event()
|
329
344
|
self._session_connected_at: float = 0
|
@@ -338,8 +353,6 @@ class SpeechStream(stt.SpeechStream):
|
|
338
353
|
spoken_punctuation: bool | None = None,
|
339
354
|
model: SpeechModels | None = None,
|
340
355
|
keywords: List[tuple[str, float]] | None = None,
|
341
|
-
client: SpeechAsyncClient | None = None,
|
342
|
-
recognizer: str | None = None,
|
343
356
|
):
|
344
357
|
if languages is not None:
|
345
358
|
if isinstance(languages, str):
|
@@ -357,21 +370,19 @@ class SpeechStream(stt.SpeechStream):
|
|
357
370
|
self._config.model = model
|
358
371
|
if keywords is not None:
|
359
372
|
self._config.keywords = keywords
|
360
|
-
if client is not None:
|
361
|
-
self._client = client
|
362
|
-
if recognizer is not None:
|
363
|
-
self._recognizer = recognizer
|
364
373
|
|
365
374
|
self._reconnect_event.set()
|
366
375
|
|
367
376
|
async def _run(self) -> None:
|
368
377
|
# google requires a async generator when calling streaming_recognize
|
369
378
|
# this function basically convert the queue into a async generator
|
370
|
-
async def input_generator(
|
379
|
+
async def input_generator(
|
380
|
+
client: SpeechAsyncClient, should_stop: asyncio.Event
|
381
|
+
):
|
371
382
|
try:
|
372
383
|
# first request should contain the config
|
373
384
|
yield cloud_speech.StreamingRecognizeRequest(
|
374
|
-
recognizer=self.
|
385
|
+
recognizer=self._recognizer_cb(client),
|
375
386
|
streaming_config=self._streaming_config,
|
376
387
|
)
|
377
388
|
|
@@ -392,7 +403,7 @@ class SpeechStream(stt.SpeechStream):
|
|
392
403
|
"an error occurred while streaming input to google STT"
|
393
404
|
)
|
394
405
|
|
395
|
-
async def process_stream(stream):
|
406
|
+
async def process_stream(client: SpeechAsyncClient, stream):
|
396
407
|
has_started = False
|
397
408
|
async for resp in stream:
|
398
409
|
if (
|
@@ -434,6 +445,7 @@ class SpeechStream(stt.SpeechStream):
|
|
434
445
|
logger.debug(
|
435
446
|
"Google STT maximum connection time reached. Reconnecting..."
|
436
447
|
)
|
448
|
+
self._pool.remove(client)
|
437
449
|
if has_started:
|
438
450
|
self._event_ch.send_nowait(
|
439
451
|
stt.SpeechEvent(
|
@@ -455,52 +467,57 @@ class SpeechStream(stt.SpeechStream):
|
|
455
467
|
|
456
468
|
while True:
|
457
469
|
try:
|
458
|
-
self.
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
470
|
+
async with self._pool.connection() as client:
|
471
|
+
self._streaming_config = cloud_speech.StreamingRecognitionConfig(
|
472
|
+
config=cloud_speech.RecognitionConfig(
|
473
|
+
explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
|
474
|
+
encoding=cloud_speech.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
|
475
|
+
sample_rate_hertz=self._config.sample_rate,
|
476
|
+
audio_channel_count=1,
|
477
|
+
),
|
478
|
+
adaptation=self._config.build_adaptation(),
|
479
|
+
language_codes=self._config.languages,
|
480
|
+
model=self._config.model,
|
481
|
+
features=cloud_speech.RecognitionFeatures(
|
482
|
+
enable_automatic_punctuation=self._config.punctuate,
|
483
|
+
enable_word_time_offsets=True,
|
484
|
+
),
|
464
485
|
),
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
features=cloud_speech.RecognitionFeatures(
|
469
|
-
enable_automatic_punctuation=self._config.punctuate,
|
470
|
-
enable_word_time_offsets=True,
|
486
|
+
streaming_features=cloud_speech.StreamingRecognitionFeatures(
|
487
|
+
enable_voice_activity_events=True,
|
488
|
+
interim_results=self._config.interim_results,
|
471
489
|
),
|
472
|
-
)
|
473
|
-
streaming_features=cloud_speech.StreamingRecognitionFeatures(
|
474
|
-
enable_voice_activity_events=True,
|
475
|
-
interim_results=self._config.interim_results,
|
476
|
-
),
|
477
|
-
)
|
478
|
-
|
479
|
-
should_stop = asyncio.Event()
|
480
|
-
stream = await self._client.streaming_recognize(
|
481
|
-
requests=input_generator(should_stop),
|
482
|
-
)
|
483
|
-
self._session_connected_at = time.time()
|
490
|
+
)
|
484
491
|
|
485
|
-
|
486
|
-
|
492
|
+
should_stop = asyncio.Event()
|
493
|
+
stream = await client.streaming_recognize(
|
494
|
+
requests=input_generator(client, should_stop),
|
495
|
+
)
|
496
|
+
self._session_connected_at = time.time()
|
487
497
|
|
488
|
-
|
489
|
-
|
490
|
-
[process_stream_task, wait_reconnect_task],
|
491
|
-
return_when=asyncio.FIRST_COMPLETED,
|
498
|
+
process_stream_task = asyncio.create_task(
|
499
|
+
process_stream(client, stream)
|
492
500
|
)
|
493
|
-
|
494
|
-
|
495
|
-
task.result()
|
496
|
-
if wait_reconnect_task not in done:
|
497
|
-
break
|
498
|
-
self._reconnect_event.clear()
|
499
|
-
finally:
|
500
|
-
await utils.aio.gracefully_cancel(
|
501
|
-
process_stream_task, wait_reconnect_task
|
501
|
+
wait_reconnect_task = asyncio.create_task(
|
502
|
+
self._reconnect_event.wait()
|
502
503
|
)
|
503
|
-
|
504
|
+
|
505
|
+
try:
|
506
|
+
done, _ = await asyncio.wait(
|
507
|
+
[process_stream_task, wait_reconnect_task],
|
508
|
+
return_when=asyncio.FIRST_COMPLETED,
|
509
|
+
)
|
510
|
+
for task in done:
|
511
|
+
if task != wait_reconnect_task:
|
512
|
+
task.result()
|
513
|
+
if wait_reconnect_task not in done:
|
514
|
+
break
|
515
|
+
self._reconnect_event.clear()
|
516
|
+
finally:
|
517
|
+
await utils.aio.gracefully_cancel(
|
518
|
+
process_stream_task, wait_reconnect_task
|
519
|
+
)
|
520
|
+
should_stop.set()
|
504
521
|
except DeadlineExceeded:
|
505
522
|
raise APITimeoutError()
|
506
523
|
except GoogleAPICallError as e:
|
@@ -555,6 +572,8 @@ def _streaming_recognize_response_to_speech_data(
|
|
555
572
|
confidence /= len(resp.results)
|
556
573
|
lg = resp.results[0].language_code
|
557
574
|
|
575
|
+
if confidence < _min_confidence:
|
576
|
+
return None
|
558
577
|
if text == "":
|
559
578
|
return None
|
560
579
|
|
livekit/plugins/google/tts.py
CHANGED
@@ -15,10 +15,10 @@
|
|
15
15
|
from __future__ import annotations
|
16
16
|
|
17
17
|
from dataclasses import dataclass
|
18
|
+
from typing import Optional
|
18
19
|
|
19
20
|
from livekit import rtc
|
20
21
|
from livekit.agents import (
|
21
|
-
DEFAULT_API_CONNECT_OPTIONS,
|
22
22
|
APIConnectionError,
|
23
23
|
APIConnectOptions,
|
24
24
|
APIStatusError,
|
@@ -160,7 +160,7 @@ class TTS(tts.TTS):
|
|
160
160
|
self,
|
161
161
|
text: str,
|
162
162
|
*,
|
163
|
-
conn_options: APIConnectOptions =
|
163
|
+
conn_options: Optional[APIConnectOptions] = None,
|
164
164
|
) -> "ChunkedStream":
|
165
165
|
return ChunkedStream(
|
166
166
|
tts=self,
|
@@ -177,9 +177,9 @@ class ChunkedStream(tts.ChunkedStream):
|
|
177
177
|
*,
|
178
178
|
tts: TTS,
|
179
179
|
input_text: str,
|
180
|
-
conn_options: APIConnectOptions,
|
181
180
|
opts: _TTSOptions,
|
182
181
|
client: texttospeech.TextToSpeechAsyncClient,
|
182
|
+
conn_options: Optional[APIConnectOptions] = None,
|
183
183
|
) -> None:
|
184
184
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
185
185
|
self._opts, self._client = opts, client
|
{livekit_plugins_google-0.10.4.dist-info → livekit_plugins_google-0.10.6.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.10.
|
3
|
+
Version: 0.10.6
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
|
|
22
22
|
Requires-Dist: google-auth<3,>=2
|
23
23
|
Requires-Dist: google-cloud-speech<3,>=2
|
24
24
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
25
|
-
Requires-Dist: google-genai==
|
25
|
+
Requires-Dist: google-genai==1.3.0
|
26
26
|
Requires-Dist: livekit-agents>=0.12.11
|
27
27
|
Dynamic: classifier
|
28
28
|
Dynamic: description
|
@@ -0,0 +1,18 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
+
livekit/plugins/google/_utils.py,sha256=FG1_26nlWGcI6onPleQQcmGBMfb4QNYgis1B5BMJxWA,7131
|
3
|
+
livekit/plugins/google/llm.py,sha256=LZaHsrkjfboRZLWm7L2G0mw62q2sXBNj4YeeV2Sk2uU,16717
|
4
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
5
|
+
livekit/plugins/google/models.py,sha256=8Ysqkb0pOSSr_S9XHYxLz5nofDTt8RtfbsTIWoptOQU,1532
|
6
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
livekit/plugins/google/stt.py,sha256=0-4mVD5IydvsWp9OzYyVmXe6pz6FDvPutRLF169y674,22752
|
8
|
+
livekit/plugins/google/tts.py,sha256=w4EMk9rPfyAzPyWFwE_5sPo9UY7DNFa2g83K56AUk9I,9228
|
9
|
+
livekit/plugins/google/version.py,sha256=B7ZiVTsE24YmkTGl3227ZHjutNpXQp27028_w5-LuRA,601
|
10
|
+
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
+
livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
|
12
|
+
livekit/plugins/google/beta/realtime/api_proto.py,sha256=ralrRZqIbE71oyuLKRYaXHvm6tcHMwBJueKvSO8Xfus,658
|
13
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=SU_uQvZMBwbVgexZqkAjGmJVUW80ObJ4LP53rV7xqko,21228
|
14
|
+
livekit/plugins/google/beta/realtime/transcriber.py,sha256=rjXO0cSPr3HATxrSfv1MX7IbrjmiTvnLPF280BfRBL8,9809
|
15
|
+
livekit_plugins_google-0.10.6.dist-info/METADATA,sha256=cvkHdPcsrRpbSjW8oowAgN392NWQmoUD429U6zYSeKk,2058
|
16
|
+
livekit_plugins_google-0.10.6.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
17
|
+
livekit_plugins_google-0.10.6.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
18
|
+
livekit_plugins_google-0.10.6.dist-info/RECORD,,
|
@@ -1,18 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
-
livekit/plugins/google/_utils.py,sha256=mjsqblhGMgAZ2MNPisAVkNsqq4gfO6vvprEKzAGoVwE,7248
|
3
|
-
livekit/plugins/google/llm.py,sha256=TVTerAabIf10AKVZr-Kn13eajhQ9RV7K4xaVD771yHU,16547
|
4
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
5
|
-
livekit/plugins/google/models.py,sha256=Q47z_tIwLCufxhJyJHH7_1bo4xdBYZBSkkvMeycuItg,1493
|
6
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
livekit/plugins/google/stt.py,sha256=zl5B8MroarvoBbOmSK5YzC1d3GJeltkpv4Y0n2XLoVE,21203
|
8
|
-
livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
|
9
|
-
livekit/plugins/google/version.py,sha256=4H1pRTakUdztFHr_mZA7ybSGAF2BVH1xhvAHHQwGqwA,601
|
10
|
-
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
-
livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=ralrRZqIbE71oyuLKRYaXHvm6tcHMwBJueKvSO8Xfus,658
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=RPGYAJXelYPo16YyR2qccjUjxUJCkJBU2N5rNTpKxyo,21281
|
14
|
-
livekit/plugins/google/beta/realtime/transcriber.py,sha256=ZpKA3F8dqOtJPDlPiAgjw0AUDBIuhQiBVnvSYL4cdBg,9796
|
15
|
-
livekit_plugins_google-0.10.4.dist-info/METADATA,sha256=UkYK-aE8XAbV0BIinD9e_xGJXi-Oq-oQuc_ZASS8d_c,2058
|
16
|
-
livekit_plugins_google-0.10.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
17
|
-
livekit_plugins_google-0.10.4.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
18
|
-
livekit_plugins_google-0.10.4.dist-info/RECORD,,
|
{livekit_plugins_google-0.10.4.dist-info → livekit_plugins_google-0.10.6.dist-info}/top_level.txt
RENAMED
File without changes
|