livekit-plugins-google 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -105,7 +105,7 @@ class RealtimeModel:
105
105
 
106
106
  Args:
107
107
  instructions (str, optional): Initial system instructions for the model. Defaults to "".
108
- api_key (str or None, optional): OpenAI API key. If None, will attempt to read from the environment variable OPENAI_API_KEY
108
+ api_key (str or None, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
109
109
  modalities (ResponseModality): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
110
110
  model (str or None, optional): The name of the model to use. Defaults to "gemini-2.0-flash-exp".
111
111
  voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
@@ -16,6 +16,7 @@ from __future__ import annotations
16
16
 
17
17
  import asyncio
18
18
  import dataclasses
19
+ import time
19
20
  import weakref
20
21
  from dataclasses import dataclass
21
22
  from typing import List, Union
@@ -44,6 +45,10 @@ from .models import SpeechLanguages, SpeechModels
44
45
  LgType = Union[SpeechLanguages, str]
45
46
  LanguageCode = Union[LgType, List[LgType]]
46
47
 
48
+ # Google STT has a timeout of 5 mins, we'll attempt to restart the session
49
+ # before that timeout is reached (the limit below is in seconds)
50
+ _max_session_duration = 240
51
+
47
52
 
48
53
  # This class is only used internally to encapsulate the options
49
54
  @dataclass
@@ -229,8 +234,6 @@ class STT(stt.STT):
229
234
  raise APIStatusError(
230
235
  e.message,
231
236
  status_code=e.code or -1,
232
- request_id=None,
233
- body=None,
234
237
  )
235
238
  except Exception as e:
236
239
  raise APIConnectionError() from e
@@ -278,6 +281,13 @@ class STT(stt.STT):
278
281
  self._config.spoken_punctuation = spoken_punctuation
279
282
  if model is not None:
280
283
  self._config.model = model
284
+ client = None
285
+ recognizer = None
286
+ if location is not None:
287
+ self._location = location
288
+ # if location is changed, fetch a new client and recognizer as per the new location
289
+ client = self._ensure_client()
290
+ recognizer = self._recognizer
281
291
  if keywords is not None:
282
292
  self._config.keywords = keywords
283
293
 
@@ -289,8 +299,9 @@ class STT(stt.STT):
289
299
  punctuate=punctuate,
290
300
  spoken_punctuation=spoken_punctuation,
291
301
  model=model,
292
- location=location,
293
302
  keywords=keywords,
303
+ client=client,
304
+ recognizer=recognizer,
294
305
  )
295
306
 
296
307
 
@@ -312,6 +323,7 @@ class SpeechStream(stt.SpeechStream):
312
323
  self._recognizer = recognizer
313
324
  self._config = config
314
325
  self._reconnect_event = asyncio.Event()
326
+ self._session_connected_at: float = 0
315
327
 
316
328
  def update_options(
317
329
  self,
@@ -322,8 +334,9 @@ class SpeechStream(stt.SpeechStream):
322
334
  punctuate: bool | None = None,
323
335
  spoken_punctuation: bool | None = None,
324
336
  model: SpeechModels | None = None,
325
- location: str | None = None,
326
337
  keywords: List[tuple[str, float]] | None = None,
338
+ client: SpeechAsyncClient | None = None,
339
+ recognizer: str | None = None,
327
340
  ):
328
341
  if languages is not None:
329
342
  if isinstance(languages, str):
@@ -341,13 +354,17 @@ class SpeechStream(stt.SpeechStream):
341
354
  self._config.model = model
342
355
  if keywords is not None:
343
356
  self._config.keywords = keywords
357
+ if client is not None:
358
+ self._client = client
359
+ if recognizer is not None:
360
+ self._recognizer = recognizer
344
361
 
345
362
  self._reconnect_event.set()
346
363
 
347
364
  async def _run(self) -> None:
348
365
  # Google requires an async generator when calling streaming_recognize
349
366
  # this function basically converts the queue into an async generator
350
- async def input_generator():
367
+ async def input_generator(should_stop: asyncio.Event):
351
368
  try:
352
369
  # first request should contain the config
353
370
  yield cloud_speech.StreamingRecognizeRequest(
@@ -356,6 +373,12 @@ class SpeechStream(stt.SpeechStream):
356
373
  )
357
374
 
358
375
  async for frame in self._input_ch:
376
+ # when the stream is aborted due to reconnect, this input_generator
377
+ # needs to stop consuming frames
378
+ # when the generator stops, the previous gRPC stream will close
379
+ if should_stop.is_set():
380
+ return
381
+
359
382
  if isinstance(frame, rtc.AudioFrame):
360
383
  yield cloud_speech.StreamingRecognizeRequest(
361
384
  audio=frame.data.tobytes()
@@ -367,6 +390,7 @@ class SpeechStream(stt.SpeechStream):
367
390
  )
368
391
 
369
392
  async def process_stream(stream):
393
+ has_started = False
370
394
  async for resp in stream:
371
395
  if (
372
396
  resp.speech_event_type
@@ -375,6 +399,7 @@ class SpeechStream(stt.SpeechStream):
375
399
  self._event_ch.send_nowait(
376
400
  stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
377
401
  )
402
+ has_started = True
378
403
 
379
404
  if (
380
405
  resp.speech_event_type
@@ -399,6 +424,22 @@ class SpeechStream(stt.SpeechStream):
399
424
  alternatives=[speech_data],
400
425
  )
401
426
  )
427
+ if (
428
+ time.time() - self._session_connected_at
429
+ > _max_session_duration
430
+ ):
431
+ logger.debug(
432
+ "Google STT maximum connection time reached. Reconnecting..."
433
+ )
434
+ if has_started:
435
+ self._event_ch.send_nowait(
436
+ stt.SpeechEvent(
437
+ type=stt.SpeechEventType.END_OF_SPEECH
438
+ )
439
+ )
440
+ has_started = False
441
+ self._reconnect_event.set()
442
+ return
402
443
 
403
444
  if (
404
445
  resp.speech_event_type
@@ -407,6 +448,7 @@ class SpeechStream(stt.SpeechStream):
407
448
  self._event_ch.send_nowait(
408
449
  stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
409
450
  )
451
+ has_started = False
410
452
 
411
453
  while True:
412
454
  try:
@@ -431,12 +473,15 @@ class SpeechStream(stt.SpeechStream):
431
473
  ),
432
474
  )
433
475
 
476
+ should_stop = asyncio.Event()
434
477
  stream = await self._client.streaming_recognize(
435
- requests=input_generator(),
478
+ requests=input_generator(should_stop),
436
479
  )
480
+ self._session_connected_at = time.time()
437
481
 
438
482
  process_stream_task = asyncio.create_task(process_stream(stream))
439
483
  wait_reconnect_task = asyncio.create_task(self._reconnect_event.wait())
484
+
440
485
  try:
441
486
  done, _ = await asyncio.wait(
442
487
  [process_stream_task, wait_reconnect_task],
@@ -445,14 +490,23 @@ class SpeechStream(stt.SpeechStream):
445
490
  for task in done:
446
491
  if task != wait_reconnect_task:
447
492
  task.result()
493
+ if wait_reconnect_task not in done:
494
+ break
495
+ self._reconnect_event.clear()
448
496
  finally:
449
497
  await utils.aio.gracefully_cancel(
450
498
  process_stream_task, wait_reconnect_task
451
499
  )
452
- finally:
453
- if not self._reconnect_event.is_set():
454
- break
455
- self._reconnect_event.clear()
500
+ should_stop.set()
501
+ except DeadlineExceeded:
502
+ raise APITimeoutError()
503
+ except GoogleAPICallError as e:
504
+ raise APIStatusError(
505
+ e.message,
506
+ status_code=e.code or -1,
507
+ )
508
+ except Exception as e:
509
+ raise APIConnectionError() from e
456
510
 
457
511
 
458
512
  def _recognize_response_to_speech_event(
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.9.0"
15
+ __version__ = "0.9.1"
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: livekit-plugins-google
3
- Version: 0.9.0
3
+ Version: 0.9.1
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -24,6 +24,16 @@ Requires-Dist: google-cloud-speech<3,>=2
24
24
  Requires-Dist: google-cloud-texttospeech<3,>=2
25
25
  Requires-Dist: google-genai>=0.3.0
26
26
  Requires-Dist: livekit-agents>=0.12.3
27
+ Dynamic: classifier
28
+ Dynamic: description
29
+ Dynamic: description-content-type
30
+ Dynamic: home-page
31
+ Dynamic: keywords
32
+ Dynamic: license
33
+ Dynamic: project-url
34
+ Dynamic: requires-dist
35
+ Dynamic: requires-python
36
+ Dynamic: summary
27
37
 
28
38
  # LiveKit Plugins Google
29
39
 
@@ -2,14 +2,14 @@ livekit/plugins/google/__init__.py,sha256=TY-5FwEX4Vs7GLO1wSegIxC5W4UPkHBthlr-__
2
2
  livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
3
3
  livekit/plugins/google/models.py,sha256=cBXhZGY9bFaSCyL9VeSng9wsxhf3peJi3AUYBKV-8GQ,1343
4
4
  livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/google/stt.py,sha256=SfmKgQotIVzk9-Hipo1X5cnLQG4uXLniTUoyM3IynwA,18712
5
+ livekit/plugins/google/stt.py,sha256=E5kXPbicH4FEXBjyBzfqQWA-nPhKkojzcc-cbtWdmNs,21088
6
6
  livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
7
- livekit/plugins/google/version.py,sha256=onRKrcQ35NZG4oEg_95WGeTytHh_6VVAlQKAZhwiEe4,600
7
+ livekit/plugins/google/version.py,sha256=4GcbYy7J7gvPMEA4wlPB0BJqg8CjF7HRVjQ-i1EH7M8,600
8
8
  livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
9
9
  livekit/plugins/google/beta/realtime/__init__.py,sha256=XnJpNIN6NRm7Y4hH2RNA8Xt-tTmkZEKCs_zzU3_koBI,251
10
10
  livekit/plugins/google/beta/realtime/api_proto.py,sha256=IHYBryuzpfGQD86Twlfq6qxrBhFHptf_IvOk36Wxo1M,2156
11
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=OxrbWnUOT_oFdrMruvLPHgEoXlOr6M5oGym9b2Iqz48,15958
12
- livekit_plugins_google-0.9.0.dist-info/METADATA,sha256=tB70OQMa7JtWLqRi1TMDUpv4y0TZEk0L609BN6y0x48,1841
13
- livekit_plugins_google-0.9.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
14
- livekit_plugins_google-0.9.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
15
- livekit_plugins_google-0.9.0.dist-info/RECORD,,
11
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=YUEf3iR9dIctnXRqev_qKSBM_plqcYKudodFO8nADJY,15966
12
+ livekit_plugins_google-0.9.1.dist-info/METADATA,sha256=y5d0OEdbkoGk0IPGURiDZbt6e6sWhsxOU2cioNrPu7w,2056
13
+ livekit_plugins_google-0.9.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
14
+ livekit_plugins_google-0.9.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
15
+ livekit_plugins_google-0.9.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5