livekit-plugins-google 1.2.6__tar.gz → 1.2.7__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)
  1. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/PKG-INFO +2 -2
  2. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/realtime_api.py +14 -1
  3. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/llm.py +0 -4
  4. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/stt.py +29 -12
  5. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/tts.py +1 -1
  6. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/version.py +1 -1
  7. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/pyproject.toml +1 -1
  8. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/.gitignore +0 -0
  9. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/README.md +0 -0
  10. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/__init__.py +0 -0
  11. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/__init__.py +0 -0
  12. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/gemini_tts.py +0 -0
  13. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/__init__.py +0 -0
  14. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/api_proto.py +0 -0
  15. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/log.py +0 -0
  16. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/models.py +0 -0
  17. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/py.typed +0 -0
  18. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/tools.py +0 -0
  19. {livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.6
3
+ Version: 1.2.7
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.6
25
+ Requires-Dist: livekit-agents>=1.2.7
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -708,7 +708,7 @@ class RealtimeSession(llm.RealtimeSession):
708
708
  # reset the flag and still start a new generation in case it has any other content
709
709
  response.server_content.interrupted = False
710
710
 
711
- if response.server_content or response.tool_call:
711
+ if self._is_new_generation(response):
712
712
  self._start_new_generation()
713
713
 
714
714
  if response.session_resumption_update:
@@ -1091,3 +1091,16 @@ class RealtimeSession(llm.RealtimeSession):
1091
1091
  recoverable=recoverable,
1092
1092
  ),
1093
1093
  )
1094
+
1095
+ def _is_new_generation(self, resp: types.LiveServerMessage) -> bool:
1096
+ if resp.tool_call:
1097
+ return True
1098
+
1099
+ if (sc := resp.server_content) and (
1100
+ sc.model_turn
1101
+ or (sc.output_transcription and sc.output_transcription.text is not None)
1102
+ or (sc.input_transcription and sc.input_transcription.text is not None)
1103
+ ):
1104
+ return True
1105
+
1106
+ return False
@@ -157,10 +157,6 @@ class LLM(llm.LLM):
157
157
  if _thinking_budget is not None:
158
158
  if not isinstance(_thinking_budget, int):
159
159
  raise ValueError("thinking_budget inside thinking_config must be an integer")
160
- if not (0 <= _thinking_budget <= 24576):
161
- raise ValueError(
162
- "thinking_budget inside thinking_config must be between 0 and 24576"
163
- )
164
160
 
165
161
  self._opts = _LLMOptions(
166
162
  model=model,
@@ -20,6 +20,7 @@ import time
20
20
  import weakref
21
21
  from collections.abc import AsyncGenerator, AsyncIterable
22
22
  from dataclasses import dataclass
23
+ from datetime import timedelta
23
24
  from typing import Callable, Union, cast
24
25
 
25
26
  from google.api_core.client_options import ClientOptions
@@ -28,6 +29,7 @@ from google.auth import default as gauth_default
28
29
  from google.auth.exceptions import DefaultCredentialsError
29
30
  from google.cloud.speech_v2 import SpeechAsyncClient
30
31
  from google.cloud.speech_v2.types import cloud_speech
32
+ from google.protobuf.duration_pb2 import Duration
31
33
  from livekit import rtc
32
34
  from livekit.agents import (
33
35
  DEFAULT_API_CONNECT_OPTIONS,
@@ -552,6 +554,14 @@ class SpeechStream(stt.SpeechStream):
552
554
  raise APIConnectionError() from e
553
555
 
554
556
 
557
+ def _duration_to_seconds(duration: Duration | timedelta) -> float:
558
+ # Proto Plus may auto-convert Duration to timedelta; handle both.
559
+ # https://proto-plus-python.readthedocs.io/en/latest/marshal.html
560
+ if isinstance(duration, timedelta):
561
+ return duration.total_seconds()
562
+ return duration.seconds + duration.nanos / 1e9
563
+
564
+
555
565
  def _recognize_response_to_speech_event(
556
566
  resp: cloud_speech.RecognizeResponse,
557
567
  ) -> stt.SpeechEvent:
@@ -561,24 +571,31 @@ def _recognize_response_to_speech_event(
561
571
  text += result.alternatives[0].transcript
562
572
  confidence += result.alternatives[0].confidence
563
573
 
564
- # not sure why start_offset and end_offset returns a timedelta
565
- start_offset = resp.results[0].alternatives[0].words[0].start_offset
566
- end_offset = resp.results[-1].alternatives[0].words[-1].end_offset
574
+ alternatives = []
567
575
 
568
- confidence /= len(resp.results)
569
- lg = resp.results[0].language_code
570
- return stt.SpeechEvent(
571
- type=stt.SpeechEventType.FINAL_TRANSCRIPT,
572
- alternatives=[
576
+ # Google STT may return empty results when spoken_lang != stt_lang
577
+ if resp.results:
578
+ try:
579
+ start_time = _duration_to_seconds(resp.results[0].alternatives[0].words[0].start_offset)
580
+ end_time = _duration_to_seconds(resp.results[-1].alternatives[0].words[-1].end_offset)
581
+ except IndexError:
582
+ # When enable_word_time_offsets=False, there are no "words" to access
583
+ start_time = end_time = 0
584
+
585
+ confidence /= len(resp.results)
586
+ lg = resp.results[0].language_code
587
+
588
+ alternatives = [
573
589
  stt.SpeechData(
574
590
  language=lg,
575
- start_time=start_offset.total_seconds(), # type: ignore
576
- end_time=end_offset.total_seconds(), # type: ignore
591
+ start_time=start_time,
592
+ end_time=end_time,
577
593
  confidence=confidence,
578
594
  text=text,
579
595
  )
580
- ],
581
- )
596
+ ]
597
+
598
+ return stt.SpeechEvent(type=stt.SpeechEventType.FINAL_TRANSCRIPT, alternatives=alternatives)
582
599
 
583
600
 
584
601
  def _streaming_recognize_response_to_speech_data(
@@ -124,7 +124,7 @@ class TTS(tts.TTS):
124
124
  )
125
125
  if is_given(voice_cloning_key):
126
126
  voice_params.voice_clone = texttospeech.VoiceCloneParams(
127
- voice_clone_key=voice_cloning_key,
127
+ voice_cloning_key=voice_cloning_key,
128
128
  )
129
129
  else:
130
130
  voice_params.name = voice_name if is_given(voice_name) else DEFAULT_VOICE_NAME
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.6"
15
+ __version__ = "1.2.7"
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "google-cloud-speech >= 2, < 3",
28
28
  "google-cloud-texttospeech >= 2.27, < 3",
29
29
  "google-genai >= v1.23.0",
30
- "livekit-agents>=1.2.6",
30
+ "livekit-agents>=1.2.7",
31
31
  ]
32
32
 
33
33
  [project.urls]