livekit-plugins-google 1.0.18__py3-none-any.whl → 1.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ from google.genai.types import (
18
18
  GenerationConfig,
19
19
  LiveClientContent,
20
20
  LiveClientRealtimeInput,
21
+ LiveClientToolResponse,
21
22
  LiveConnectConfig,
22
23
  LiveServerContent,
23
24
  LiveServerGoAway,
@@ -481,11 +482,18 @@ class RealtimeSession(llm.RealtimeSession):
481
482
  not self._active_session or self._active_session != session
482
483
  ):
483
484
  break
484
-
485
485
  if isinstance(msg, LiveClientContent):
486
- await session.send(input=msg)
486
+ await session.send_client_content(
487
+ turns=msg.turns, turn_complete=msg.turn_complete
488
+ )
489
+ elif isinstance(msg, LiveClientToolResponse):
490
+ await session.send_tool_response(function_responses=msg.function_responses)
491
+ elif isinstance(msg, LiveClientRealtimeInput):
492
+ for media_chunk in msg.media_chunks:
493
+ await session.send_realtime_input(media=media_chunk)
487
494
  else:
488
- await session.send(input=msg)
495
+ logger.warning(f"Warning: Received unhandled message type: {type(msg)}")
496
+
489
497
  except Exception as e:
490
498
  if not self._session_should_close.is_set():
491
499
  logger.error(f"error in send task: {e}", exc_info=e)
@@ -270,7 +270,7 @@ class LLMStream(llm.LLMStream):
270
270
  request_id = utils.shortuuid()
271
271
 
272
272
  try:
273
- turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm))
273
+ turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm), generate=True)
274
274
  function_declarations = to_fnc_ctx(self._tools)
275
275
  if function_declarations:
276
276
  self._extra_kwargs["tools"] = [
@@ -54,7 +54,7 @@ LanguageCode = Union[LgType, list[LgType]]
54
54
  _max_session_duration = 240
55
55
 
56
56
  # Google is very sensitive to background noise, so we'll ignore results with low confidence
57
- _min_confidence = 0.65
57
+ _default_min_confidence = 0.65
58
58
 
59
59
 
60
60
  # This class is only used internally to encapsulate the options
@@ -67,6 +67,7 @@ class STTOptions:
67
67
  spoken_punctuation: bool
68
68
  model: SpeechModels | str
69
69
  sample_rate: int
70
+ min_confidence_threshold: float
70
71
  keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN
71
72
 
72
73
  def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
@@ -98,6 +99,7 @@ class STT(stt.STT):
98
99
  model: SpeechModels | str = "latest_long",
99
100
  location: str = "global",
100
101
  sample_rate: int = 16000,
102
+ min_confidence_threshold: float = _default_min_confidence,
101
103
  credentials_info: NotGivenOr[dict] = NOT_GIVEN,
102
104
  credentials_file: NotGivenOr[str] = NOT_GIVEN,
103
105
  keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
@@ -118,6 +120,8 @@ class STT(stt.STT):
118
120
  model(SpeechModels): the model to use for recognition default: "latest_long"
119
121
  location(str): the location to use for recognition default: "global"
120
122
  sample_rate(int): the sample rate of the audio default: 16000
123
+ min_confidence_threshold(float): minimum confidence threshold for recognition
124
+ (default: 0.65)
121
125
  credentials_info(dict): the credentials info to use for recognition (default: None)
122
126
  credentials_file(str): the credentials file to use for recognition (default: None)
123
127
  keywords(List[tuple[str, float]]): list of keywords to recognize (default: None)
@@ -149,6 +153,7 @@ class STT(stt.STT):
149
153
  spoken_punctuation=spoken_punctuation,
150
154
  model=model,
151
155
  sample_rate=sample_rate,
156
+ min_confidence_threshold=min_confidence_threshold,
152
157
  keywords=keywords,
153
158
  )
154
159
  self._streams = weakref.WeakSet[SpeechStream]()
@@ -343,6 +348,7 @@ class SpeechStream(stt.SpeechStream):
343
348
  punctuate: NotGivenOr[bool] = NOT_GIVEN,
344
349
  spoken_punctuation: NotGivenOr[bool] = NOT_GIVEN,
345
350
  model: NotGivenOr[SpeechModels] = NOT_GIVEN,
351
+ min_confidence_threshold: NotGivenOr[float] = NOT_GIVEN,
346
352
  keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
347
353
  ):
348
354
  if is_given(languages):
@@ -359,6 +365,8 @@ class SpeechStream(stt.SpeechStream):
359
365
  self._config.spoken_punctuation = spoken_punctuation
360
366
  if is_given(model):
361
367
  self._config.model = model
368
+ if is_given(min_confidence_threshold):
369
+ self._config.min_confidence_threshold = min_confidence_threshold
362
370
  if is_given(keywords):
363
371
  self._config.keywords = keywords
364
372
 
@@ -405,7 +413,10 @@ class SpeechStream(stt.SpeechStream):
405
413
  == cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED # noqa: E501
406
414
  ):
407
415
  result = resp.results[0]
408
- speech_data = _streaming_recognize_response_to_speech_data(resp)
416
+ speech_data = _streaming_recognize_response_to_speech_data(
417
+ resp,
418
+ min_confidence_threshold=self._config.min_confidence_threshold,
419
+ )
409
420
  if speech_data is None:
410
421
  continue
411
422
 
@@ -530,6 +541,8 @@ def _recognize_response_to_speech_event(
530
541
 
531
542
  def _streaming_recognize_response_to_speech_data(
532
543
  resp: cloud_speech.StreamingRecognizeResponse,
544
+ *,
545
+ min_confidence_threshold: float,
533
546
  ) -> stt.SpeechData | None:
534
547
  text = ""
535
548
  confidence = 0.0
@@ -542,7 +555,7 @@ def _streaming_recognize_response_to_speech_data(
542
555
  confidence /= len(resp.results)
543
556
  lg = resp.results[0].language_code
544
557
 
545
- if confidence < _min_confidence:
558
+ if confidence < min_confidence_threshold:
546
559
  return None
547
560
  if text == "":
548
561
  return None
@@ -56,6 +56,7 @@ class TTS(tts.TTS):
56
56
  effects_profile_id: str = "",
57
57
  speaking_rate: float = 1.0,
58
58
  location: str = "global",
59
+ audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
59
60
  credentials_info: NotGivenOr[dict] = NOT_GIVEN,
60
61
  credentials_file: NotGivenOr[str] = NOT_GIVEN,
61
62
  ) -> None:
@@ -105,7 +106,7 @@ class TTS(tts.TTS):
105
106
  self._opts = _TTSOptions(
106
107
  voice=voice_params,
107
108
  audio_config=texttospeech.AudioConfig(
108
- audio_encoding=texttospeech.AudioEncoding.PCM,
109
+ audio_encoding=audio_encoding,
109
110
  sample_rate_hertz=sample_rate,
110
111
  pitch=pitch,
111
112
  effects_profile_id=effects_profile_id,
@@ -39,7 +39,10 @@ def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClient
39
39
 
40
40
 
41
41
  def to_chat_ctx(
42
- chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
42
+ chat_ctx: llm.ChatContext,
43
+ cache_key: Any,
44
+ ignore_functions: bool = False,
45
+ generate: bool = False,
43
46
  ) -> tuple[list[types.Content], types.Content | None]:
44
47
  turns: list[types.Content] = []
45
48
  system_instruction: types.Content | None = None
@@ -99,10 +102,9 @@ def to_chat_ctx(
99
102
  if current_role is not None and parts:
100
103
  turns.append(types.Content(role=current_role, parts=parts))
101
104
 
102
- # # Gemini requires the last message to end with user's turn before they can generate
103
- # # currently not used because to_chat_ctx should not be used to force a new generation
104
- # if current_role != "user":
105
- # turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
105
+ # Gemini requires the conversation to end with a user turn before it can generate
106
+ if generate and current_role != "user":
107
+ turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
106
108
 
107
109
  return turns, system_instruction
108
110
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.18"
15
+ __version__ = "1.0.19"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.0.18
3
+ Version: 1.0.19
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2
24
24
  Requires-Dist: google-genai>=1.12.1
25
- Requires-Dist: livekit-agents>=1.0.18
25
+ Requires-Dist: livekit-agents>=1.0.19
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # LiveKit Plugins Google
@@ -0,0 +1,16 @@
1
+ livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
+ livekit/plugins/google/llm.py,sha256=NaaT4Zaw6o98VcUHNrQcZZRkD7DPREd76O8fG9IOpXQ,16190
3
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
+ livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
5
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ livekit/plugins/google/stt.py,sha256=MADnkh0YKWY4bLRgBwFv4emu4YFO-7EVnhxO--dPTlI,23082
7
+ livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
8
+ livekit/plugins/google/utils.py,sha256=sPZZg5VHf60kSILUIHGIZyN2CWYwnCGNYICn8Mhcv9g,9534
9
+ livekit/plugins/google/version.py,sha256=UDC8ahmGgRkv-qMQUY3QibuuVevGMQ9Fd4yIhcQBZwA,601
10
+ livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
+ livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
+ livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
13
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yk202S604Eogp_ssBX2BSbAXV67uUyQzVO-bzLnScrs,31423
14
+ livekit_plugins_google-1.0.19.dist-info/METADATA,sha256=HuRBvpT9dX3Mz7YOVhZhgQLm3-qQa2vAf2SRDQ5u1vM,3492
15
+ livekit_plugins_google-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ livekit_plugins_google-1.0.19.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
- livekit/plugins/google/llm.py,sha256=SqNGg6-wlrIUo9uaismP7QW5XztkXyDivJXLVgOIZMI,16175
3
- livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
- livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
5
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
7
- livekit/plugins/google/tts.py,sha256=fmQwW9a1kPsEsrTvIo8fqw479RxWEx0SIc3oTVaj41U,9031
8
- livekit/plugins/google/utils.py,sha256=TjjTwMbdJdxr3bZjUXxs-J_fipTTM00goW2-d9KWX6w,9582
9
- livekit/plugins/google/version.py,sha256=cnPu9FVKZV9tFmmz7lEvftrO3B_nWJVFghi3j6UcJLs,601
10
- livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
- livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
- livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
13
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=sXp2oHnTlHrAp5wFmcXj0bRtQKixBYedfbufcbjVHxk,30897
14
- livekit_plugins_google-1.0.18.dist-info/METADATA,sha256=Vqt0FoqibcKzX_jFXlyFkn-mT7iPC16JlH61VS0fbuw,3492
15
- livekit_plugins_google-1.0.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- livekit_plugins_google-1.0.18.dist-info/RECORD,,