livekit-plugins-google 1.0.18__py3-none-any.whl → 1.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/beta/realtime/realtime_api.py +11 -3
- livekit/plugins/google/llm.py +1 -1
- livekit/plugins/google/stt.py +16 -3
- livekit/plugins/google/tts.py +2 -1
- livekit/plugins/google/utils.py +7 -5
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.0.18.dist-info → livekit_plugins_google-1.0.19.dist-info}/METADATA +2 -2
- livekit_plugins_google-1.0.19.dist-info/RECORD +16 -0
- livekit_plugins_google-1.0.18.dist-info/RECORD +0 -16
- {livekit_plugins_google-1.0.18.dist-info → livekit_plugins_google-1.0.19.dist-info}/WHEEL +0 -0
@@ -18,6 +18,7 @@ from google.genai.types import (
|
|
18
18
|
GenerationConfig,
|
19
19
|
LiveClientContent,
|
20
20
|
LiveClientRealtimeInput,
|
21
|
+
LiveClientToolResponse,
|
21
22
|
LiveConnectConfig,
|
22
23
|
LiveServerContent,
|
23
24
|
LiveServerGoAway,
|
@@ -481,11 +482,18 @@ class RealtimeSession(llm.RealtimeSession):
|
|
481
482
|
not self._active_session or self._active_session != session
|
482
483
|
):
|
483
484
|
break
|
484
|
-
|
485
485
|
if isinstance(msg, LiveClientContent):
|
486
|
-
await session.
|
486
|
+
await session.send_client_content(
|
487
|
+
turns=msg.turns, turn_complete=msg.turn_complete
|
488
|
+
)
|
489
|
+
elif isinstance(msg, LiveClientToolResponse):
|
490
|
+
await session.send_tool_response(function_responses=msg.function_responses)
|
491
|
+
elif isinstance(msg, LiveClientRealtimeInput):
|
492
|
+
for media_chunk in msg.media_chunks:
|
493
|
+
await session.send_realtime_input(media=media_chunk)
|
487
494
|
else:
|
488
|
-
|
495
|
+
logger.warning(f"Warning: Received unhandled message type: {type(msg)}")
|
496
|
+
|
489
497
|
except Exception as e:
|
490
498
|
if not self._session_should_close.is_set():
|
491
499
|
logger.error(f"error in send task: {e}", exc_info=e)
|
livekit/plugins/google/llm.py
CHANGED
@@ -270,7 +270,7 @@ class LLMStream(llm.LLMStream):
|
|
270
270
|
request_id = utils.shortuuid()
|
271
271
|
|
272
272
|
try:
|
273
|
-
turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm))
|
273
|
+
turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm), generate=True)
|
274
274
|
function_declarations = to_fnc_ctx(self._tools)
|
275
275
|
if function_declarations:
|
276
276
|
self._extra_kwargs["tools"] = [
|
livekit/plugins/google/stt.py
CHANGED
@@ -54,7 +54,7 @@ LanguageCode = Union[LgType, list[LgType]]
|
|
54
54
|
_max_session_duration = 240
|
55
55
|
|
56
56
|
# Google is very sensitive to background noise, so we'll ignore results with low confidence
|
57
|
-
|
57
|
+
_default_min_confidence = 0.65
|
58
58
|
|
59
59
|
|
60
60
|
# This class is only used internally to encapsulate the options
|
@@ -67,6 +67,7 @@ class STTOptions:
|
|
67
67
|
spoken_punctuation: bool
|
68
68
|
model: SpeechModels | str
|
69
69
|
sample_rate: int
|
70
|
+
min_confidence_threshold: float
|
70
71
|
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN
|
71
72
|
|
72
73
|
def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
|
@@ -98,6 +99,7 @@ class STT(stt.STT):
|
|
98
99
|
model: SpeechModels | str = "latest_long",
|
99
100
|
location: str = "global",
|
100
101
|
sample_rate: int = 16000,
|
102
|
+
min_confidence_threshold: float = _default_min_confidence,
|
101
103
|
credentials_info: NotGivenOr[dict] = NOT_GIVEN,
|
102
104
|
credentials_file: NotGivenOr[str] = NOT_GIVEN,
|
103
105
|
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
|
@@ -118,6 +120,8 @@ class STT(stt.STT):
|
|
118
120
|
model(SpeechModels): the model to use for recognition default: "latest_long"
|
119
121
|
location(str): the location to use for recognition default: "global"
|
120
122
|
sample_rate(int): the sample rate of the audio default: 16000
|
123
|
+
min_confidence_threshold(float): minimum confidence threshold for recognition
|
124
|
+
(default: 0.65)
|
121
125
|
credentials_info(dict): the credentials info to use for recognition (default: None)
|
122
126
|
credentials_file(str): the credentials file to use for recognition (default: None)
|
123
127
|
keywords(List[tuple[str, float]]): list of keywords to recognize (default: None)
|
@@ -149,6 +153,7 @@ class STT(stt.STT):
|
|
149
153
|
spoken_punctuation=spoken_punctuation,
|
150
154
|
model=model,
|
151
155
|
sample_rate=sample_rate,
|
156
|
+
min_confidence_threshold=min_confidence_threshold,
|
152
157
|
keywords=keywords,
|
153
158
|
)
|
154
159
|
self._streams = weakref.WeakSet[SpeechStream]()
|
@@ -343,6 +348,7 @@ class SpeechStream(stt.SpeechStream):
|
|
343
348
|
punctuate: NotGivenOr[bool] = NOT_GIVEN,
|
344
349
|
spoken_punctuation: NotGivenOr[bool] = NOT_GIVEN,
|
345
350
|
model: NotGivenOr[SpeechModels] = NOT_GIVEN,
|
351
|
+
min_confidence_threshold: NotGivenOr[float] = NOT_GIVEN,
|
346
352
|
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
|
347
353
|
):
|
348
354
|
if is_given(languages):
|
@@ -359,6 +365,8 @@ class SpeechStream(stt.SpeechStream):
|
|
359
365
|
self._config.spoken_punctuation = spoken_punctuation
|
360
366
|
if is_given(model):
|
361
367
|
self._config.model = model
|
368
|
+
if is_given(min_confidence_threshold):
|
369
|
+
self._config.min_confidence_threshold = min_confidence_threshold
|
362
370
|
if is_given(keywords):
|
363
371
|
self._config.keywords = keywords
|
364
372
|
|
@@ -405,7 +413,10 @@ class SpeechStream(stt.SpeechStream):
|
|
405
413
|
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED # noqa: E501
|
406
414
|
):
|
407
415
|
result = resp.results[0]
|
408
|
-
speech_data = _streaming_recognize_response_to_speech_data(
|
416
|
+
speech_data = _streaming_recognize_response_to_speech_data(
|
417
|
+
resp,
|
418
|
+
min_confidence_threshold=self._config.min_confidence_threshold,
|
419
|
+
)
|
409
420
|
if speech_data is None:
|
410
421
|
continue
|
411
422
|
|
@@ -530,6 +541,8 @@ def _recognize_response_to_speech_event(
|
|
530
541
|
|
531
542
|
def _streaming_recognize_response_to_speech_data(
|
532
543
|
resp: cloud_speech.StreamingRecognizeResponse,
|
544
|
+
*,
|
545
|
+
min_confidence_threshold: float,
|
533
546
|
) -> stt.SpeechData | None:
|
534
547
|
text = ""
|
535
548
|
confidence = 0.0
|
@@ -542,7 +555,7 @@ def _streaming_recognize_response_to_speech_data(
|
|
542
555
|
confidence /= len(resp.results)
|
543
556
|
lg = resp.results[0].language_code
|
544
557
|
|
545
|
-
if confidence <
|
558
|
+
if confidence < min_confidence_threshold:
|
546
559
|
return None
|
547
560
|
if text == "":
|
548
561
|
return None
|
livekit/plugins/google/tts.py
CHANGED
@@ -56,6 +56,7 @@ class TTS(tts.TTS):
|
|
56
56
|
effects_profile_id: str = "",
|
57
57
|
speaking_rate: float = 1.0,
|
58
58
|
location: str = "global",
|
59
|
+
audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
|
59
60
|
credentials_info: NotGivenOr[dict] = NOT_GIVEN,
|
60
61
|
credentials_file: NotGivenOr[str] = NOT_GIVEN,
|
61
62
|
) -> None:
|
@@ -105,7 +106,7 @@ class TTS(tts.TTS):
|
|
105
106
|
self._opts = _TTSOptions(
|
106
107
|
voice=voice_params,
|
107
108
|
audio_config=texttospeech.AudioConfig(
|
108
|
-
audio_encoding=
|
109
|
+
audio_encoding=audio_encoding,
|
109
110
|
sample_rate_hertz=sample_rate,
|
110
111
|
pitch=pitch,
|
111
112
|
effects_profile_id=effects_profile_id,
|
livekit/plugins/google/utils.py
CHANGED
@@ -39,7 +39,10 @@ def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClient
|
|
39
39
|
|
40
40
|
|
41
41
|
def to_chat_ctx(
|
42
|
-
chat_ctx: llm.ChatContext,
|
42
|
+
chat_ctx: llm.ChatContext,
|
43
|
+
cache_key: Any,
|
44
|
+
ignore_functions: bool = False,
|
45
|
+
generate: bool = False,
|
43
46
|
) -> tuple[list[types.Content], types.Content | None]:
|
44
47
|
turns: list[types.Content] = []
|
45
48
|
system_instruction: types.Content | None = None
|
@@ -99,10 +102,9 @@ def to_chat_ctx(
|
|
99
102
|
if current_role is not None and parts:
|
100
103
|
turns.append(types.Content(role=current_role, parts=parts))
|
101
104
|
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
# turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
|
105
|
+
# Gemini requires the last message to end with user's turn before they can generate
|
106
|
+
if generate and current_role != "user":
|
107
|
+
turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
|
106
108
|
|
107
109
|
return turns, system_instruction
|
108
110
|
|
{livekit_plugins_google-1.0.18.dist-info → livekit_plugins_google-1.0.19.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 1.0.18
|
3
|
+
Version: 1.0.19
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
|
|
22
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
23
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
24
24
|
Requires-Dist: google-genai>=1.12.1
|
25
|
-
Requires-Dist: livekit-agents>=1.0.18
|
25
|
+
Requires-Dist: livekit-agents>=1.0.19
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
|
28
28
|
# LiveKit Plugins Google
|
@@ -0,0 +1,16 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
+
livekit/plugins/google/llm.py,sha256=NaaT4Zaw6o98VcUHNrQcZZRkD7DPREd76O8fG9IOpXQ,16190
|
3
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
+
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
5
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
livekit/plugins/google/stt.py,sha256=MADnkh0YKWY4bLRgBwFv4emu4YFO-7EVnhxO--dPTlI,23082
|
7
|
+
livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
|
8
|
+
livekit/plugins/google/utils.py,sha256=sPZZg5VHf60kSILUIHGIZyN2CWYwnCGNYICn8Mhcv9g,9534
|
9
|
+
livekit/plugins/google/version.py,sha256=UDC8ahmGgRkv-qMQUY3QibuuVevGMQ9Fd4yIhcQBZwA,601
|
10
|
+
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
+
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
+
livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
|
13
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yk202S604Eogp_ssBX2BSbAXV67uUyQzVO-bzLnScrs,31423
|
14
|
+
livekit_plugins_google-1.0.19.dist-info/METADATA,sha256=HuRBvpT9dX3Mz7YOVhZhgQLm3-qQa2vAf2SRDQ5u1vM,3492
|
15
|
+
livekit_plugins_google-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
livekit_plugins_google-1.0.19.dist-info/RECORD,,
|
@@ -1,16 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
-
livekit/plugins/google/llm.py,sha256=SqNGg6-wlrIUo9uaismP7QW5XztkXyDivJXLVgOIZMI,16175
|
3
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
-
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
5
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
|
7
|
-
livekit/plugins/google/tts.py,sha256=fmQwW9a1kPsEsrTvIo8fqw479RxWEx0SIc3oTVaj41U,9031
|
8
|
-
livekit/plugins/google/utils.py,sha256=TjjTwMbdJdxr3bZjUXxs-J_fipTTM00goW2-d9KWX6w,9582
|
9
|
-
livekit/plugins/google/version.py,sha256=cnPu9FVKZV9tFmmz7lEvftrO3B_nWJVFghi3j6UcJLs,601
|
10
|
-
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
-
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=sXp2oHnTlHrAp5wFmcXj0bRtQKixBYedfbufcbjVHxk,30897
|
14
|
-
livekit_plugins_google-1.0.18.dist-info/METADATA,sha256=Vqt0FoqibcKzX_jFXlyFkn-mT7iPC16JlH61VS0fbuw,3492
|
15
|
-
livekit_plugins_google-1.0.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
-
livekit_plugins_google-1.0.18.dist-info/RECORD,,
|
File without changes
|