livekit-plugins-google 1.2.5__tar.gz → 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-google might be problematic.
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/PKG-INFO +2 -2
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/realtime_api.py +25 -5
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/llm.py +0 -4
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/stt.py +34 -12
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/tts.py +9 -2
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/utils.py +16 -7
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/pyproject.toml +1 -1
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/.gitignore +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/README.md +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/__init__.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/__init__.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/gemini_tts.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/__init__.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/api_proto.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/log.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/models.py +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/py.typed +0 -0
- {livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/tools.py +0 -0
{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.5
+Version: 1.2.7
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.5
+Requires-Dist: livekit-agents>=1.2.7
 Description-Content-Type: text/markdown

 # Google AI plugin for LiveKit Agents
{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/realtime_api.py

@@ -428,7 +428,9 @@ class RealtimeSession(llm.RealtimeSession):
         self._chat_ctx = chat_ctx.copy()

     async def update_tools(self, tools: list[llm.FunctionTool | llm.RawFunctionTool]) -> None:
-        new_declarations: list[types.FunctionDeclaration] = to_fnc_ctx(tools)
+        new_declarations: list[types.FunctionDeclaration] = to_fnc_ctx(
+            tools, use_parameters_json_schema=False
+        )
         current_tool_names = {f.name for f in self._gemini_declarations}
         new_tool_names = {f.name for f in new_declarations}

@@ -699,10 +701,15 @@ class RealtimeSession(llm.RealtimeSession):
                    break

                async for response in session.receive():
-                    if (
-                        response.server_content
-
-
+                    if not self._current_generation or self._current_generation._done:
+                        if response.server_content and response.server_content.interrupted:
+                            # interrupt a generation already done
+                            self._handle_input_speech_started()
+                            # reset the flag and still start a new generation in case it has any other content
+                            response.server_content.interrupted = False
+
+                        if self._is_new_generation(response):
+                            self._start_new_generation()

                    if response.session_resumption_update:
                        if (
@@ -1084,3 +1091,16 @@ class RealtimeSession(llm.RealtimeSession):
                recoverable=recoverable,
            ),
        )
+
+    def _is_new_generation(self, resp: types.LiveServerMessage) -> bool:
+        if resp.tool_call:
+            return True
+
+        if (sc := resp.server_content) and (
+            sc.model_turn
+            or (sc.output_transcription and sc.output_transcription.text is not None)
+            or (sc.input_transcription and sc.input_transcription.text is not None)
+        ):
+            return True
+
+        return False
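The new _is_new_generation gate only opens a generation when a server message actually carries a tool call, model content, or transcription text; bare keep-alive or bookkeeping messages no longer start one. Below is a small self-contained sketch of that decision logic using stand-in dataclasses instead of the real google.genai types.LiveServerMessage objects (illustrative only, not part of the plugin):

from dataclasses import dataclass
from typing import Optional

# Stand-ins for the fields of the live-API message that the check inspects.
@dataclass
class FakeTranscription:
    text: Optional[str] = None

@dataclass
class FakeServerContent:
    model_turn: Optional[object] = None
    input_transcription: Optional[FakeTranscription] = None
    output_transcription: Optional[FakeTranscription] = None

@dataclass
class FakeMessage:
    tool_call: Optional[object] = None
    server_content: Optional[FakeServerContent] = None

def is_new_generation(resp: FakeMessage) -> bool:
    # Same decision logic as RealtimeSession._is_new_generation in the hunk above.
    if resp.tool_call:
        return True
    if (sc := resp.server_content) and (
        sc.model_turn
        or (sc.output_transcription and sc.output_transcription.text is not None)
        or (sc.input_transcription and sc.input_transcription.text is not None)
    ):
        return True
    return False

# A bookkeeping message (e.g. only a session resumption update) starts nothing...
assert not is_new_generation(FakeMessage())
# ...while transcription text or a tool call does.
assert is_new_generation(
    FakeMessage(server_content=FakeServerContent(output_transcription=FakeTranscription(text="Hi")))
)
assert is_new_generation(FakeMessage(tool_call=object()))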
{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/llm.py

@@ -157,10 +157,6 @@ class LLM(llm.LLM):
        if _thinking_budget is not None:
            if not isinstance(_thinking_budget, int):
                raise ValueError("thinking_budget inside thinking_config must be an integer")
-            if not (0 <= _thinking_budget <= 24576):
-                raise ValueError(
-                    "thinking_budget inside thinking_config must be between 0 and 24576"
-                )

        self._opts = _LLMOptions(
            model=model,
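Dropping the client-side 0-24576 bound means larger budgets (and model-specific sentinel values, where the Gemini API accepts them) now pass straight through and are validated by the API instead. A minimal sketch, assuming thinking_config accepts a google.genai types.ThinkingConfig as in the current constructor (an equivalent dict should also work):

from google.genai import types
from livekit.plugins import google

# Sketch only: a budget above the old 24576 cap is no longer rejected locally;
# the Gemini API enforces its own per-model limits.
llm = google.LLM(
    model="gemini-2.5-pro",
    thinking_config=types.ThinkingConfig(thinking_budget=32768),
)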
{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/stt.py

@@ -20,6 +20,7 @@ import time
 import weakref
 from collections.abc import AsyncGenerator, AsyncIterable
 from dataclasses import dataclass
+from datetime import timedelta
 from typing import Callable, Union, cast

 from google.api_core.client_options import ClientOptions
@@ -28,6 +29,7 @@ from google.auth import default as gauth_default
 from google.auth.exceptions import DefaultCredentialsError
 from google.cloud.speech_v2 import SpeechAsyncClient
 from google.cloud.speech_v2.types import cloud_speech
+from google.protobuf.duration_pb2 import Duration
 from livekit import rtc
 from livekit.agents import (
     DEFAULT_API_CONNECT_OPTIONS,
@@ -67,6 +69,7 @@ class STTOptions:
    punctuate: bool
    spoken_punctuation: bool
    enable_word_time_offsets: bool
+    enable_word_confidence: bool
    model: SpeechModels | str
    sample_rate: int
    min_confidence_threshold: float
@@ -99,6 +102,7 @@ class STT(stt.STT):
        punctuate: bool = True,
        spoken_punctuation: bool = False,
        enable_word_time_offsets: bool = True,
+        enable_word_confidence: bool = False,
        model: SpeechModels | str = "latest_long",
        location: str = "global",
        sample_rate: int = 16000,
@@ -122,6 +126,7 @@ class STT(stt.STT):
            punctuate(bool): whether to punctuate the audio (default: True)
            spoken_punctuation(bool): whether to use spoken punctuation (default: False)
            enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
+            enable_word_confidence(bool): whether to enable word confidence (default: False)
            model(SpeechModels): the model to use for recognition default: "latest_long"
            location(str): the location to use for recognition default: "global"
            sample_rate(int): the sample rate of the audio default: 16000
@@ -162,6 +167,7 @@ class STT(stt.STT):
            punctuate=punctuate,
            spoken_punctuation=spoken_punctuation,
            enable_word_time_offsets=enable_word_time_offsets,
+            enable_word_confidence=enable_word_confidence,
            model=model,
            sample_rate=sample_rate,
            min_confidence_threshold=min_confidence_threshold,
@@ -243,6 +249,7 @@ class STT(stt.STT):
                enable_automatic_punctuation=config.punctuate,
                enable_spoken_punctuation=config.spoken_punctuation,
                enable_word_time_offsets=config.enable_word_time_offsets,
+                enable_word_confidence=config.enable_word_confidence,
            ),
            model=config.model,
            language_codes=config.languages,
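The enable_word_confidence flag added above is threaded from the constructor into cloud_speech.RecognitionFeatures untouched. A minimal usage sketch (values other than the new flag are taken from the surrounding context lines):

from livekit.plugins import google

# Sketch: request per-word confidence scores from Google STT v2
# alongside the existing word time offsets.
stt = google.STT(
    model="latest_long",
    enable_word_time_offsets=True,
    enable_word_confidence=True,
)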
@@ -547,6 +554,14 @@ class SpeechStream(stt.SpeechStream):
                raise APIConnectionError() from e


+def _duration_to_seconds(duration: Duration | timedelta) -> float:
+    # Proto Plus may auto-convert Duration to timedelta; handle both.
+    # https://proto-plus-python.readthedocs.io/en/latest/marshal.html
+    if isinstance(duration, timedelta):
+        return duration.total_seconds()
+    return duration.seconds + duration.nanos / 1e9
+
+
 def _recognize_response_to_speech_event(
     resp: cloud_speech.RecognizeResponse,
 ) -> stt.SpeechEvent:
@@ -556,24 +571,31 @@ def _recognize_response_to_speech_event(
        text += result.alternatives[0].transcript
        confidence += result.alternatives[0].confidence

-
-    start_offset = resp.results[0].alternatives[0].words[0].start_offset
-    end_offset = resp.results[-1].alternatives[0].words[-1].end_offset
+    alternatives = []

-
-
-
-
-
+    # Google STT may return empty results when spoken_lang != stt_lang
+    if resp.results:
+        try:
+            start_time = _duration_to_seconds(resp.results[0].alternatives[0].words[0].start_offset)
+            end_time = _duration_to_seconds(resp.results[-1].alternatives[0].words[-1].end_offset)
+        except IndexError:
+            # When enable_word_time_offsets=False, there are no "words" to access
+            start_time = end_time = 0
+
+        confidence /= len(resp.results)
+        lg = resp.results[0].language_code
+
+        alternatives = [
            stt.SpeechData(
                language=lg,
-                start_time=
-                end_time=
+                start_time=start_time,
+                end_time=end_time,
                confidence=confidence,
                text=text,
            )
-    ]
-
+        ]
+
+    return stt.SpeechEvent(type=stt.SpeechEventType.FINAL_TRANSCRIPT, alternatives=alternatives)


 def _streaming_recognize_response_to_speech_data(
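_duration_to_seconds exists because, depending on how proto-plus marshals the response, start_offset/end_offset may arrive either as a protobuf Duration or already converted to datetime.timedelta. A quick standalone check of both branches (the helper is re-declared here so the snippet runs on its own; in the plugin it is module-private to stt.py):

from datetime import timedelta
from google.protobuf.duration_pb2 import Duration

def _duration_to_seconds(duration) -> float:
    # Same logic as the helper added in the hunk above.
    if isinstance(duration, timedelta):
        return duration.total_seconds()
    return duration.seconds + duration.nanos / 1e9

# Both representations of 1.5 s collapse to the same float.
assert _duration_to_seconds(timedelta(seconds=1, microseconds=500_000)) == 1.5
assert _duration_to_seconds(Duration(seconds=1, nanos=500_000_000)) == 1.5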
{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/tts.py

@@ -61,6 +61,7 @@ class TTS(tts.TTS):
        language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
        gender: NotGivenOr[Gender | str] = NOT_GIVEN,
        voice_name: NotGivenOr[str] = NOT_GIVEN,
+        voice_cloning_key: NotGivenOr[str] = NOT_GIVEN,
        sample_rate: int = 24000,
        pitch: int = 0,
        effects_profile_id: str = "",
@@ -86,6 +87,7 @@ class TTS(tts.TTS):
            language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
            gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
            voice_name (str, optional): Specific voice name. Default is an empty string.
+            voice_cloning_key (str, optional): Voice clone key. Created via https://cloud.google.com/text-to-speech/docs/chirp3-instant-custom-voice
            sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
            location (str, optional): Location for the TTS client. Default is "global".
            pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
@@ -115,13 +117,18 @@ class TTS(tts.TTS):

        lang = language if is_given(language) else DEFAULT_LANGUAGE
        ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
-        name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name

        voice_params = texttospeech.VoiceSelectionParams(
-            name=name,
            language_code=lang,
            ssml_gender=ssml_gender,
        )
+        if is_given(voice_cloning_key):
+            voice_params.voice_clone = texttospeech.VoiceCloneParams(
+                voice_cloning_key=voice_cloning_key,
+            )
+        else:
+            voice_params.name = voice_name if is_given(voice_name) else DEFAULT_VOICE_NAME
+
        if not is_given(tokenizer):
            tokenizer = tokenize.blingfire.SentenceTokenizer()

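When voice_cloning_key is given it takes precedence over voice_name and is attached to the voice selection as texttospeech.VoiceCloneParams. A minimal sketch; the key string is a placeholder for a real key produced by the Chirp 3 instant custom voice flow linked in the docstring above:

from livekit.plugins import google

# Sketch: synthesize with an instant-custom-voice clone instead of a named prebuilt voice.
tts = google.TTS(
    language="en-US",
    voice_cloning_key="<voice-cloning-key-from-chirp3>",
    sample_rate=24000,
)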
{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.7}/livekit/plugins/google/utils.py

@@ -23,18 +23,27 @@ from .tools import _LLMTool
 __all__ = ["to_fnc_ctx"]


-def to_fnc_ctx(fncs: list[FunctionTool | RawFunctionTool]) -> list[types.FunctionDeclaration]:
+def to_fnc_ctx(
+    fncs: list[FunctionTool | RawFunctionTool], *, use_parameters_json_schema: bool = True
+) -> list[types.FunctionDeclaration]:
    tools: list[types.FunctionDeclaration] = []
    for fnc in fncs:
        if is_raw_function_tool(fnc):
            info = get_raw_function_info(fnc)
-
-
-
-
-
+            fnc_kwargs = {
+                "name": info.name,
+                "description": info.raw_schema.get("description", ""),
+            }
+            if use_parameters_json_schema:
+                fnc_kwargs["parameters_json_schema"] = info.raw_schema.get("parameters", {})
+            else:
+                # https://github.com/googleapis/python-genai/issues/1147
+                fnc_kwargs["parameters"] = types.Schema.from_json_schema(
+                    json_schema=types.JSONSchema.model_validate(
+                        info.raw_schema.get("parameters", {})
+                    )
                )
-            )
+            tools.append(types.FunctionDeclaration(**fnc_kwargs))

        elif is_function_tool(fnc):
            tools.append(_build_gemini_fnc(fnc))
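to_fnc_ctx is an internal helper, but the new keyword is easy to exercise directly. A minimal sketch, assuming livekit.agents exposes the function_tool decorator at the package root (it is also available as llm.function_tool); per the hunk above, the flag only changes how raw JSON-schema tools are converted, while typed tools still go through _build_gemini_fnc either way:

from livekit.agents import function_tool
from livekit.plugins.google.utils import to_fnc_ctx

@function_tool
async def get_weather(city: str) -> str:
    """Return a short weather summary for a city."""
    return f"It is sunny in {city}."

# Default path: unchanged for existing callers.
decls = to_fnc_ctx([get_weather])

# Realtime path: opt out of parameters_json_schema so raw-schema tools get a
# types.Schema built via Schema.from_json_schema instead
# (workaround for https://github.com/googleapis/python-genai/issues/1147).
decls_for_live = to_fnc_ctx([get_weather], use_parameters_json_schema=False)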