livekit-plugins-google 1.2.4__tar.gz → 1.2.6__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)
  1. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/PKG-INFO +2 -2
  2. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/beta/realtime/realtime_api.py +12 -5
  3. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/stt.py +5 -0
  4. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/tts.py +9 -2
  5. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/utils.py +16 -7
  6. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/version.py +1 -1
  7. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/pyproject.toml +1 -1
  8. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/.gitignore +0 -0
  9. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/README.md +0 -0
  10. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/__init__.py +0 -0
  11. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/beta/__init__.py +0 -0
  12. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/beta/gemini_tts.py +0 -0
  13. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/beta/realtime/__init__.py +0 -0
  14. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/beta/realtime/api_proto.py +0 -0
  15. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/llm.py +0 -0
  16. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/log.py +0 -0
  17. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/models.py +0 -0
  18. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/py.typed +0 -0
  19. {livekit_plugins_google-1.2.4 → livekit_plugins_google-1.2.6}/livekit/plugins/google/tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.4
3
+ Version: 1.2.6
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.4
25
+ Requires-Dist: livekit-agents>=1.2.6
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -428,7 +428,9 @@ class RealtimeSession(llm.RealtimeSession):
428
428
  self._chat_ctx = chat_ctx.copy()
429
429
 
430
430
  async def update_tools(self, tools: list[llm.FunctionTool | llm.RawFunctionTool]) -> None:
431
- new_declarations: list[types.FunctionDeclaration] = to_fnc_ctx(tools)
431
+ new_declarations: list[types.FunctionDeclaration] = to_fnc_ctx(
432
+ tools, use_parameters_json_schema=False
433
+ )
432
434
  current_tool_names = {f.name for f in self._gemini_declarations}
433
435
  new_tool_names = {f.name for f in new_declarations}
434
436
 
@@ -699,10 +701,15 @@ class RealtimeSession(llm.RealtimeSession):
699
701
  break
700
702
 
701
703
  async for response in session.receive():
702
- if (not self._current_generation or self._current_generation._done) and (
703
- response.server_content or response.tool_call
704
- ):
705
- self._start_new_generation()
704
+ if not self._current_generation or self._current_generation._done:
705
+ if response.server_content and response.server_content.interrupted:
706
+ # interrupt a generation already done
707
+ self._handle_input_speech_started()
708
+ # reset the flag and still start a new generation in case it has any other content
709
+ response.server_content.interrupted = False
710
+
711
+ if response.server_content or response.tool_call:
712
+ self._start_new_generation()
706
713
 
707
714
  if response.session_resumption_update:
708
715
  if (
@@ -67,6 +67,7 @@ class STTOptions:
67
67
  punctuate: bool
68
68
  spoken_punctuation: bool
69
69
  enable_word_time_offsets: bool
70
+ enable_word_confidence: bool
70
71
  model: SpeechModels | str
71
72
  sample_rate: int
72
73
  min_confidence_threshold: float
@@ -99,6 +100,7 @@ class STT(stt.STT):
99
100
  punctuate: bool = True,
100
101
  spoken_punctuation: bool = False,
101
102
  enable_word_time_offsets: bool = True,
103
+ enable_word_confidence: bool = False,
102
104
  model: SpeechModels | str = "latest_long",
103
105
  location: str = "global",
104
106
  sample_rate: int = 16000,
@@ -122,6 +124,7 @@ class STT(stt.STT):
122
124
  punctuate(bool): whether to punctuate the audio (default: True)
123
125
  spoken_punctuation(bool): whether to use spoken punctuation (default: False)
124
126
  enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
127
+ enable_word_confidence(bool): whether to enable word confidence (default: False)
125
128
  model(SpeechModels): the model to use for recognition default: "latest_long"
126
129
  location(str): the location to use for recognition default: "global"
127
130
  sample_rate(int): the sample rate of the audio default: 16000
@@ -162,6 +165,7 @@ class STT(stt.STT):
162
165
  punctuate=punctuate,
163
166
  spoken_punctuation=spoken_punctuation,
164
167
  enable_word_time_offsets=enable_word_time_offsets,
168
+ enable_word_confidence=enable_word_confidence,
165
169
  model=model,
166
170
  sample_rate=sample_rate,
167
171
  min_confidence_threshold=min_confidence_threshold,
@@ -243,6 +247,7 @@ class STT(stt.STT):
243
247
  enable_automatic_punctuation=config.punctuate,
244
248
  enable_spoken_punctuation=config.spoken_punctuation,
245
249
  enable_word_time_offsets=config.enable_word_time_offsets,
250
+ enable_word_confidence=config.enable_word_confidence,
246
251
  ),
247
252
  model=config.model,
248
253
  language_codes=config.languages,
@@ -61,6 +61,7 @@ class TTS(tts.TTS):
61
61
  language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
62
62
  gender: NotGivenOr[Gender | str] = NOT_GIVEN,
63
63
  voice_name: NotGivenOr[str] = NOT_GIVEN,
64
+ voice_cloning_key: NotGivenOr[str] = NOT_GIVEN,
64
65
  sample_rate: int = 24000,
65
66
  pitch: int = 0,
66
67
  effects_profile_id: str = "",
@@ -86,6 +87,7 @@ class TTS(tts.TTS):
86
87
  language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
87
88
  gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
88
89
  voice_name (str, optional): Specific voice name. Default is an empty string.
90
+ voice_cloning_key (str, optional): Voice clone key. Created via https://cloud.google.com/text-to-speech/docs/chirp3-instant-custom-voice
89
91
  sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
90
92
  location (str, optional): Location for the TTS client. Default is "global".
91
93
  pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
@@ -115,13 +117,18 @@ class TTS(tts.TTS):
115
117
 
116
118
  lang = language if is_given(language) else DEFAULT_LANGUAGE
117
119
  ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
118
- name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name
119
120
 
120
121
  voice_params = texttospeech.VoiceSelectionParams(
121
- name=name,
122
122
  language_code=lang,
123
123
  ssml_gender=ssml_gender,
124
124
  )
125
+ if is_given(voice_cloning_key):
126
+ voice_params.voice_clone = texttospeech.VoiceCloneParams(
127
+ voice_clone_key=voice_cloning_key,
128
+ )
129
+ else:
130
+ voice_params.name = voice_name if is_given(voice_name) else DEFAULT_VOICE_NAME
131
+
125
132
  if not is_given(tokenizer):
126
133
  tokenizer = tokenize.blingfire.SentenceTokenizer()
127
134
 
@@ -23,18 +23,27 @@ from .tools import _LLMTool
23
23
  __all__ = ["to_fnc_ctx"]
24
24
 
25
25
 
26
- def to_fnc_ctx(fncs: list[FunctionTool | RawFunctionTool]) -> list[types.FunctionDeclaration]:
26
+ def to_fnc_ctx(
27
+ fncs: list[FunctionTool | RawFunctionTool], *, use_parameters_json_schema: bool = True
28
+ ) -> list[types.FunctionDeclaration]:
27
29
  tools: list[types.FunctionDeclaration] = []
28
30
  for fnc in fncs:
29
31
  if is_raw_function_tool(fnc):
30
32
  info = get_raw_function_info(fnc)
31
- tools.append(
32
- types.FunctionDeclaration(
33
- name=info.name,
34
- description=info.raw_schema.get("description", ""),
35
- parameters_json_schema=info.raw_schema.get("parameters", {}),
33
+ fnc_kwargs = {
34
+ "name": info.name,
35
+ "description": info.raw_schema.get("description", ""),
36
+ }
37
+ if use_parameters_json_schema:
38
+ fnc_kwargs["parameters_json_schema"] = info.raw_schema.get("parameters", {})
39
+ else:
40
+ # https://github.com/googleapis/python-genai/issues/1147
41
+ fnc_kwargs["parameters"] = types.Schema.from_json_schema(
42
+ json_schema=types.JSONSchema.model_validate(
43
+ info.raw_schema.get("parameters", {})
44
+ )
36
45
  )
37
- )
46
+ tools.append(types.FunctionDeclaration(**fnc_kwargs))
38
47
 
39
48
  elif is_function_tool(fnc):
40
49
  tools.append(_build_gemini_fnc(fnc))
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.4"
15
+ __version__ = "1.2.6"
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "google-cloud-speech >= 2, < 3",
28
28
  "google-cloud-texttospeech >= 2.27, < 3",
29
29
  "google-genai >= v1.23.0",
30
- "livekit-agents>=1.2.4",
30
+ "livekit-agents>=1.2.6",
31
31
  ]
32
32
 
33
33
  [project.urls]