PyPI - livekit-plugins-google - Versions diffs - 1.2.5__tar.gz → 1.2.6__tar.gz - Mend

livekit-plugins-google 1.2.5.tar.gz → 1.2.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.5
+Version: 1.2.6
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.5
+Requires-Dist: livekit-agents>=1.2.6
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/livekit/plugins/google/beta/realtime/realtime_api.py RENAMED Viewed

@@ -428,7 +428,9 @@ class RealtimeSession(llm.RealtimeSession):
         self._chat_ctx = chat_ctx.copy()
     async def update_tools(self, tools: list[llm.FunctionTool | llm.RawFunctionTool]) -> None:
-        new_declarations: list[types.FunctionDeclaration] = to_fnc_ctx(tools)
+        new_declarations: list[types.FunctionDeclaration] = to_fnc_ctx(
+            tools, use_parameters_json_schema=False
+        )
         current_tool_names = {f.name for f in self._gemini_declarations}
         new_tool_names = {f.name for f in new_declarations}
@@ -699,10 +701,15 @@ class RealtimeSession(llm.RealtimeSession):
                         break
                 async for response in session.receive():
-                    if (not self._current_generation or self._current_generation._done) and (
-                        response.server_content or response.tool_call
-                    ):
-                        self._start_new_generation()
+                    if not self._current_generation or self._current_generation._done:
+                        if response.server_content and response.server_content.interrupted:
+                            # interrupt a generation already done
+                            self._handle_input_speech_started()
+                            # reset the flag and still start a new generation in case it has any other content
+                            response.server_content.interrupted = False
+                        if response.server_content or response.tool_call:
+                            self._start_new_generation()
                     if response.session_resumption_update:
                         if (

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/livekit/plugins/google/stt.py RENAMED Viewed

@@ -67,6 +67,7 @@ class STTOptions:
     punctuate: bool
     spoken_punctuation: bool
     enable_word_time_offsets: bool
+    enable_word_confidence: bool
     model: SpeechModels | str
     sample_rate: int
     min_confidence_threshold: float
@@ -99,6 +100,7 @@ class STT(stt.STT):
         punctuate: bool = True,
         spoken_punctuation: bool = False,
         enable_word_time_offsets: bool = True,
+        enable_word_confidence: bool = False,
         model: SpeechModels | str = "latest_long",
         location: str = "global",
         sample_rate: int = 16000,
@@ -122,6 +124,7 @@ class STT(stt.STT):
             punctuate(bool): whether to punctuate the audio (default: True)
             spoken_punctuation(bool): whether to use spoken punctuation (default: False)
             enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
+            enable_word_confidence(bool): whether to enable word confidence (default: False)
             model(SpeechModels): the model to use for recognition default: "latest_long"
             location(str): the location to use for recognition default: "global"
             sample_rate(int): the sample rate of the audio default: 16000
@@ -162,6 +165,7 @@ class STT(stt.STT):
             punctuate=punctuate,
             spoken_punctuation=spoken_punctuation,
             enable_word_time_offsets=enable_word_time_offsets,
+            enable_word_confidence=enable_word_confidence,
             model=model,
             sample_rate=sample_rate,
             min_confidence_threshold=min_confidence_threshold,
@@ -243,6 +247,7 @@ class STT(stt.STT):
                 enable_automatic_punctuation=config.punctuate,
                 enable_spoken_punctuation=config.spoken_punctuation,
                 enable_word_time_offsets=config.enable_word_time_offsets,
+                enable_word_confidence=config.enable_word_confidence,
             ),
             model=config.model,
             language_codes=config.languages,

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/livekit/plugins/google/tts.py RENAMED Viewed

@@ -61,6 +61,7 @@ class TTS(tts.TTS):
         language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
         gender: NotGivenOr[Gender | str] = NOT_GIVEN,
         voice_name: NotGivenOr[str] = NOT_GIVEN,
+        voice_cloning_key: NotGivenOr[str] = NOT_GIVEN,
         sample_rate: int = 24000,
         pitch: int = 0,
         effects_profile_id: str = "",
@@ -86,6 +87,7 @@ class TTS(tts.TTS):
             language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
             gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
             voice_name (str, optional): Specific voice name. Default is an empty string.
+            voice_cloning_key (str, optional): Voice clone key. Created via https://cloud.google.com/text-to-speech/docs/chirp3-instant-custom-voice
             sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
             location (str, optional): Location for the TTS client. Default is "global".
             pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
@@ -115,13 +117,18 @@ class TTS(tts.TTS):
         lang = language if is_given(language) else DEFAULT_LANGUAGE
         ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
-        name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name
         voice_params = texttospeech.VoiceSelectionParams(
-            name=name,
             language_code=lang,
             ssml_gender=ssml_gender,
         )
+        if is_given(voice_cloning_key):
+            voice_params.voice_clone = texttospeech.VoiceCloneParams(
+                voice_clone_key=voice_cloning_key,
+            )
+        else:
+            voice_params.name = voice_name if is_given(voice_name) else DEFAULT_VOICE_NAME
         if not is_given(tokenizer):
             tokenizer = tokenize.blingfire.SentenceTokenizer()

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/livekit/plugins/google/utils.py RENAMED Viewed

@@ -23,18 +23,27 @@ from .tools import _LLMTool
 __all__ = ["to_fnc_ctx"]
-def to_fnc_ctx(fncs: list[FunctionTool | RawFunctionTool]) -> list[types.FunctionDeclaration]:
+def to_fnc_ctx(
+    fncs: list[FunctionTool | RawFunctionTool], *, use_parameters_json_schema: bool = True
+) -> list[types.FunctionDeclaration]:
     tools: list[types.FunctionDeclaration] = []
     for fnc in fncs:
         if is_raw_function_tool(fnc):
             info = get_raw_function_info(fnc)
-            tools.append(
-                types.FunctionDeclaration(
-                    name=info.name,
-                    description=info.raw_schema.get("description", ""),
-                    parameters_json_schema=info.raw_schema.get("parameters", {}),
+            fnc_kwargs = {
+                "name": info.name,
+                "description": info.raw_schema.get("description", ""),
+            }
+            if use_parameters_json_schema:
+                fnc_kwargs["parameters_json_schema"] = info.raw_schema.get("parameters", {})
+            else:
+                # https://github.com/googleapis/python-genai/issues/1147
+                fnc_kwargs["parameters"] = types.Schema.from_json_schema(
+                    json_schema=types.JSONSchema.model_validate(
+                        info.raw_schema.get("parameters", {})
+                    )
                 )
-            )
+            tools.append(types.FunctionDeclaration(**fnc_kwargs))
         elif is_function_tool(fnc):
             tools.append(_build_gemini_fnc(fnc))

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/livekit/plugins/google/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.5"
+__version__ = "1.2.6"

{livekit_plugins_google-1.2.5 → livekit_plugins_google-1.2.6}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dependencies = [
     "google-cloud-speech >= 2, < 3",
     "google-cloud-texttospeech >= 2.27, < 3",
     "google-genai >= v1.23.0",
-    "livekit-agents>=1.2.5",
+    "livekit-agents>=1.2.6",
 ]
 [project.urls]