livekit-plugins-google 1.2.8__py3-none-any.whl → 1.2.11__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their supported registries. The information is provided for informational purposes only.

Potentially problematic release.


This version of livekit-plugins-google might be problematic; see the advisory details for more information.

@@ -202,6 +202,7 @@ class RealtimeModel(llm.RealtimeModel):
202
202
  user_transcription=input_audio_transcription is not None,
203
203
  auto_tool_reply_generation=True,
204
204
  audio_output=types.Modality.AUDIO in modalities,
205
+ manual_function_calls=False,
205
206
  )
206
207
  )
207
208
 
@@ -304,6 +305,10 @@ class RealtimeModel(llm.RealtimeModel):
304
305
  async def aclose(self) -> None:
305
306
  pass
306
307
 
308
+ @property
309
+ def model(self) -> str:
310
+ return self._opts.model
311
+
307
312
 
308
313
  class RealtimeSession(llm.RealtimeSession):
309
314
  def __init__(self, realtime_model: RealtimeModel) -> None:
@@ -775,7 +780,7 @@ class RealtimeSession(llm.RealtimeSession):
775
780
  ),
776
781
  language_code=self._opts.language if is_given(self._opts.language) else None,
777
782
  ),
778
- tools=tools_config, # type: ignore
783
+ tools=tools_config,
779
784
  input_audio_transcription=self._opts.input_audio_transcription,
780
785
  output_audio_transcription=self._opts.output_audio_transcription,
781
786
  session_resumption=types.SessionResumptionConfig(
@@ -829,6 +834,7 @@ class RealtimeSession(llm.RealtimeSession):
829
834
  message_stream=self._current_generation.message_ch,
830
835
  function_stream=self._current_generation.function_ch,
831
836
  user_initiated=False,
837
+ response_id=self._current_generation.response_id,
832
838
  )
833
839
 
834
840
  if self._pending_generation_fut and not self._pending_generation_fut.done():
@@ -969,7 +975,7 @@ class RealtimeSession(llm.RealtimeSession):
969
975
  gen.function_ch.send_nowait(
970
976
  llm.FunctionCall(
971
977
  call_id=fnc_call.id or utils.shortuuid("fnc-call-"),
972
- name=fnc_call.name, # type: ignore
978
+ name=fnc_call.name,
973
979
  arguments=arguments,
974
980
  )
975
981
  )
@@ -1018,7 +1024,8 @@ class RealtimeSession(llm.RealtimeSession):
1018
1024
  return token_details_map
1019
1025
 
1020
1026
  metrics = RealtimeModelMetrics(
1021
- label=self._realtime_model._label,
1027
+ label=self._realtime_model.label,
1028
+ model=self._realtime_model.model,
1022
1029
  request_id=current_gen.response_id,
1023
1030
  timestamp=current_gen._created_timestamp,
1024
1031
  duration=duration,
@@ -423,7 +423,7 @@ class LLMStream(llm.LLMStream):
423
423
  tool_calls=[
424
424
  llm.FunctionToolCall(
425
425
  arguments=json.dumps(part.function_call.args),
426
- name=part.function_call.name, # type: ignore
426
+ name=part.function_call.name,
427
427
  call_id=part.function_call.id or utils.shortuuid("function_call_"),
428
428
  )
429
429
  ],
@@ -70,6 +70,7 @@ class STTOptions:
70
70
  spoken_punctuation: bool
71
71
  enable_word_time_offsets: bool
72
72
  enable_word_confidence: bool
73
+ enable_voice_activity_events: bool
73
74
  model: SpeechModels | str
74
75
  sample_rate: int
75
76
  min_confidence_threshold: float
@@ -103,6 +104,7 @@ class STT(stt.STT):
103
104
  spoken_punctuation: bool = False,
104
105
  enable_word_time_offsets: bool = True,
105
106
  enable_word_confidence: bool = False,
107
+ enable_voice_activity_events: bool = False,
106
108
  model: SpeechModels | str = "latest_long",
107
109
  location: str = "global",
108
110
  sample_rate: int = 16000,
@@ -127,6 +129,7 @@ class STT(stt.STT):
127
129
  spoken_punctuation(bool): whether to use spoken punctuation (default: False)
128
130
  enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
129
131
  enable_word_confidence(bool): whether to enable word confidence (default: False)
132
+ enable_voice_activity_events(bool): whether to enable voice activity events (default: False)
130
133
  model(SpeechModels): the model to use for recognition default: "latest_long"
131
134
  location(str): the location to use for recognition default: "global"
132
135
  sample_rate(int): the sample rate of the audio default: 16000
@@ -168,6 +171,7 @@ class STT(stt.STT):
168
171
  spoken_punctuation=spoken_punctuation,
169
172
  enable_word_time_offsets=enable_word_time_offsets,
170
173
  enable_word_confidence=enable_word_confidence,
174
+ enable_voice_activity_events=enable_voice_activity_events,
171
175
  model=model,
172
176
  sample_rate=sample_rate,
173
177
  min_confidence_threshold=min_confidence_threshold,
@@ -507,6 +511,7 @@ class SpeechStream(stt.SpeechStream):
507
511
  ),
508
512
  streaming_features=cloud_speech.StreamingRecognitionFeatures(
509
513
  interim_results=self._config.interim_results,
514
+ enable_voice_activity_events=self._config.enable_voice_activity_events,
510
515
  ),
511
516
  )
512
517
 
@@ -52,6 +52,7 @@ class _TTSOptions:
52
52
  volume_gain_db: float
53
53
  custom_pronunciations: CustomPronunciations | None
54
54
  enable_ssml: bool
55
+ use_markup: bool
55
56
 
56
57
 
57
58
  class TTS(tts.TTS):
@@ -75,6 +76,7 @@ class TTS(tts.TTS):
75
76
  custom_pronunciations: NotGivenOr[CustomPronunciations] = NOT_GIVEN,
76
77
  use_streaming: bool = True,
77
78
  enable_ssml: bool = False,
79
+ use_markup: bool = False,
78
80
  ) -> None:
79
81
  """
80
82
  Create a new instance of Google TTS.
@@ -100,6 +102,7 @@ class TTS(tts.TTS):
100
102
  custom_pronunciations (CustomPronunciations, optional): Custom pronunciations for the TTS. Default is None.
101
103
  use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
102
104
  enable_ssml (bool, optional): Whether to enable SSML support. Default is False.
105
+ use_markup (bool, optional): Whether to enable markup input for HD voices. Default is False.
103
106
  """ # noqa: E501
104
107
  super().__init__(
105
108
  capabilities=tts.TTSCapabilities(streaming=use_streaming),
@@ -107,8 +110,11 @@ class TTS(tts.TTS):
107
110
  num_channels=1,
108
111
  )
109
112
 
110
- if enable_ssml and use_streaming:
111
- raise ValueError("SSML support is not available for streaming synthesis")
113
+ if enable_ssml:
114
+ if use_streaming:
115
+ raise ValueError("SSML support is not available for streaming synthesis")
116
+ if use_markup:
117
+ raise ValueError("SSML support is not available for markup input")
112
118
 
113
119
  self._client: texttospeech.TextToSpeechAsyncClient | None = None
114
120
  self._credentials_info = credentials_info
@@ -145,6 +151,7 @@ class TTS(tts.TTS):
145
151
  volume_gain_db=volume_gain_db,
146
152
  custom_pronunciations=pronunciations,
147
153
  enable_ssml=enable_ssml,
154
+ use_markup=use_markup,
148
155
  )
149
156
  self._streams = weakref.WeakSet[SynthesizeStream]()
150
157
 
@@ -238,19 +245,21 @@ class ChunkedStream(tts.ChunkedStream):
238
245
 
239
246
  async def _run(self, output_emitter: tts.AudioEmitter) -> None:
240
247
  try:
241
- input = (
242
- texttospeech.SynthesisInput(
243
- ssml=self._build_ssml(),
244
- custom_pronunciations=self._opts.custom_pronunciations,
248
+ if self._opts.use_markup:
249
+ tts_input = texttospeech.SynthesisInput(
250
+ markup=self._input_text, custom_pronunciations=self._opts.custom_pronunciations
245
251
  )
246
- if self._opts.enable_ssml
247
- else texttospeech.SynthesisInput(
248
- text=self._input_text,
249
- custom_pronunciations=self._opts.custom_pronunciations,
252
+ elif self._opts.enable_ssml:
253
+ tts_input = texttospeech.SynthesisInput(
254
+ ssml=self._build_ssml(), custom_pronunciations=self._opts.custom_pronunciations
250
255
  )
251
- )
256
+ else:
257
+ tts_input = texttospeech.SynthesisInput(
258
+ text=self._input_text, custom_pronunciations=self._opts.custom_pronunciations
259
+ )
260
+
252
261
  response: SynthesizeSpeechResponse = await self._tts._ensure_client().synthesize_speech(
253
- input=input,
262
+ input=tts_input,
254
263
  voice=self._opts.voice,
255
264
  audio_config=texttospeech.AudioConfig(
256
265
  audio_encoding=self._opts.encoding,
@@ -355,8 +364,12 @@ class SynthesizeStream(tts.SynthesizeStream):
355
364
 
356
365
  async for input in input_stream:
357
366
  self._mark_started()
358
- yield texttospeech.StreamingSynthesizeRequest(
359
- input=texttospeech.StreamingSynthesisInput(text=input.token)
367
+ yield (
368
+ texttospeech.StreamingSynthesizeRequest(
369
+ input=texttospeech.StreamingSynthesisInput(markup=input.token)
370
+ if self._opts.use_markup
371
+ else texttospeech.StreamingSynthesisInput(text=input.token)
372
+ )
360
373
  )
361
374
 
362
375
  except Exception:
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.8"
15
+ __version__ = "1.2.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.8
3
+ Version: 1.2.11
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.8
25
+ Requires-Dist: livekit-agents>=1.2.11
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -1,18 +1,18 @@
1
1
  livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
2
- livekit/plugins/google/llm.py,sha256=cMlmLX1m3TsrLW0a-k2oj6WQSNWEjj3jv7ob8MUoXCI,18825
2
+ livekit/plugins/google/llm.py,sha256=aeeGqhbEScbEs-GKp1T8rLocNqmvG4UBj6diekYe4FU,18809
3
3
  livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
4
  livekit/plugins/google/models.py,sha256=poOvUBvgpqmmQV5EUQsq0RgNIRAq7nH-_IZIcIfPSBI,2801
5
5
  livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/google/stt.py,sha256=gRhVRsfg3BPNkBJGG78QOxEia0mF1jBnI_Ckq1jxqIs,25938
6
+ livekit/plugins/google/stt.py,sha256=RhbDkCbrGWb8R7feS5w766fwUNJEbg7hHlba7Oq9lBI,26305
7
7
  livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
8
- livekit/plugins/google/tts.py,sha256=3TPHBKJJwIt-hSTAdbI4NUcQNerhV0eDuK_o2rprdqg,16606
8
+ livekit/plugins/google/tts.py,sha256=LBLP3pEq1iCCgfidpRTtpeoDKYmXh8PKeJf1llAsybQ,17302
9
9
  livekit/plugins/google/utils.py,sha256=z0iCP6-hYix3JRm2RM5JOBEJCICehUe5N4FTl-JpXLc,9269
10
- livekit/plugins/google/version.py,sha256=XZ3forlpqz9F1SmliM6XQGe2MyHu60jOjDbNGs_mcRg,600
10
+ livekit/plugins/google/version.py,sha256=xsWwuH5qgJrB3wPPfmZaiEH7zObN2yGboBmyrTHj-b8,601
11
11
  livekit/plugins/google/beta/__init__.py,sha256=RvAUdvEiRN-fe4JrgPcN0Jkw1kZR9wPerGMFVjS1Cc0,270
12
12
  livekit/plugins/google/beta/gemini_tts.py,sha256=esWjr0Xf95tl0_AB7MXiFZ_VCORWgcWjzvLvRa3t0FQ,8515
13
13
  livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
14
14
  livekit/plugins/google/beta/realtime/api_proto.py,sha256=nb_QkVQDEH7h0SKA9vdS3JaL12a6t2Z1ja4SdnxE6a8,814
15
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=p0vEaxQhPLUbGjHo7Za2rbBrCjD_UqPk-thd9ybIiuk,47817
16
- livekit_plugins_google-1.2.8.dist-info/METADATA,sha256=2I3YmnnGQGcd8qxz8AGaxG_KflpdYu-3oUBVCudZfA8,1907
17
- livekit_plugins_google-1.2.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- livekit_plugins_google-1.2.8.dist-info/RECORD,,
15
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=RALLfKWb8c4K8ennINDLeVxKrP5JXvGa_nNGP0_ASlI,48012
16
+ livekit_plugins_google-1.2.11.dist-info/METADATA,sha256=ulwiM6njmKCQG_8e1imKwV6oG0IHuXRahBSB0UI0OBM,1909
17
+ livekit_plugins_google-1.2.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ livekit_plugins_google-1.2.11.dist-info/RECORD,,