PyPI - livekit-plugins-google - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

livekit-plugins-google 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

livekit/plugins/google/__init__.py CHANGED Viewed

@@ -13,9 +13,10 @@
 # limitations under the License.
 from .stt import STT, SpeechStream
+from .tts import TTS
 from .version import __version__
-__all__ = ["STT", "SpeechStream", "__version__"]
+__all__ = ["STT", "TTS", "SpeechStream", "__version__"]
 from livekit.agents import Plugin

livekit/plugins/google/log.py ADDED Viewed

@@ -0,0 +1,3 @@
+import logging
+logger = logging.getLogger("livekit.plugins.google")

livekit/plugins/google/models.py CHANGED Viewed

@@ -83,3 +83,7 @@ SpeechLanguages = Literal[
     "vi-VN",
     "da-DK",
 ]
+Gender = Literal["male", "female", "neutral"]
+AudioEncoding = Literal["wav", "mp3", "ogg", "mulaw", "alaw"]

livekit/plugins/google/stt.py CHANGED Viewed

@@ -17,7 +17,6 @@ from __future__ import annotations
 import asyncio
 import contextlib
 import dataclasses
-import logging
 from dataclasses import dataclass
 from typing import Any, AsyncIterable, Dict, List
@@ -29,6 +28,7 @@ from google.auth import credentials  # type: ignore
 from google.cloud.speech_v2 import SpeechAsyncClient
 from google.cloud.speech_v2.types import cloud_speech
+from .log import logger
 from .models import SpeechLanguages, SpeechModels
 LgType = SpeechLanguages | str
@@ -105,7 +105,7 @@ class STT(stt.STT):
             config.languages = [config.languages]
         elif not config.detect_language:
             if len(config.languages) > 1:
-                logging.warning(
+                logger.warning(
                     "multiple languages provided, but language detection is disabled"
                 )
             config.languages = [config.languages[0]]
@@ -208,7 +208,7 @@ class SpeechStream(stt.SpeechStream):
         def log_exception(task: asyncio.Task) -> None:
             if not task.cancelled() and task.exception():
-                logging.error(f"google stt task failed: {task.exception()}")
+                logger.error(f"google stt task failed: {task.exception()}")
         self._main_task.add_done_callback(log_exception)
@@ -256,7 +256,7 @@ class SpeechStream(stt.SpeechStream):
                                     audio=frame.data.tobytes(),
                                 )
                         except Exception as e:
-                            logging.error(
+                            logger.error(
                                 f"an error occurred while streaming inputs: {e}"
                             )
@@ -269,7 +269,7 @@ class SpeechStream(stt.SpeechStream):
                     await self._run_stream(stream)
                 except Exception as e:
                     if retry_count >= max_retry:
-                        logging.error(
+                        logger.error(
                             f"failed to connect to google stt after {max_retry} tries",
                             exc_info=e,
                         )
@@ -277,7 +277,7 @@ class SpeechStream(stt.SpeechStream):
                     retry_delay = min(retry_count * 2, 10)  # max 10s
                     retry_count += 1
-                    logging.warning(
+                    logger.warning(
                         f"google stt connection failed, retrying in {retry_delay}s",
                         exc_info=e,
                     )

livekit/plugins/google/tts.py ADDED Viewed

@@ -0,0 +1,146 @@
+# Copyright 2023 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass
+from typing import AsyncIterable, Optional, Union
+from livekit import rtc
+from livekit.agents import codecs, tts
+from google.cloud import texttospeech
+from google.cloud.texttospeech_v1.types import (
+    SsmlVoiceGender,
+    SynthesizeSpeechResponse,
+)
+from .log import logger
+from .models import AudioEncoding, Gender, SpeechLanguages
+LgType = Union[SpeechLanguages, str]
+GenderType = Union[Gender, str]
+AudioEncodingType = Union[AudioEncoding, str]
+@dataclass
+class TTSOptions:
+    voice: texttospeech.VoiceSelectionParams
+    audio_config: texttospeech.AudioConfig
+class TTS(tts.TTS):
+    def __init__(
+        self,
+        config: Optional[TTSOptions] = None,
+        *,
+        language: LgType = "en-US",
+        gender: GenderType = "neutral",
+        voice_name: str = "",  # Not required
+        audio_encoding: AudioEncodingType = "wav",
+        sample_rate: int = 24000,
+        speaking_rate: float = 1.0,
+        credentials_info: Optional[dict] = None,
+        credentials_file: Optional[str] = None,
+    ) -> None:
+        super().__init__(
+            streaming_supported=False, sample_rate=sample_rate, num_channels=1
+        )
+        if credentials_info:
+            self._client = (
+                texttospeech.TextToSpeechAsyncClient.from_service_account_info(
+                    credentials_info
+                )
+            )
+        elif credentials_file:
+            self._client = (
+                texttospeech.TextToSpeechAsyncClient.from_service_account_file(
+                    credentials_file
+                )
+            )
+        else:
+            self._client = texttospeech.TextToSpeechAsyncClient()
+        if not config:
+            _gender = SsmlVoiceGender.NEUTRAL
+            if gender == "male":
+                _gender = SsmlVoiceGender.MALE
+            elif gender == "female":
+                _gender = SsmlVoiceGender.FEMALE
+            voice = texttospeech.VoiceSelectionParams(
+                name=voice_name,
+                language_code=language,
+                ssml_gender=_gender,
+            )
+            # Support wav and mp3 only
+            if audio_encoding == "wav":
+                _audio_encoding = texttospeech.AudioEncoding.LINEAR16
+            elif audio_encoding == "mp3":
+                _audio_encoding = texttospeech.AudioEncoding.MP3
+            # elif audio_encoding == "opus":
+            #     _audio_encoding = texttospeech.AudioEncoding.OGG_OPUS
+            # elif audio_encoding == "mulaw":
+            #     _audio_encoding = texttospeech.AudioEncoding.MULAW
+            # elif audio_encoding == "alaw":
+            #     _audio_encoding = texttospeech.AudioEncoding.ALAW
+            else:
+                raise NotImplementedError(
+                    f"Audio encoding {audio_encoding} is not supported"
+                )
+            config = TTSOptions(
+                voice=voice,
+                audio_config=texttospeech.AudioConfig(
+                    audio_encoding=_audio_encoding,
+                    sample_rate_hertz=sample_rate,
+                    speaking_rate=speaking_rate,
+                ),
+            )
+        self._config = config
+    def synthesize(
+        self,
+        text: str,
+    ) -> AsyncIterable[tts.SynthesizedAudio]:
+        async def generator():
+            try:
+                # Perform the text-to-speech request on the text input with the selected
+                # voice parameters and audio file type
+                response: SynthesizeSpeechResponse = (
+                    await self._client.synthesize_speech(
+                        input=texttospeech.SynthesisInput(text=text),
+                        voice=self._config.voice,
+                        audio_config=self._config.audio_config,
+                    )
+                )
+                data = response.audio_content
+                if self._config.audio_config.audio_encoding == "mp3":
+                    decoder = codecs.Mp3StreamDecoder()
+                    frames = decoder.decode_chunk(data)
+                    for frame in frames:
+                        yield tts.SynthesizedAudio(text=text, data=frame)
+                else:
+                    yield tts.SynthesizedAudio(
+                        text=text,
+                        data=rtc.AudioFrame(
+                            data=data,
+                            sample_rate=self._config.audio_config.sample_rate_hertz,
+                            num_channels=1,
+                            samples_per_channel=len(data) // 2,  # 16-bit
+                        ),
+                    )
+            except Exception as e:
+                logger.error(f"failed to synthesize: {e}")
+        return generator()

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.3.0"
+__version__ = "0.4.0"

{livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-google
-Version: 0.3.0
+Version: 0.4.0
 Summary: Agent Framework plugin for services from Google Cloud
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -29,8 +29,8 @@ Requires-Dist: google-cloud-speech <3,>=2
 Requires-Dist: google-cloud-texttospeech <3,>=2
 Requires-Dist: google-cloud-translate <4,>=3
 Requires-Dist: googleapis-common-protos <2,>=1
-Requires-Dist: livekit >=0.9.2
-Requires-Dist: livekit-agents ~=0.5.dev0
+Requires-Dist: livekit ~=0.11
+Requires-Dist: livekit-agents ~=0.6.0
 # LiveKit Plugins Google

livekit_plugins_google-0.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+livekit/plugins/google/__init__.py,sha256=DlQC5cosMFyQlM8_vFvJGoZiziFkd0Sa4mutnsxXyZM,959
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=e-KvFKOn6eFfLucAltwdAwMEYByuHcJpIr7KfO0ClL0,1295
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=sPV4ByAxfeGBNvAGIuwZvheEA0k7NYjXR_UiYWjd39Y,15029
+livekit/plugins/google/tts.py,sha256=ZYtotaD8hZ-n53A7qOfp728oPAWIrJYLvCPjF_Ni-xo,5299
+livekit/plugins/google/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
+livekit_plugins_google-0.4.0.dist-info/METADATA,sha256=xm5VC02Nbzj7x_cxZ-THc4iwb76_Jr7hu7C_G_Z-mtA,1941
+livekit_plugins_google-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+livekit_plugins_google-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_google-0.4.0.dist-info/RECORD,,

livekit_plugins_google-0.3.0.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-livekit/plugins/google/__init__.py,sha256=snPMHNLrurYbLWQOkV_o6qG1CEWsOCZ8ZfPMvmh5ejY,931
-livekit/plugins/google/models.py,sha256=DgiXOvGDO8D9rfCKHJL28lbyQR8mXXB2kpku-szXLRs,1185
-livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=lYA8hlkxG3YSw1Q34j8hgs4us5Ij-TLBQTRwtGPN9MY,15025
-livekit/plugins/google/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
-livekit_plugins_google-0.3.0.dist-info/METADATA,sha256=sPd3OZxViD0Aq1uF1qJpbsYeqLAlq8tB720JXk-_RKw,1945
-livekit_plugins_google-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-livekit_plugins_google-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_google-0.3.0.dist-info/RECORD,,

{livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-google 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

livekit-plugins-google 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl