livekit-plugins-google 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,9 +13,10 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .stt import STT, SpeechStream
16
+ from .tts import TTS
16
17
  from .version import __version__
17
18
 
18
- __all__ = ["STT", "SpeechStream", "__version__"]
19
+ __all__ = ["STT", "TTS", "SpeechStream", "__version__"]
19
20
 
20
21
  from livekit.agents import Plugin
21
22
 
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("livekit.plugins.google")
@@ -83,3 +83,7 @@ SpeechLanguages = Literal[
83
83
  "vi-VN",
84
84
  "da-DK",
85
85
  ]
86
+
87
+ Gender = Literal["male", "female", "neutral"]
88
+
89
+ AudioEncoding = Literal["wav", "mp3", "ogg", "mulaw", "alaw"]
@@ -17,7 +17,6 @@ from __future__ import annotations
17
17
  import asyncio
18
18
  import contextlib
19
19
  import dataclasses
20
- import logging
21
20
  from dataclasses import dataclass
22
21
  from typing import Any, AsyncIterable, Dict, List
23
22
 
@@ -29,6 +28,7 @@ from google.auth import credentials # type: ignore
29
28
  from google.cloud.speech_v2 import SpeechAsyncClient
30
29
  from google.cloud.speech_v2.types import cloud_speech
31
30
 
31
+ from .log import logger
32
32
  from .models import SpeechLanguages, SpeechModels
33
33
 
34
34
  LgType = SpeechLanguages | str
@@ -105,7 +105,7 @@ class STT(stt.STT):
105
105
  config.languages = [config.languages]
106
106
  elif not config.detect_language:
107
107
  if len(config.languages) > 1:
108
- logging.warning(
108
+ logger.warning(
109
109
  "multiple languages provided, but language detection is disabled"
110
110
  )
111
111
  config.languages = [config.languages[0]]
@@ -208,7 +208,7 @@ class SpeechStream(stt.SpeechStream):
208
208
 
209
209
  def log_exception(task: asyncio.Task) -> None:
210
210
  if not task.cancelled() and task.exception():
211
- logging.error(f"google stt task failed: {task.exception()}")
211
+ logger.error(f"google stt task failed: {task.exception()}")
212
212
 
213
213
  self._main_task.add_done_callback(log_exception)
214
214
 
@@ -256,7 +256,7 @@ class SpeechStream(stt.SpeechStream):
256
256
  audio=frame.data.tobytes(),
257
257
  )
258
258
  except Exception as e:
259
- logging.error(
259
+ logger.error(
260
260
  f"an error occurred while streaming inputs: {e}"
261
261
  )
262
262
 
@@ -269,7 +269,7 @@ class SpeechStream(stt.SpeechStream):
269
269
  await self._run_stream(stream)
270
270
  except Exception as e:
271
271
  if retry_count >= max_retry:
272
- logging.error(
272
+ logger.error(
273
273
  f"failed to connect to google stt after {max_retry} tries",
274
274
  exc_info=e,
275
275
  )
@@ -277,7 +277,7 @@ class SpeechStream(stt.SpeechStream):
277
277
 
278
278
  retry_delay = min(retry_count * 2, 10) # max 10s
279
279
  retry_count += 1
280
- logging.warning(
280
+ logger.warning(
281
281
  f"google stt connection failed, retrying in {retry_delay}s",
282
282
  exc_info=e,
283
283
  )
@@ -0,0 +1,146 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass
16
+ from typing import AsyncIterable, Optional, Union
17
+
18
+ from livekit import rtc
19
+ from livekit.agents import codecs, tts
20
+
21
+ from google.cloud import texttospeech
22
+ from google.cloud.texttospeech_v1.types import (
23
+ SsmlVoiceGender,
24
+ SynthesizeSpeechResponse,
25
+ )
26
+
27
+ from .log import logger
28
+ from .models import AudioEncoding, Gender, SpeechLanguages
29
+
30
+ LgType = Union[SpeechLanguages, str]
31
+ GenderType = Union[Gender, str]
32
+ AudioEncodingType = Union[AudioEncoding, str]
33
+
34
+
35
+ @dataclass
36
+ class TTSOptions:
37
+ voice: texttospeech.VoiceSelectionParams
38
+ audio_config: texttospeech.AudioConfig
39
+
40
+
41
+ class TTS(tts.TTS):
42
+ def __init__(
43
+ self,
44
+ config: Optional[TTSOptions] = None,
45
+ *,
46
+ language: LgType = "en-US",
47
+ gender: GenderType = "neutral",
48
+ voice_name: str = "", # Not required
49
+ audio_encoding: AudioEncodingType = "wav",
50
+ sample_rate: int = 24000,
51
+ speaking_rate: float = 1.0,
52
+ credentials_info: Optional[dict] = None,
53
+ credentials_file: Optional[str] = None,
54
+ ) -> None:
55
+ super().__init__(
56
+ streaming_supported=False, sample_rate=sample_rate, num_channels=1
57
+ )
58
+
59
+ if credentials_info:
60
+ self._client = (
61
+ texttospeech.TextToSpeechAsyncClient.from_service_account_info(
62
+ credentials_info
63
+ )
64
+ )
65
+ elif credentials_file:
66
+ self._client = (
67
+ texttospeech.TextToSpeechAsyncClient.from_service_account_file(
68
+ credentials_file
69
+ )
70
+ )
71
+ else:
72
+ self._client = texttospeech.TextToSpeechAsyncClient()
73
+
74
+ if not config:
75
+ _gender = SsmlVoiceGender.NEUTRAL
76
+ if gender == "male":
77
+ _gender = SsmlVoiceGender.MALE
78
+ elif gender == "female":
79
+ _gender = SsmlVoiceGender.FEMALE
80
+ voice = texttospeech.VoiceSelectionParams(
81
+ name=voice_name,
82
+ language_code=language,
83
+ ssml_gender=_gender,
84
+ )
85
+ # Support wav and mp3 only
86
+ if audio_encoding == "wav":
87
+ _audio_encoding = texttospeech.AudioEncoding.LINEAR16
88
+ elif audio_encoding == "mp3":
89
+ _audio_encoding = texttospeech.AudioEncoding.MP3
90
+ # elif audio_encoding == "opus":
91
+ # _audio_encoding = texttospeech.AudioEncoding.OGG_OPUS
92
+ # elif audio_encoding == "mulaw":
93
+ # _audio_encoding = texttospeech.AudioEncoding.MULAW
94
+ # elif audio_encoding == "alaw":
95
+ # _audio_encoding = texttospeech.AudioEncoding.ALAW
96
+ else:
97
+ raise NotImplementedError(
98
+ f"Audio encoding {audio_encoding} is not supported"
99
+ )
100
+
101
+ config = TTSOptions(
102
+ voice=voice,
103
+ audio_config=texttospeech.AudioConfig(
104
+ audio_encoding=_audio_encoding,
105
+ sample_rate_hertz=sample_rate,
106
+ speaking_rate=speaking_rate,
107
+ ),
108
+ )
109
+ self._config = config
110
+
111
+ def synthesize(
112
+ self,
113
+ text: str,
114
+ ) -> AsyncIterable[tts.SynthesizedAudio]:
115
+ async def generator():
116
+ try:
117
+ # Perform the text-to-speech request on the text input with the selected
118
+ # voice parameters and audio file type
119
+ response: SynthesizeSpeechResponse = (
120
+ await self._client.synthesize_speech(
121
+ input=texttospeech.SynthesisInput(text=text),
122
+ voice=self._config.voice,
123
+ audio_config=self._config.audio_config,
124
+ )
125
+ )
126
+
127
+ data = response.audio_content
128
+ if self._config.audio_config.audio_encoding == "mp3":
129
+ decoder = codecs.Mp3StreamDecoder()
130
+ frames = decoder.decode_chunk(data)
131
+ for frame in frames:
132
+ yield tts.SynthesizedAudio(text=text, data=frame)
133
+ else:
134
+ yield tts.SynthesizedAudio(
135
+ text=text,
136
+ data=rtc.AudioFrame(
137
+ data=data,
138
+ sample_rate=self._config.audio_config.sample_rate_hertz,
139
+ num_channels=1,
140
+ samples_per_channel=len(data) // 2, # 16-bit
141
+ ),
142
+ )
143
+ except Exception as e:
144
+ logger.error(f"failed to synthesize: {e}")
145
+
146
+ return generator()
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.3.0"
15
+ __version__ = "0.4.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-google
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -29,8 +29,8 @@ Requires-Dist: google-cloud-speech <3,>=2
29
29
  Requires-Dist: google-cloud-texttospeech <3,>=2
30
30
  Requires-Dist: google-cloud-translate <4,>=3
31
31
  Requires-Dist: googleapis-common-protos <2,>=1
32
- Requires-Dist: livekit >=0.9.2
33
- Requires-Dist: livekit-agents ~=0.5.dev0
32
+ Requires-Dist: livekit ~=0.11
33
+ Requires-Dist: livekit-agents ~=0.6.0
34
34
 
35
35
  # LiveKit Plugins Google
36
36
 
@@ -0,0 +1,11 @@
1
+ livekit/plugins/google/__init__.py,sha256=DlQC5cosMFyQlM8_vFvJGoZiziFkd0Sa4mutnsxXyZM,959
2
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
3
+ livekit/plugins/google/models.py,sha256=e-KvFKOn6eFfLucAltwdAwMEYByuHcJpIr7KfO0ClL0,1295
4
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/google/stt.py,sha256=sPV4ByAxfeGBNvAGIuwZvheEA0k7NYjXR_UiYWjd39Y,15029
6
+ livekit/plugins/google/tts.py,sha256=ZYtotaD8hZ-n53A7qOfp728oPAWIrJYLvCPjF_Ni-xo,5299
7
+ livekit/plugins/google/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
8
+ livekit_plugins_google-0.4.0.dist-info/METADATA,sha256=xm5VC02Nbzj7x_cxZ-THc4iwb76_Jr7hu7C_G_Z-mtA,1941
9
+ livekit_plugins_google-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
10
+ livekit_plugins_google-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
11
+ livekit_plugins_google-0.4.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=snPMHNLrurYbLWQOkV_o6qG1CEWsOCZ8ZfPMvmh5ejY,931
2
- livekit/plugins/google/models.py,sha256=DgiXOvGDO8D9rfCKHJL28lbyQR8mXXB2kpku-szXLRs,1185
3
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- livekit/plugins/google/stt.py,sha256=lYA8hlkxG3YSw1Q34j8hgs4us5Ij-TLBQTRwtGPN9MY,15025
5
- livekit/plugins/google/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
6
- livekit_plugins_google-0.3.0.dist-info/METADATA,sha256=sPd3OZxViD0Aq1uF1qJpbsYeqLAlq8tB720JXk-_RKw,1945
7
- livekit_plugins_google-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
8
- livekit_plugins_google-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
9
- livekit_plugins_google-0.3.0.dist-info/RECORD,,