livekit-plugins-google 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,3 +29,12 @@ class GooglePlugin(Plugin):
29
29
 
30
30
 
31
31
  Plugin.register_plugin(GooglePlugin())
32
+
33
+ # Clean up docs: hide names not listed in __all__ from the generated pdoc output
34
+ _module = dir()
35
+ NOT_IN_ALL = [m for m in _module if m not in __all__]
36
+
37
+ __pdoc__ = {}
38
+
39
+ for n in NOT_IN_ALL:
40
+ __pdoc__[n] = False
@@ -20,8 +20,15 @@ from dataclasses import dataclass
20
20
  from typing import AsyncIterable, List, Union
21
21
 
22
22
  from livekit import agents, rtc
23
- from livekit.agents import stt, utils
24
-
23
+ from livekit.agents import (
24
+ APIConnectionError,
25
+ APIStatusError,
26
+ APITimeoutError,
27
+ stt,
28
+ utils,
29
+ )
30
+
31
+ from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
25
32
  from google.auth import default as gauth_default
26
33
  from google.auth.exceptions import DefaultCredentialsError
27
34
  from google.cloud.speech_v2 import SpeechAsyncClient
@@ -141,7 +148,7 @@ class STT(stt.STT):
141
148
 
142
149
  return config
143
150
 
144
- async def recognize(
151
+ async def _recognize_impl(
145
152
  self,
146
153
  buffer: utils.AudioBuffer,
147
154
  *,
@@ -165,23 +172,39 @@ class STT(stt.STT):
165
172
  language_codes=config.languages,
166
173
  )
167
174
 
168
- raw = await self._ensure_client().recognize(
169
- cloud_speech.RecognizeRequest(
170
- recognizer=self._recognizer, config=config, content=frame.data.tobytes()
175
+ try:
176
+ raw = await self._ensure_client().recognize(
177
+ cloud_speech.RecognizeRequest(
178
+ recognizer=self._recognizer,
179
+ config=config,
180
+ content=frame.data.tobytes(),
181
+ )
182
+ )
183
+
184
+ return _recognize_response_to_speech_event(raw)
185
+ except DeadlineExceeded:
186
+ raise APITimeoutError()
187
+ except GoogleAPICallError as e:
188
+ raise APIStatusError(
189
+ e.message,
190
+ status_code=e.code or -1,
191
+ request_id=None,
192
+ body=None,
171
193
  )
172
- )
173
- return _recognize_response_to_speech_event(raw)
194
+ except Exception as e:
195
+ raise APIConnectionError() from e
174
196
 
175
197
  def stream(
176
198
  self, *, language: SpeechLanguages | str | None = None
177
199
  ) -> "SpeechStream":
178
200
  config = self._sanitize_options(language=language)
179
- return SpeechStream(self._ensure_client(), self._recognizer, config)
201
+ return SpeechStream(self, self._ensure_client(), self._recognizer, config)
180
202
 
181
203
 
182
204
  class SpeechStream(stt.SpeechStream):
183
205
  def __init__(
184
206
  self,
207
+ stt: STT,
185
208
  client: SpeechAsyncClient,
186
209
  recognizer: str,
187
210
  config: STTOptions,
@@ -189,7 +212,7 @@ class SpeechStream(stt.SpeechStream):
189
212
  num_channels: int = 1,
190
213
  max_retry: int = 32,
191
214
  ) -> None:
192
- super().__init__()
215
+ super().__init__(stt)
193
216
 
194
217
  self._client = client
195
218
  self._recognizer = recognizer
@@ -15,21 +15,22 @@
15
15
  from __future__ import annotations
16
16
 
17
17
  from dataclasses import dataclass
18
- from typing import Union
19
18
 
20
19
  from livekit import rtc
21
- from livekit.agents import tts, utils
22
-
20
+ from livekit.agents import (
21
+ APIConnectionError,
22
+ APIStatusError,
23
+ APITimeoutError,
24
+ tts,
25
+ utils,
26
+ )
27
+
28
+ from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
23
29
  from google.cloud import texttospeech
24
30
  from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
25
31
 
26
- from .log import logger
27
32
  from .models import AudioEncoding, Gender, SpeechLanguages
28
33
 
29
- LgType = Union[SpeechLanguages, str]
30
- GenderType = Union[Gender, str]
31
- AudioEncodingType = Union[AudioEncoding, str]
32
-
33
34
 
34
35
  @dataclass
35
36
  class _TTSOptions:
@@ -41,11 +42,13 @@ class TTS(tts.TTS):
41
42
  def __init__(
42
43
  self,
43
44
  *,
44
- language: LgType = "en-US",
45
- gender: GenderType = "neutral",
45
+ language: SpeechLanguages | str = "en-US",
46
+ gender: Gender | str = "neutral",
46
47
  voice_name: str = "", # Not required
47
- encoding: AudioEncodingType = "linear16",
48
+ encoding: AudioEncoding | str = "linear16",
48
49
  sample_rate: int = 24000,
50
+ pitch: int = 0,
51
+ effects_profile_id: str = "",
49
52
  speaking_rate: float = 1.0,
50
53
  credentials_info: dict | None = None,
51
54
  credentials_file: str | None = None,
@@ -56,6 +59,18 @@ class TTS(tts.TTS):
56
59
  Credentials must be provided, either by using the ``credentials_info`` dict, or reading
57
60
  from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
58
61
  environmental variable.
62
+
63
+ Args:
64
+ language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
65
+ gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
66
+ voice_name (str, optional): Specific voice name. Default is an empty string.
67
+ encoding (AudioEncoding | str, optional): Audio encoding format (e.g., "linear16"). Default is "linear16".
68
+ sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
69
+ pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
70
+ effects_profile_id (str, optional): Identifier for selecting audio effects profiles to apply to the synthesized speech. Default is an empty string.
71
+ speaking_rate (float, optional): Speed of speech. Default is 1.0.
72
+ credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
73
+ credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
59
74
  """
60
75
 
61
76
  super().__init__(
@@ -70,14 +85,10 @@ class TTS(tts.TTS):
70
85
  self._credentials_info = credentials_info
71
86
  self._credentials_file = credentials_file
72
87
 
73
- ssml_gender = SsmlVoiceGender.NEUTRAL
74
- if gender == "male":
75
- ssml_gender = SsmlVoiceGender.MALE
76
- elif gender == "female":
77
- ssml_gender = SsmlVoiceGender.FEMALE
78
-
79
88
  voice = texttospeech.VoiceSelectionParams(
80
- name=voice_name, language_code=language, ssml_gender=ssml_gender
89
+ name=voice_name,
90
+ language_code=language,
91
+ ssml_gender=_gender_from_str(gender),
81
92
  )
82
93
 
83
94
  if encoding == "linear16" or encoding == "wav":
@@ -92,10 +103,36 @@ class TTS(tts.TTS):
92
103
  audio_config=texttospeech.AudioConfig(
93
104
  audio_encoding=_audio_encoding,
94
105
  sample_rate_hertz=sample_rate,
106
+ pitch=pitch,
107
+ effects_profile_id=effects_profile_id,
95
108
  speaking_rate=speaking_rate,
96
109
  ),
97
110
  )
98
111
 
112
+ def update_options(
113
+ self,
114
+ *,
115
+ language: SpeechLanguages | str = "en-US",
116
+ gender: Gender | str = "neutral",
117
+ voice_name: str = "", # Not required
118
+ speaking_rate: float = 1.0,
119
+ ) -> None:
120
+ """
121
+ Update the TTS options.
122
+
123
+ Args:
124
+ language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
125
+ gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
126
+ voice_name (str, optional): Specific voice name. Default is an empty string.
127
+ speaking_rate (float, optional): Speed of speech. Default is 1.0.
128
+ """
129
+ self._opts.voice = texttospeech.VoiceSelectionParams(
130
+ name=voice_name,
131
+ language_code=language,
132
+ ssml_gender=_gender_from_str(gender),
133
+ )
134
+ self._opts.audio_config.speaking_rate = speaking_rate
135
+
99
136
  def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
100
137
  if not self._client:
101
138
  if self._credentials_info:
@@ -118,57 +155,79 @@ class TTS(tts.TTS):
118
155
  return self._client
119
156
 
120
157
  def synthesize(self, text: str) -> "ChunkedStream":
121
- return ChunkedStream(text, self._opts, self._ensure_client())
158
+ return ChunkedStream(self, text, self._opts, self._ensure_client())
122
159
 
123
160
 
124
161
  class ChunkedStream(tts.ChunkedStream):
125
162
  def __init__(
126
- self, text: str, opts: _TTSOptions, client: texttospeech.TextToSpeechAsyncClient
163
+ self,
164
+ tts: TTS,
165
+ text: str,
166
+ opts: _TTSOptions,
167
+ client: texttospeech.TextToSpeechAsyncClient,
127
168
  ) -> None:
128
- super().__init__()
129
- self._text, self._opts, self._client = text, opts, client
169
+ super().__init__(tts, text)
170
+ self._opts, self._client = opts, client
130
171
 
131
- @utils.log_exceptions(logger=logger)
132
172
  async def _main_task(self) -> None:
133
173
  request_id = utils.shortuuid()
134
- segment_id = utils.shortuuid()
135
- response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
136
- input=texttospeech.SynthesisInput(text=self._text),
137
- voice=self._opts.voice,
138
- audio_config=self._opts.audio_config,
139
- )
140
174
 
141
- data = response.audio_content
142
- if self._opts.audio_config.audio_encoding == "mp3":
143
- decoder = utils.codecs.Mp3StreamDecoder()
144
- bstream = utils.audio.AudioByteStream(
145
- sample_rate=self._opts.audio_config.sample_rate_hertz, num_channels=1
175
+ try:
176
+ response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
177
+ input=texttospeech.SynthesisInput(text=self._input_text),
178
+ voice=self._opts.voice,
179
+ audio_config=self._opts.audio_config,
146
180
  )
147
- for frame in decoder.decode_chunk(data):
148
- for frame in bstream.write(frame.data):
149
- self._event_ch.send_nowait(
150
- tts.SynthesizedAudio(
151
- request_id=request_id, segment_id=segment_id, frame=frame
181
+
182
+ data = response.audio_content
183
+ if self._opts.audio_config.audio_encoding == "mp3":
184
+ decoder = utils.codecs.Mp3StreamDecoder()
185
+ bstream = utils.audio.AudioByteStream(
186
+ sample_rate=self._opts.audio_config.sample_rate_hertz,
187
+ num_channels=1,
188
+ )
189
+ for frame in decoder.decode_chunk(data):
190
+ for frame in bstream.write(frame.data.tobytes()):
191
+ self._event_ch.send_nowait(
192
+ tts.SynthesizedAudio(request_id=request_id, frame=frame)
152
193
  )
153
- )
154
194
 
155
- for frame in bstream.flush():
195
+ for frame in bstream.flush():
196
+ self._event_ch.send_nowait(
197
+ tts.SynthesizedAudio(request_id=request_id, frame=frame)
198
+ )
199
+ else:
200
+ data = data[44:] # skip WAV header
156
201
  self._event_ch.send_nowait(
157
202
  tts.SynthesizedAudio(
158
- request_id=request_id, segment_id=segment_id, frame=frame
203
+ request_id=request_id,
204
+ frame=rtc.AudioFrame(
205
+ data=data,
206
+ sample_rate=self._opts.audio_config.sample_rate_hertz,
207
+ num_channels=1,
208
+ samples_per_channel=len(data) // 2, # 16-bit
209
+ ),
159
210
  )
160
211
  )
161
- else:
162
- data = data[44:] # skip WAV header
163
- self._event_ch.send_nowait(
164
- tts.SynthesizedAudio(
165
- request_id=request_id,
166
- segment_id=segment_id,
167
- frame=rtc.AudioFrame(
168
- data=data,
169
- sample_rate=self._opts.audio_config.sample_rate_hertz,
170
- num_channels=1,
171
- samples_per_channel=len(data) // 2, # 16-bit
172
- ),
173
- )
212
+
213
+ except DeadlineExceeded:
214
+ raise APITimeoutError()
215
+ except GoogleAPICallError as e:
216
+ raise APIStatusError(
217
+ e.message,
218
+ status_code=e.code or -1,
219
+ request_id=None,
220
+ body=None,
174
221
  )
222
+ except Exception as e:
223
+ raise APIConnectionError() from e
224
+
225
+
226
+ def _gender_from_str(gender: str) -> SsmlVoiceGender:
227
+ ssml_gender = SsmlVoiceGender.NEUTRAL
228
+ if gender == "male":
229
+ ssml_gender = SsmlVoiceGender.MALE
230
+ elif gender == "female":
231
+ ssml_gender = SsmlVoiceGender.FEMALE
232
+
233
+ return ssml_gender # type: ignore
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.7.1"
15
+ __version__ = "0.7.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-google
3
- Version: 0.7.1
3
+ Version: 0.7.2
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
22
22
  Requires-Dist: google-auth <3,>=2
23
23
  Requires-Dist: google-cloud-speech <3,>=2
24
24
  Requires-Dist: google-cloud-texttospeech <3,>=2
25
- Requires-Dist: livekit-agents >=0.8.0.dev0
25
+ Requires-Dist: livekit-agents >=0.11
26
26
 
27
27
  # LiveKit Plugins Google
28
28
 
@@ -0,0 +1,11 @@
1
+ livekit/plugins/google/__init__.py,sha256=rqV6C5mFNDFlrA2IcGJrsebr2VxQwMzoDUjY1JhMBZM,1117
2
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
3
+ livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
4
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/google/stt.py,sha256=XNU9G0DKc-joOMdhgfJJ2u6IZ3JJ33Wi-XmdqX426fg,14198
6
+ livekit/plugins/google/tts.py,sha256=hRN8ul1lDXU8LPVEfbTszgBiRYsifZXCPMwk-Pv2KeA,8793
7
+ livekit/plugins/google/version.py,sha256=wNTnO8L3jrMdUjS-xAEFoMTKPaPYiFY9Kxnvzm4hTBc,600
8
+ livekit_plugins_google-0.7.2.dist-info/METADATA,sha256=ohgXDVPUSOXfZ8AA7PQhC5RU5huOaZF9dq9GDDRO0-E,1647
9
+ livekit_plugins_google-0.7.2.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
10
+ livekit_plugins_google-0.7.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
11
+ livekit_plugins_google-0.7.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=CYbSmm5fEw71F_r_4pEApGaWQ_r15Y3ZEocH88a4yc8,948
2
- livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
3
- livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
4
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/google/stt.py,sha256=XXDOISg-8U1MzVu543xLEB3-mr_NFKJp9qo1-ya2-Hc,13569
6
- livekit/plugins/google/tts.py,sha256=T9AHsxofwo3XaMciJPWh9O7lTZqDVYdQQlnFPiGWVbQ,6170
7
- livekit/plugins/google/version.py,sha256=JOBYrlKcxbTTRXkUKH0921GsmV-i71_KHczg2cgQiLc,600
8
- livekit_plugins_google-0.7.1.dist-info/METADATA,sha256=MyDLqZp1DC52KWx_Re3Hj0kO75l-Dg9z9IfiihtH4KY,1653
9
- livekit_plugins_google-0.7.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
10
- livekit_plugins_google-0.7.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
11
- livekit_plugins_google-0.7.1.dist-info/RECORD,,