livekit-plugins-google 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/PKG-INFO +2 -2
  2. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/__init__.py +9 -0
  3. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/stt.py +33 -10
  4. livekit_plugins_google-0.7.2/livekit/plugins/google/tts.py +233 -0
  5. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/version.py +1 -1
  6. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/PKG-INFO +2 -2
  7. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/requires.txt +1 -1
  8. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/setup.py +1 -1
  9. livekit_plugins_google-0.7.0/livekit/plugins/google/tts.py +0 -163
  10. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/README.md +0 -0
  11. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/log.py +0 -0
  12. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/models.py +0 -0
  13. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/py.typed +0 -0
  14. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/SOURCES.txt +0 -0
  15. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/dependency_links.txt +0 -0
  16. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/top_level.txt +0 -0
  17. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/pyproject.toml +0 -0
  18. {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: livekit-plugins-google
- Version: 0.7.0
+ Version: 0.7.2
  Summary: Agent Framework plugin for services from Google Cloud
  Home-page: https://github.com/livekit/agents
  License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
  Requires-Dist: google-auth<3,>=2
  Requires-Dist: google-cloud-speech<3,>=2
  Requires-Dist: google-cloud-texttospeech<3,>=2
- Requires-Dist: livekit-agents>=0.8.0.dev0
+ Requires-Dist: livekit-agents>=0.11

  # LiveKit Plugins Google

livekit/plugins/google/__init__.py
@@ -29,3 +29,12 @@ class GooglePlugin(Plugin):


  Plugin.register_plugin(GooglePlugin())
+
+ # Cleanup docs of unexported modules
+ _module = dir()
+ NOT_IN_ALL = [m for m in _module if m not in __all__]
+
+ __pdoc__ = {}
+
+ for n in NOT_IN_ALL:
+     __pdoc__[n] = False
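Aside: the `__pdoc__` mapping added above follows the pdoc documentation-generator convention, where setting an entry to False hides that name from the generated docs. A minimal standalone sketch of the same pattern; the module and function names here are purely illustrative and not part of the plugin:

# docs_example.py -- hypothetical module, not part of livekit-plugins-google
__all__ = ["greet"]


def greet(name: str) -> str:
    """Public API; pdoc will document this."""
    return f"hello {name}"


def _internal_helper() -> None:
    """Implementation detail that should not appear in the docs."""


# Same idea as the plugin's __init__.py: hide everything not exported via __all__
__pdoc__ = {name: False for name in dir() if name not in __all__}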
livekit/plugins/google/stt.py
@@ -20,8 +20,15 @@ from dataclasses import dataclass
  from typing import AsyncIterable, List, Union

  from livekit import agents, rtc
- from livekit.agents import stt, utils
-
+ from livekit.agents import (
+     APIConnectionError,
+     APIStatusError,
+     APITimeoutError,
+     stt,
+     utils,
+ )
+
+ from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
  from google.auth import default as gauth_default
  from google.auth.exceptions import DefaultCredentialsError
  from google.cloud.speech_v2 import SpeechAsyncClient
@@ -141,7 +148,7 @@ class STT(stt.STT):

          return config

-     async def recognize(
+     async def _recognize_impl(
          self,
          buffer: utils.AudioBuffer,
          *,
@@ -165,23 +172,39 @@ class STT(stt.STT):
              language_codes=config.languages,
          )

-         raw = await self._ensure_client().recognize(
-             cloud_speech.RecognizeRequest(
-                 recognizer=self._recognizer, config=config, content=frame.data.tobytes()
+         try:
+             raw = await self._ensure_client().recognize(
+                 cloud_speech.RecognizeRequest(
+                     recognizer=self._recognizer,
+                     config=config,
+                     content=frame.data.tobytes(),
+                 )
+             )
+
+             return _recognize_response_to_speech_event(raw)
+         except DeadlineExceeded:
+             raise APITimeoutError()
+         except GoogleAPICallError as e:
+             raise APIStatusError(
+                 e.message,
+                 status_code=e.code or -1,
+                 request_id=None,
+                 body=None,
              )
-         )
-         return _recognize_response_to_speech_event(raw)
+         except Exception as e:
+             raise APIConnectionError() from e

      def stream(
          self, *, language: SpeechLanguages | str | None = None
      ) -> "SpeechStream":
          config = self._sanitize_options(language=language)
-         return SpeechStream(self._ensure_client(), self._recognizer, config)
+         return SpeechStream(self, self._ensure_client(), self._recognizer, config)


  class SpeechStream(stt.SpeechStream):
      def __init__(
          self,
+         stt: STT,
          client: SpeechAsyncClient,
          recognizer: str,
          config: STTOptions,
@@ -189,7 +212,7 @@ class SpeechStream(stt.SpeechStream):
          num_channels: int = 1,
          max_retry: int = 32,
      ) -> None:
-         super().__init__()
+         super().__init__(stt)

          self._client = client
          self._recognizer = recognizer
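The stt.py changes above map Google client failures onto the shared livekit-agents exception types: DeadlineExceeded becomes APITimeoutError, other GoogleAPICallError values become APIStatusError, and anything else becomes APIConnectionError. A hedged sketch of how calling code might react to those types; the audio buffer is assumed to be obtained elsewhere, and STT.recognize is assumed to be the public wrapper around the renamed _recognize_impl:

from livekit.agents import APIConnectionError, APIStatusError, APITimeoutError
from livekit.agents.utils import AudioBuffer
from livekit.plugins import google


async def transcribe(buffer: AudioBuffer) -> None:
    stt = google.STT()  # credentials resolved from the environment
    try:
        event = await stt.recognize(buffer)
        print(event.alternatives[0].text)
    except APITimeoutError:
        # DeadlineExceeded from the Google client surfaces here
        print("Google STT request timed out")
    except APIStatusError as e:
        # Any other GoogleAPICallError, with its status code preserved
        print(f"Google STT returned status {e.status_code}")
    except APIConnectionError:
        # Network or unexpected client-side failure
        print("could not reach Google STT")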
livekit_plugins_google-0.7.2/livekit/plugins/google/tts.py (new file)
@@ -0,0 +1,233 @@
+ # Copyright 2023 LiveKit, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+
+ from livekit import rtc
+ from livekit.agents import (
+     APIConnectionError,
+     APIStatusError,
+     APITimeoutError,
+     tts,
+     utils,
+ )
+
+ from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
+ from google.cloud import texttospeech
+ from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
+
+ from .models import AudioEncoding, Gender, SpeechLanguages
+
+
+ @dataclass
+ class _TTSOptions:
+     voice: texttospeech.VoiceSelectionParams
+     audio_config: texttospeech.AudioConfig
+
+
+ class TTS(tts.TTS):
+     def __init__(
+         self,
+         *,
+         language: SpeechLanguages | str = "en-US",
+         gender: Gender | str = "neutral",
+         voice_name: str = "",  # Not required
+         encoding: AudioEncoding | str = "linear16",
+         sample_rate: int = 24000,
+         pitch: int = 0,
+         effects_profile_id: str = "",
+         speaking_rate: float = 1.0,
+         credentials_info: dict | None = None,
+         credentials_file: str | None = None,
+     ) -> None:
+         """
+         Create a new instance of Google TTS.
+
+         Credentials must be provided, either by using the ``credentials_info`` dict, or reading
+         from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
+         environmental variable.
+
+         Args:
+             language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
+             gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
+             voice_name (str, optional): Specific voice name. Default is an empty string.
+             encoding (AudioEncoding | str, optional): Audio encoding format (e.g., "linear16"). Default is "linear16".
+             sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
+             pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
+             effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
+             speaking_rate (float, optional): Speed of speech. Default is 1.0.
+             credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
+             credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
+         """
+
+         super().__init__(
+             capabilities=tts.TTSCapabilities(
+                 streaming=False,
+             ),
+             sample_rate=sample_rate,
+             num_channels=1,
+         )
+
+         self._client: texttospeech.TextToSpeechAsyncClient | None = None
+         self._credentials_info = credentials_info
+         self._credentials_file = credentials_file
+
+         voice = texttospeech.VoiceSelectionParams(
+             name=voice_name,
+             language_code=language,
+             ssml_gender=_gender_from_str(gender),
+         )
+
+         if encoding == "linear16" or encoding == "wav":
+             _audio_encoding = texttospeech.AudioEncoding.LINEAR16
+         elif encoding == "mp3":
+             _audio_encoding = texttospeech.AudioEncoding.MP3
+         else:
+             raise NotImplementedError(f"audio encoding {encoding} is not supported")
+
+         self._opts = _TTSOptions(
+             voice=voice,
+             audio_config=texttospeech.AudioConfig(
+                 audio_encoding=_audio_encoding,
+                 sample_rate_hertz=sample_rate,
+                 pitch=pitch,
+                 effects_profile_id=effects_profile_id,
+                 speaking_rate=speaking_rate,
+             ),
+         )
+
+     def update_options(
+         self,
+         *,
+         language: SpeechLanguages | str = "en-US",
+         gender: Gender | str = "neutral",
+         voice_name: str = "",  # Not required
+         speaking_rate: float = 1.0,
+     ) -> None:
+         """
+         Update the TTS options.
+
+         Args:
+             language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
+             gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
+             voice_name (str, optional): Specific voice name. Default is an empty string.
+             speaking_rate (float, optional): Speed of speech. Default is 1.0.
+         """
+         self._opts.voice = texttospeech.VoiceSelectionParams(
+             name=voice_name,
+             language_code=language,
+             ssml_gender=_gender_from_str(gender),
+         )
+         self._opts.audio_config.speaking_rate = speaking_rate
+
+     def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
+         if not self._client:
+             if self._credentials_info:
+                 self._client = (
+                     texttospeech.TextToSpeechAsyncClient.from_service_account_info(
+                         self._credentials_info
+                     )
+                 )
+
+             elif self._credentials_file:
+                 self._client = (
+                     texttospeech.TextToSpeechAsyncClient.from_service_account_file(
+                         self._credentials_file
+                     )
+                 )
+             else:
+                 self._client = texttospeech.TextToSpeechAsyncClient()
+
+         assert self._client is not None
+         return self._client
+
+     def synthesize(self, text: str) -> "ChunkedStream":
+         return ChunkedStream(self, text, self._opts, self._ensure_client())
+
+
+ class ChunkedStream(tts.ChunkedStream):
+     def __init__(
+         self,
+         tts: TTS,
+         text: str,
+         opts: _TTSOptions,
+         client: texttospeech.TextToSpeechAsyncClient,
+     ) -> None:
+         super().__init__(tts, text)
+         self._opts, self._client = opts, client
+
+     async def _main_task(self) -> None:
+         request_id = utils.shortuuid()
+
+         try:
+             response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
+                 input=texttospeech.SynthesisInput(text=self._input_text),
+                 voice=self._opts.voice,
+                 audio_config=self._opts.audio_config,
+             )
+
+             data = response.audio_content
+             if self._opts.audio_config.audio_encoding == "mp3":
+                 decoder = utils.codecs.Mp3StreamDecoder()
+                 bstream = utils.audio.AudioByteStream(
+                     sample_rate=self._opts.audio_config.sample_rate_hertz,
+                     num_channels=1,
+                 )
+                 for frame in decoder.decode_chunk(data):
+                     for frame in bstream.write(frame.data.tobytes()):
+                         self._event_ch.send_nowait(
+                             tts.SynthesizedAudio(request_id=request_id, frame=frame)
+                         )
+
+                 for frame in bstream.flush():
+                     self._event_ch.send_nowait(
+                         tts.SynthesizedAudio(request_id=request_id, frame=frame)
+                     )
+             else:
+                 data = data[44:]  # skip WAV header
+                 self._event_ch.send_nowait(
+                     tts.SynthesizedAudio(
+                         request_id=request_id,
+                         frame=rtc.AudioFrame(
+                             data=data,
+                             sample_rate=self._opts.audio_config.sample_rate_hertz,
+                             num_channels=1,
+                             samples_per_channel=len(data) // 2,  # 16-bit
+                         ),
+                     )
+                 )
+
+         except DeadlineExceeded:
+             raise APITimeoutError()
+         except GoogleAPICallError as e:
+             raise APIStatusError(
+                 e.message,
+                 status_code=e.code or -1,
+                 request_id=None,
+                 body=None,
+             )
+         except Exception as e:
+             raise APIConnectionError() from e
+
+
+ def _gender_from_str(gender: str) -> SsmlVoiceGender:
+     ssml_gender = SsmlVoiceGender.NEUTRAL
+     if gender == "male":
+         ssml_gender = SsmlVoiceGender.MALE
+     elif gender == "female":
+         ssml_gender = SsmlVoiceGender.FEMALE
+
+     return ssml_gender  # type: ignore
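For orientation, a hedged usage sketch of the rewritten tts.py above: it exercises the newly exposed pitch and effects_profile_id constructor parameters and the new update_options method, assumes credentials come from the GOOGLE_APPLICATION_CREDENTIALS environment variable, and simply collects the synthesized frames rather than publishing them to a LiveKit room. The effects profile id shown is only an example value:

import asyncio

from livekit.plugins import google


async def main() -> None:
    # Auth falls back to GOOGLE_APPLICATION_CREDENTIALS, as described in the docstring above
    tts = google.TTS(
        language="en-US",
        gender="female",
        sample_rate=24000,
        pitch=2,  # added in 0.7.2: semitone offset from the default voice pitch
        effects_profile_id="telephony-class-application",  # added in 0.7.2
        speaking_rate=1.1,
    )

    # Adjust the voice at runtime without rebuilding the plugin (also added in 0.7.2)
    tts.update_options(language="en-GB", gender="male", speaking_rate=0.95)

    stream = tts.synthesize("Hello from LiveKit agents!")
    frames = [audio.frame async for audio in stream]
    print(f"received {len(frames)} audio frames")


asyncio.run(main())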
livekit/plugins/google/version.py
@@ -12,4 +12,4 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- __version__ = "0.7.0"
+ __version__ = "0.7.2"
livekit_plugins_google.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: livekit-plugins-google
- Version: 0.7.0
+ Version: 0.7.2
  Summary: Agent Framework plugin for services from Google Cloud
  Home-page: https://github.com/livekit/agents
  License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
  Requires-Dist: google-auth<3,>=2
  Requires-Dist: google-cloud-speech<3,>=2
  Requires-Dist: google-cloud-texttospeech<3,>=2
- Requires-Dist: livekit-agents>=0.8.0.dev0
+ Requires-Dist: livekit-agents>=0.11

  # LiveKit Plugins Google

livekit_plugins_google.egg-info/requires.txt
@@ -1,4 +1,4 @@
  google-auth<3,>=2
  google-cloud-speech<3,>=2
  google-cloud-texttospeech<3,>=2
- livekit-agents>=0.8.0.dev0
+ livekit-agents>=0.11
setup.py
@@ -51,7 +51,7 @@ setuptools.setup(
          "google-auth >= 2, < 3",
          "google-cloud-speech >= 2, < 3",
          "google-cloud-texttospeech >= 2, < 3",
-         "livekit-agents>=0.8.0.dev0",
+         "livekit-agents>=0.11",
      ],
      package_data={"livekit.plugins.google": ["py.typed"]},
      project_urls={
livekit_plugins_google-0.7.0/livekit/plugins/google/tts.py (removed)
@@ -1,163 +0,0 @@
- # Copyright 2023 LiveKit, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- from __future__ import annotations
-
- from dataclasses import dataclass
- from typing import Union
-
- from livekit import rtc
- from livekit.agents import tts, utils
-
- from google.cloud import texttospeech
- from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
-
- from .log import logger
- from .models import AudioEncoding, Gender, SpeechLanguages
-
- LgType = Union[SpeechLanguages, str]
- GenderType = Union[Gender, str]
- AudioEncodingType = Union[AudioEncoding, str]
-
-
- @dataclass
- class _TTSOptions:
-     voice: texttospeech.VoiceSelectionParams
-     audio_config: texttospeech.AudioConfig
-
-
- class TTS(tts.TTS):
-     def __init__(
-         self,
-         *,
-         language: LgType = "en-US",
-         gender: GenderType = "neutral",
-         voice_name: str = "",  # Not required
-         encoding: AudioEncodingType = "linear16",
-         sample_rate: int = 24000,
-         speaking_rate: float = 1.0,
-         credentials_info: dict | None = None,
-         credentials_file: str | None = None,
-     ) -> None:
-         """
-         Create a new instance of Google TTS.
-
-         Credentials must be provided, either by using the ``credentials_info`` dict, or reading
-         from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
-         environmental variable.
-         """
-
-         super().__init__(
-             capabilities=tts.TTSCapabilities(
-                 streaming=False,
-             ),
-             sample_rate=sample_rate,
-             num_channels=1,
-         )
-
-         self._client: texttospeech.TextToSpeechAsyncClient | None = None
-         self._credentials_info = credentials_info
-         self._credentials_file = credentials_file
-
-         ssml_gender = SsmlVoiceGender.NEUTRAL
-         if gender == "male":
-             ssml_gender = SsmlVoiceGender.MALE
-         elif gender == "female":
-             ssml_gender = SsmlVoiceGender.FEMALE
-
-         voice = texttospeech.VoiceSelectionParams(
-             name=voice_name, language_code=language, ssml_gender=ssml_gender
-         )
-
-         if encoding == "linear16" or encoding == "wav":
-             _audio_encoding = texttospeech.AudioEncoding.LINEAR16
-         elif encoding == "mp3":
-             _audio_encoding = texttospeech.AudioEncoding.MP3
-         else:
-             raise NotImplementedError(f"audio encoding {encoding} is not supported")
-
-         self._opts = _TTSOptions(
-             voice=voice,
-             audio_config=texttospeech.AudioConfig(
-                 audio_encoding=_audio_encoding,
-                 sample_rate_hertz=sample_rate,
-                 speaking_rate=speaking_rate,
-             ),
-         )
-
-     def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
-         if not self._client:
-             if self._credentials_info:
-                 self._client = (
-                     texttospeech.TextToSpeechAsyncClient.from_service_account_info(
-                         self._credentials_info
-                     )
-                 )
-
-             elif self._credentials_file:
-                 self._client = (
-                     texttospeech.TextToSpeechAsyncClient.from_service_account_file(
-                         self._credentials_file
-                     )
-                 )
-             else:
-                 self._client = texttospeech.TextToSpeechAsyncClient()
-
-         assert self._client is not None
-         return self._client
-
-     def synthesize(self, text: str) -> "ChunkedStream":
-         return ChunkedStream(text, self._opts, self._ensure_client())
-
-
- class ChunkedStream(tts.ChunkedStream):
-     def __init__(
-         self, text: str, opts: _TTSOptions, client: texttospeech.TextToSpeechAsyncClient
-     ) -> None:
-         super().__init__()
-         self._text, self._opts, self._client = text, opts, client
-
-     @utils.log_exceptions(logger=logger)
-     async def _main_task(self) -> None:
-         request_id = utils.shortuuid()
-         segment_id = utils.shortuuid()
-         response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
-             input=texttospeech.SynthesisInput(text=self._text),
-             voice=self._opts.voice,
-             audio_config=self._opts.audio_config,
-         )
-
-         data = response.audio_content
-         if self._opts.audio_config.audio_encoding == "mp3":
-             decoder = utils.codecs.Mp3StreamDecoder()
-             for frame in decoder.decode_chunk(data):
-                 self._event_ch.send_nowait(
-                     tts.SynthesizedAudio(
-                         request_id=request_id, segment_id=segment_id, frame=frame
-                     )
-                 )
-         else:
-             data = data[44:]  # skip WAV header
-             self._event_ch.send_nowait(
-                 tts.SynthesizedAudio(
-                     request_id=request_id,
-                     segment_id=segment_id,
-                     frame=rtc.AudioFrame(
-                         data=data,
-                         sample_rate=self._opts.audio_config.sample_rate_hertz,
-                         num_channels=1,
-                         samples_per_channel=len(data) // 2,  # 16-bit
-                     ),
-                 )
-             )