livekit-plugins-google 0.7.0__tar.gz → 0.7.2__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/PKG-INFO +2 -2
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/__init__.py +9 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/stt.py +33 -10
- livekit_plugins_google-0.7.2/livekit/plugins/google/tts.py +233 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/PKG-INFO +2 -2
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/requires.txt +1 -1
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/setup.py +1 -1
- livekit_plugins_google-0.7.0/livekit/plugins/google/tts.py +0 -163
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/README.md +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/log.py +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/models.py +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/py.typed +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/SOURCES.txt +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/dependency_links.txt +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/top_level.txt +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/pyproject.toml +0 -0
- {livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/setup.cfg +0 -0
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-google
-Version: 0.7.0
+Version: 0.7.2
 Summary: Agent Framework plugin for services from Google Cloud
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: livekit-agents>=0.
+Requires-Dist: livekit-agents>=0.11
 
 # LiveKit Plugins Google
 
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/__init__.py
RENAMED
@@ -29,3 +29,12 @@ class GooglePlugin(Plugin):
 
 
 Plugin.register_plugin(GooglePlugin())
+
+# Cleanup docs of unexported modules
+_module = dir()
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+
+__pdoc__ = {}
+
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/stt.py
RENAMED
@@ -20,8 +20,15 @@ from dataclasses import dataclass
 from typing import AsyncIterable, List, Union
 
 from livekit import agents, rtc
-from livekit.agents import
-
+from livekit.agents import (
+    APIConnectionError,
+    APIStatusError,
+    APITimeoutError,
+    stt,
+    utils,
+)
+
+from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
 from google.auth import default as gauth_default
 from google.auth.exceptions import DefaultCredentialsError
 from google.cloud.speech_v2 import SpeechAsyncClient
@@ -141,7 +148,7 @@ class STT(stt.STT):
 
         return config
 
-    async def
+    async def _recognize_impl(
         self,
         buffer: utils.AudioBuffer,
         *,
@@ -165,23 +172,39 @@ class STT(stt.STT):
             language_codes=config.languages,
         )
 
-
-
-
+        try:
+            raw = await self._ensure_client().recognize(
+                cloud_speech.RecognizeRequest(
+                    recognizer=self._recognizer,
+                    config=config,
+                    content=frame.data.tobytes(),
+                )
+            )
+
+            return _recognize_response_to_speech_event(raw)
+        except DeadlineExceeded:
+            raise APITimeoutError()
+        except GoogleAPICallError as e:
+            raise APIStatusError(
+                e.message,
+                status_code=e.code or -1,
+                request_id=None,
+                body=None,
             )
-
-
+        except Exception as e:
+            raise APIConnectionError() from e
 
     def stream(
         self, *, language: SpeechLanguages | str | None = None
     ) -> "SpeechStream":
         config = self._sanitize_options(language=language)
-        return SpeechStream(self._ensure_client(), self._recognizer, config)
+        return SpeechStream(self, self._ensure_client(), self._recognizer, config)
 
 
 class SpeechStream(stt.SpeechStream):
     def __init__(
         self,
+        stt: STT,
         client: SpeechAsyncClient,
         recognizer: str,
         config: STTOptions,
@@ -189,7 +212,7 @@ class SpeechStream(stt.SpeechStream):
         num_channels: int = 1,
         max_retry: int = 32,
     ) -> None:
-        super().__init__()
+        super().__init__(stt)
 
         self._client = client
         self._recognizer = recognizer
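
The reworked `_recognize_impl` now maps Google API failures onto the framework's shared error types (`APITimeoutError`, `APIStatusError`, `APIConnectionError`). Below is a minimal caller-side sketch, not part of this diff; it assumes the public `recognize()` wrapper from `livekit-agents` (which dispatches to `_recognize_impl`), an already-filled `AudioBuffer`, and credentials provided via `GOOGLE_APPLICATION_CREDENTIALS`.

# Hedged sketch of handling the error types introduced in 0.7.2.
from livekit.agents import APIConnectionError, APIStatusError, APITimeoutError
from livekit.plugins import google


async def transcribe(buffer) -> str | None:
    stt_engine = google.STT()
    try:
        # recognize() is the public entry point that ends up in _recognize_impl
        event = await stt_engine.recognize(buffer)
        return event.alternatives[0].text if event.alternatives else None
    except APITimeoutError:
        # raised when Google returns DeadlineExceeded
        return None
    except APIStatusError as e:
        # raised for other GoogleAPICallError responses
        print(f"Google STT error (status {e.status_code}): {e}")
        return None
    except APIConnectionError:
        # raised for anything else, e.g. network failures
        return None
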
livekit_plugins_google-0.7.2/livekit/plugins/google/tts.py
@@ -0,0 +1,233 @@
+# Copyright 2023 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from livekit import rtc
+from livekit.agents import (
+    APIConnectionError,
+    APIStatusError,
+    APITimeoutError,
+    tts,
+    utils,
+)
+
+from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
+from google.cloud import texttospeech
+from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
+
+from .models import AudioEncoding, Gender, SpeechLanguages
+
+
+@dataclass
+class _TTSOptions:
+    voice: texttospeech.VoiceSelectionParams
+    audio_config: texttospeech.AudioConfig
+
+
+class TTS(tts.TTS):
+    def __init__(
+        self,
+        *,
+        language: SpeechLanguages | str = "en-US",
+        gender: Gender | str = "neutral",
+        voice_name: str = "",  # Not required
+        encoding: AudioEncoding | str = "linear16",
+        sample_rate: int = 24000,
+        pitch: int = 0,
+        effects_profile_id: str = "",
+        speaking_rate: float = 1.0,
+        credentials_info: dict | None = None,
+        credentials_file: str | None = None,
+    ) -> None:
+        """
+        Create a new instance of Google TTS.
+
+        Credentials must be provided, either by using the ``credentials_info`` dict, or reading
+        from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
+        environmental variable.
+
+        Args:
+            language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
+            gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
+            voice_name (str, optional): Specific voice name. Default is an empty string.
+            encoding (AudioEncoding | str, optional): Audio encoding format (e.g., "linear16"). Default is "linear16".
+            sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
+            pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
+            effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
+            speaking_rate (float, optional): Speed of speech. Default is 1.0.
+            credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
+            credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
+        """
+
+        super().__init__(
+            capabilities=tts.TTSCapabilities(
+                streaming=False,
+            ),
+            sample_rate=sample_rate,
+            num_channels=1,
+        )
+
+        self._client: texttospeech.TextToSpeechAsyncClient | None = None
+        self._credentials_info = credentials_info
+        self._credentials_file = credentials_file
+
+        voice = texttospeech.VoiceSelectionParams(
+            name=voice_name,
+            language_code=language,
+            ssml_gender=_gender_from_str(gender),
+        )
+
+        if encoding == "linear16" or encoding == "wav":
+            _audio_encoding = texttospeech.AudioEncoding.LINEAR16
+        elif encoding == "mp3":
+            _audio_encoding = texttospeech.AudioEncoding.MP3
+        else:
+            raise NotImplementedError(f"audio encoding {encoding} is not supported")
+
+        self._opts = _TTSOptions(
+            voice=voice,
+            audio_config=texttospeech.AudioConfig(
+                audio_encoding=_audio_encoding,
+                sample_rate_hertz=sample_rate,
+                pitch=pitch,
+                effects_profile_id=effects_profile_id,
+                speaking_rate=speaking_rate,
+            ),
+        )
+
+    def update_options(
+        self,
+        *,
+        language: SpeechLanguages | str = "en-US",
+        gender: Gender | str = "neutral",
+        voice_name: str = "",  # Not required
+        speaking_rate: float = 1.0,
+    ) -> None:
+        """
+        Update the TTS options.
+
+        Args:
+            language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
+            gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
+            voice_name (str, optional): Specific voice name. Default is an empty string.
+            speaking_rate (float, optional): Speed of speech. Default is 1.0.
+        """
+        self._opts.voice = texttospeech.VoiceSelectionParams(
+            name=voice_name,
+            language_code=language,
+            ssml_gender=_gender_from_str(gender),
+        )
+        self._opts.audio_config.speaking_rate = speaking_rate
+
+    def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
+        if not self._client:
+            if self._credentials_info:
+                self._client = (
+                    texttospeech.TextToSpeechAsyncClient.from_service_account_info(
+                        self._credentials_info
+                    )
+                )
+
+            elif self._credentials_file:
+                self._client = (
+                    texttospeech.TextToSpeechAsyncClient.from_service_account_file(
+                        self._credentials_file
+                    )
+                )
+            else:
+                self._client = texttospeech.TextToSpeechAsyncClient()
+
+        assert self._client is not None
+        return self._client
+
+    def synthesize(self, text: str) -> "ChunkedStream":
+        return ChunkedStream(self, text, self._opts, self._ensure_client())
+
+
+class ChunkedStream(tts.ChunkedStream):
+    def __init__(
+        self,
+        tts: TTS,
+        text: str,
+        opts: _TTSOptions,
+        client: texttospeech.TextToSpeechAsyncClient,
+    ) -> None:
+        super().__init__(tts, text)
+        self._opts, self._client = opts, client
+
+    async def _main_task(self) -> None:
+        request_id = utils.shortuuid()
+
+        try:
+            response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
+                input=texttospeech.SynthesisInput(text=self._input_text),
+                voice=self._opts.voice,
+                audio_config=self._opts.audio_config,
+            )
+
+            data = response.audio_content
+            if self._opts.audio_config.audio_encoding == "mp3":
+                decoder = utils.codecs.Mp3StreamDecoder()
+                bstream = utils.audio.AudioByteStream(
+                    sample_rate=self._opts.audio_config.sample_rate_hertz,
+                    num_channels=1,
+                )
+                for frame in decoder.decode_chunk(data):
+                    for frame in bstream.write(frame.data.tobytes()):
+                        self._event_ch.send_nowait(
+                            tts.SynthesizedAudio(request_id=request_id, frame=frame)
+                        )
+
+                for frame in bstream.flush():
+                    self._event_ch.send_nowait(
+                        tts.SynthesizedAudio(request_id=request_id, frame=frame)
+                    )
+            else:
+                data = data[44:]  # skip WAV header
+                self._event_ch.send_nowait(
+                    tts.SynthesizedAudio(
+                        request_id=request_id,
+                        frame=rtc.AudioFrame(
+                            data=data,
+                            sample_rate=self._opts.audio_config.sample_rate_hertz,
+                            num_channels=1,
+                            samples_per_channel=len(data) // 2,  # 16-bit
+                        ),
+                    )
+                )
+
+        except DeadlineExceeded:
+            raise APITimeoutError()
+        except GoogleAPICallError as e:
+            raise APIStatusError(
+                e.message,
+                status_code=e.code or -1,
+                request_id=None,
+                body=None,
+            )
+        except Exception as e:
+            raise APIConnectionError() from e
+
+
+def _gender_from_str(gender: str) -> SsmlVoiceGender:
+    ssml_gender = SsmlVoiceGender.NEUTRAL
+    if gender == "male":
+        ssml_gender = SsmlVoiceGender.MALE
+    elif gender == "female":
+        ssml_gender = SsmlVoiceGender.FEMALE
+
+    return ssml_gender  # type: ignore
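
For reference, a minimal usage sketch (not part of the package) exercising the options added in this rewrite: `pitch`, `effects_profile_id`, and the new `update_options()`. Credential setup and the surrounding agent pipeline are assumed and omitted; the specific `effects_profile_id` value shown is just one of Google's documented profiles.

# Hedged sketch of the TTS options added in 0.7.2.
# Assumes GOOGLE_APPLICATION_CREDENTIALS points at a service-account JSON file.
from livekit.plugins import google

tts_engine = google.TTS(
    language="en-US",
    gender="female",
    encoding="linear16",
    sample_rate=24000,
    pitch=2,  # new in 0.7.2: semitone offset relative to the default pitch
    effects_profile_id="telephony-class-application",  # new in 0.7.2
    speaking_rate=1.1,
)

# Voice and rate can now be changed after construction:
tts_engine.update_options(language="en-US", gender="male", speaking_rate=0.9)

# synthesize() returns a ChunkedStream that an agent pipeline consumes;
# it emits tts.SynthesizedAudio frames in one shot (streaming=False).
stream = tts_engine.synthesize("Hello from LiveKit")
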
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-google
-Version: 0.7.0
+Version: 0.7.2
 Summary: Agent Framework plugin for services from Google Cloud
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: livekit-agents>=0.
+Requires-Dist: livekit-agents>=0.11
 
 # LiveKit Plugins Google
 
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/setup.py
RENAMED
@@ -51,7 +51,7 @@ setuptools.setup(
         "google-auth >= 2, < 3",
         "google-cloud-speech >= 2, < 3",
         "google-cloud-texttospeech >= 2, < 3",
-        "livekit-agents>=0.
+        "livekit-agents>=0.11",
     ],
     package_data={"livekit.plugins.google": ["py.typed"]},
     project_urls={
livekit_plugins_google-0.7.0/livekit/plugins/google/tts.py
@@ -1,163 +0,0 @@
-# Copyright 2023 LiveKit, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Union
-
-from livekit import rtc
-from livekit.agents import tts, utils
-
-from google.cloud import texttospeech
-from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
-
-from .log import logger
-from .models import AudioEncoding, Gender, SpeechLanguages
-
-LgType = Union[SpeechLanguages, str]
-GenderType = Union[Gender, str]
-AudioEncodingType = Union[AudioEncoding, str]
-
-
-@dataclass
-class _TTSOptions:
-    voice: texttospeech.VoiceSelectionParams
-    audio_config: texttospeech.AudioConfig
-
-
-class TTS(tts.TTS):
-    def __init__(
-        self,
-        *,
-        language: LgType = "en-US",
-        gender: GenderType = "neutral",
-        voice_name: str = "",  # Not required
-        encoding: AudioEncodingType = "linear16",
-        sample_rate: int = 24000,
-        speaking_rate: float = 1.0,
-        credentials_info: dict | None = None,
-        credentials_file: str | None = None,
-    ) -> None:
-        """
-        Create a new instance of Google TTS.
-
-        Credentials must be provided, either by using the ``credentials_info`` dict, or reading
-        from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
-        environmental variable.
-        """
-
-        super().__init__(
-            capabilities=tts.TTSCapabilities(
-                streaming=False,
-            ),
-            sample_rate=sample_rate,
-            num_channels=1,
-        )
-
-        self._client: texttospeech.TextToSpeechAsyncClient | None = None
-        self._credentials_info = credentials_info
-        self._credentials_file = credentials_file
-
-        ssml_gender = SsmlVoiceGender.NEUTRAL
-        if gender == "male":
-            ssml_gender = SsmlVoiceGender.MALE
-        elif gender == "female":
-            ssml_gender = SsmlVoiceGender.FEMALE
-
-        voice = texttospeech.VoiceSelectionParams(
-            name=voice_name, language_code=language, ssml_gender=ssml_gender
-        )
-
-        if encoding == "linear16" or encoding == "wav":
-            _audio_encoding = texttospeech.AudioEncoding.LINEAR16
-        elif encoding == "mp3":
-            _audio_encoding = texttospeech.AudioEncoding.MP3
-        else:
-            raise NotImplementedError(f"audio encoding {encoding} is not supported")
-
-        self._opts = _TTSOptions(
-            voice=voice,
-            audio_config=texttospeech.AudioConfig(
-                audio_encoding=_audio_encoding,
-                sample_rate_hertz=sample_rate,
-                speaking_rate=speaking_rate,
-            ),
-        )
-
-    def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
-        if not self._client:
-            if self._credentials_info:
-                self._client = (
-                    texttospeech.TextToSpeechAsyncClient.from_service_account_info(
-                        self._credentials_info
-                    )
-                )
-
-            elif self._credentials_file:
-                self._client = (
-                    texttospeech.TextToSpeechAsyncClient.from_service_account_file(
-                        self._credentials_file
-                    )
-                )
-            else:
-                self._client = texttospeech.TextToSpeechAsyncClient()
-
-        assert self._client is not None
-        return self._client
-
-    def synthesize(self, text: str) -> "ChunkedStream":
-        return ChunkedStream(text, self._opts, self._ensure_client())
-
-
-class ChunkedStream(tts.ChunkedStream):
-    def __init__(
-        self, text: str, opts: _TTSOptions, client: texttospeech.TextToSpeechAsyncClient
-    ) -> None:
-        super().__init__()
-        self._text, self._opts, self._client = text, opts, client
-
-    @utils.log_exceptions(logger=logger)
-    async def _main_task(self) -> None:
-        request_id = utils.shortuuid()
-        segment_id = utils.shortuuid()
-        response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
-            input=texttospeech.SynthesisInput(text=self._text),
-            voice=self._opts.voice,
-            audio_config=self._opts.audio_config,
-        )
-
-        data = response.audio_content
-        if self._opts.audio_config.audio_encoding == "mp3":
-            decoder = utils.codecs.Mp3StreamDecoder()
-            for frame in decoder.decode_chunk(data):
-                self._event_ch.send_nowait(
-                    tts.SynthesizedAudio(
-                        request_id=request_id, segment_id=segment_id, frame=frame
-                    )
-                )
-        else:
-            data = data[44:]  # skip WAV header
-            self._event_ch.send_nowait(
-                tts.SynthesizedAudio(
-                    request_id=request_id,
-                    segment_id=segment_id,
-                    frame=rtc.AudioFrame(
-                        data=data,
-                        sample_rate=self._opts.audio_config.sample_rate_hertz,
-                        num_channels=1,
-                        samples_per_channel=len(data) // 2,  # 16-bit
-                    ),
-                )
-            )
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/README.md
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/log.py
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/models.py
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit/plugins/google/py.typed
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/SOURCES.txt
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/dependency_links.txt
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/livekit_plugins_google.egg-info/top_level.txt
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/pyproject.toml
RENAMED
File without changes
{livekit_plugins_google-0.7.0 → livekit_plugins_google-0.7.2}/setup.cfg
RENAMED
File without changes