livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions exactly as they appear in their respective public registries.
@@ -14,6 +14,8 @@
14
14
 
15
15
  from __future__ import annotations
16
16
 
17
+ import asyncio
18
+ import weakref
17
19
  from dataclasses import dataclass
18
20
 
19
21
  from google.api_core.client_options import ClientOptions
@@ -25,6 +27,7 @@ from livekit.agents import (
25
27
  APIConnectOptions,
26
28
  APIStatusError,
27
29
  APITimeoutError,
30
+ tokenize,
28
31
  tts,
29
32
  utils,
30
33
  )
@@ -35,13 +38,21 @@ from livekit.agents.types import (
35
38
  )
36
39
  from livekit.agents.utils import is_given
37
40
 
41
+ from .log import logger
38
42
  from .models import Gender, SpeechLanguages
39
43
 
44
+ BUFFERED_WORDS_COUNT = 8
45
+ NUM_CHANNELS = 1
46
+ DEFAULT_VOICE_NAME = "en-US-Chirp3-HD-Charon"
47
+ DEFAULT_LANGUAGE = "en-US"
48
+ DEFAULT_GENDER = "neutral"
49
+
40
50
 
41
51
  @dataclass
42
52
  class _TTSOptions:
43
53
  voice: texttospeech.VoiceSelectionParams
44
54
  audio_config: texttospeech.AudioConfig
55
+ tokenizer: tokenize.SentenceTokenizer
45
56
 
46
57
 
47
58
  class TTS(tts.TTS):
@@ -59,6 +70,8 @@ class TTS(tts.TTS):
59
70
  audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
60
71
  credentials_info: NotGivenOr[dict] = NOT_GIVEN,
61
72
  credentials_file: NotGivenOr[str] = NOT_GIVEN,
73
+ tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
74
+ use_streaming: NotGivenOr[bool] = NOT_GIVEN,
62
75
  ) -> None:
63
76
  """
64
77
  Create a new instance of Google TTS.
@@ -78,12 +91,14 @@ class TTS(tts.TTS):
78
91
  speaking_rate (float, optional): Speed of speech. Default is 1.0.
79
92
  credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
80
93
  credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
94
+ tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
95
+ use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
81
96
  """ # noqa: E501
97
+ if not is_given(use_streaming):
98
+ use_streaming = True
82
99
 
83
100
  super().__init__(
84
- capabilities=tts.TTSCapabilities(
85
- streaming=False,
86
- ),
101
+ capabilities=tts.TTSCapabilities(streaming=use_streaming),
87
102
  sample_rate=sample_rate,
88
103
  num_channels=1,
89
104
  )
@@ -93,15 +108,17 @@ class TTS(tts.TTS):
93
108
  self._credentials_file = credentials_file
94
109
  self._location = location
95
110
 
96
- lang = language if is_given(language) else "en-US"
97
- ssml_gender = _gender_from_str("neutral" if not is_given(gender) else gender)
98
- name = "" if not is_given(voice_name) else voice_name
111
+ lang = language if is_given(language) else DEFAULT_LANGUAGE
112
+ ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
113
+ name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name
99
114
 
100
115
  voice_params = texttospeech.VoiceSelectionParams(
101
116
  name=name,
102
117
  language_code=lang,
103
118
  ssml_gender=ssml_gender,
104
119
  )
120
+ if not is_given(tokenizer):
121
+ tokenizer = tokenize.basic.SentenceTokenizer(min_sentence_len=BUFFERED_WORDS_COUNT)
105
122
 
106
123
  self._opts = _TTSOptions(
107
124
  voice=voice_params,
@@ -112,7 +129,9 @@ class TTS(tts.TTS):
112
129
  effects_profile_id=effects_profile_id,
113
130
  speaking_rate=speaking_rate,
114
131
  ),
132
+ tokenizer=tokenizer,
115
133
  )
134
+ self._streams = weakref.WeakSet[SynthesizeStream]()
116
135
 
117
136
  def update_options(
118
137
  self,
@@ -168,6 +187,18 @@ class TTS(tts.TTS):
168
187
  assert self._client is not None
169
188
  return self._client
170
189
 
190
+ def stream(
191
+ self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
192
+ ) -> SynthesizeStream:
193
+ stream = SynthesizeStream(
194
+ tts=self,
195
+ opts=self._opts,
196
+ client=self._ensure_client(),
197
+ conn_options=conn_options,
198
+ )
199
+ self._streams.add(stream)
200
+ return stream
201
+
171
202
  def synthesize(
172
203
  self,
173
204
  text: str,
@@ -182,6 +213,12 @@ class TTS(tts.TTS):
182
213
  client=self._ensure_client(),
183
214
  )
184
215
 
216
+ async def aclose(self) -> None:
217
+ for stream in list(self._streams):
218
+ await stream.aclose()
219
+ self._streams.clear()
220
+ await super().aclose()
221
+
185
222
 
186
223
  class ChunkedStream(tts.ChunkedStream):
187
224
  def __init__(
@@ -230,8 +267,105 @@ class ChunkedStream(tts.ChunkedStream):
230
267
  raise APITimeoutError() from None
231
268
  except GoogleAPICallError as e:
232
269
  raise APIStatusError(
233
- e.message, status_code=e.code or -1, request_id=None, body=None
234
- ) from None
270
+ f"{e.message} {e.details}", status_code=e.code or -1, request_id=None, body=None
271
+ ) from e
272
+ except Exception as e:
273
+ raise APIConnectionError() from e
274
+
275
+
276
+ class SynthesizeStream(tts.SynthesizeStream):
277
+ def __init__(
278
+ self,
279
+ *,
280
+ tts: TTS,
281
+ opts: _TTSOptions,
282
+ client: texttospeech.TextToSpeechAsyncClient,
283
+ conn_options: APIConnectOptions,
284
+ ):
285
+ super().__init__(tts=tts, conn_options=conn_options)
286
+ self._opts, self._client = opts, client
287
+ self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
288
+
289
+ async def _run(self) -> None:
290
+ request_id = utils.shortuuid()
291
+
292
+ @utils.log_exceptions(logger=logger)
293
+ async def _tokenize_input():
294
+ input_stream = None
295
+ async for input in self._input_ch:
296
+ if isinstance(input, str):
297
+ if input_stream is None:
298
+ input_stream = self._opts.tokenizer.stream()
299
+ self._segments_ch.send_nowait(input_stream)
300
+ input_stream.push_text(input)
301
+ elif isinstance(input, self._FlushSentinel):
302
+ if input_stream:
303
+ input_stream.end_input()
304
+ input_stream = None
305
+ self._segments_ch.close()
306
+
307
+ @utils.log_exceptions(logger=logger)
308
+ async def _run_segments():
309
+ async for input_stream in self._segments_ch:
310
+ await self._run_stream(input_stream, request_id)
311
+
312
+ tasks = [
313
+ asyncio.create_task(_tokenize_input()),
314
+ asyncio.create_task(_run_segments()),
315
+ ]
316
+ try:
317
+ await asyncio.gather(*tasks)
318
+ except Exception as e:
319
+ raise APIConnectionError() from e
320
+
321
+ async def _run_stream(self, input_stream, request_id):
322
+ streaming_config = texttospeech.StreamingSynthesizeConfig(
323
+ voice=self._opts.voice,
324
+ streaming_audio_config=texttospeech.StreamingAudioConfig(
325
+ audio_encoding=texttospeech.AudioEncoding.PCM
326
+ ),
327
+ )
328
+ emitter = tts.SynthesizedAudioEmitter(event_ch=self._event_ch, request_id=request_id)
329
+ audio_bstream = utils.audio.AudioByteStream(
330
+ sample_rate=self._opts.audio_config.sample_rate_hertz,
331
+ num_channels=NUM_CHANNELS,
332
+ )
333
+
334
+ @utils.log_exceptions(logger=logger)
335
+ async def input_generator():
336
+ try:
337
+ yield texttospeech.StreamingSynthesizeRequest(streaming_config=streaming_config)
338
+ async for input in input_stream:
339
+ self._mark_started()
340
+ yield texttospeech.StreamingSynthesizeRequest(
341
+ input=texttospeech.StreamingSynthesisInput(text=input.token)
342
+ )
343
+
344
+ except Exception:
345
+ logger.exception("an error occurred while streaming input to google TTS")
346
+
347
+ try:
348
+ stream = await self._client.streaming_synthesize(
349
+ input_generator(),
350
+ timeout=self._conn_options.timeout,
351
+ )
352
+ async for resp in stream:
353
+ for frame in audio_bstream.write(resp.audio_content):
354
+ emitter.push(frame)
355
+
356
+ for frame in audio_bstream.flush():
357
+ emitter.push(frame)
358
+ emitter.flush()
359
+ except DeadlineExceeded as e:
360
+ logger.debug(f"google tts deadline exceeded: {e}")
361
+ pass
362
+ except GoogleAPICallError as e:
363
+ raise APIStatusError(
364
+ f"{e.message} {e.details}",
365
+ status_code=e.code or -1,
366
+ request_id=request_id,
367
+ body=None,
368
+ ) from e
235
369
  except Exception as e:
236
370
  raise APIConnectionError() from e
237
371
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.21"
15
+ __version__ = "1.0.22"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.0.21
3
+ Version: 1.0.22
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.24
24
24
  Requires-Dist: google-genai>=1.14.0
25
- Requires-Dist: livekit-agents>=1.0.21
25
+ Requires-Dist: livekit-agents>=1.0.22
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -4,13 +4,13 @@ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA
4
4
  livekit/plugins/google/models.py,sha256=maGlEM3hK4-5hMnH9UQMJewA7BZMrnStsFLBNoNVySg,1531
5
5
  livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
7
- livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
7
+ livekit/plugins/google/tts.py,sha256=FfhNfGtW8drmYDDfLLZDjaIp2GvNiIdoovgtZq4t_l8,14211
8
8
  livekit/plugins/google/utils.py,sha256=UBAbddYk7G8Nojg6bSC7_xN2pdl9qhs86HGhKYFuf9M,10509
9
- livekit/plugins/google/version.py,sha256=5lzQkS1jEPqreexacwMd18b2EOx7R5m8AQMKtQRBgC4,601
9
+ livekit/plugins/google/version.py,sha256=-8dkOE2vDSF9WN8VoBrSwU2sb5YBGFuwPnSQXQ-uaYM,601
10
10
  livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
11
11
  livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
12
  livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
13
13
  livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yYB5fKXl_aaMH_ZSpfUlfOTUg4eRqqRENLTZhZMfBMc,36253
14
- livekit_plugins_google-1.0.21.dist-info/METADATA,sha256=mQA8BfvWhAjp3V9GJA5OsZLzP_Q03UuDbRX2HbcEgtY,1908
15
- livekit_plugins_google-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- livekit_plugins_google-1.0.21.dist-info/RECORD,,
14
+ livekit_plugins_google-1.0.22.dist-info/METADATA,sha256=S4bQZr4NhWrAI6vyJi299sh5lsD5eVMNfxvN9__xAMY,1908
15
+ livekit_plugins_google-1.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ livekit_plugins_google-1.0.22.dist-info/RECORD,,