livekit-plugins-google 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -16,13 +16,14 @@ from __future__ import annotations
16
16
 
17
17
  import asyncio
18
18
  import dataclasses
19
- import os
20
19
  from dataclasses import dataclass
21
20
  from typing import AsyncIterable, List, Union
22
21
 
23
22
  from livekit import agents, rtc
24
23
  from livekit.agents import stt, utils
25
24
 
25
+ from google.auth import default as gauth_default
26
+ from google.auth.exceptions import DefaultCredentialsError
26
27
  from google.cloud.speech_v2 import SpeechAsyncClient
27
28
  from google.cloud.speech_v2.types import cloud_speech
28
29
 
@@ -58,8 +59,11 @@ class STT(stt.STT):
58
59
  credentials_file: str | None = None,
59
60
  ):
60
61
  """
61
- if no credentials is provided, it will use the credentials on the environment
62
- GOOGLE_APPLICATION_CREDENTIALS (default behavior of Google SpeechAsyncClient)
62
+ Create a new instance of Google STT.
63
+
64
+ Credentials must be provided, either by using the ``credentials_info`` dict, or reading
65
+ from the file specified in ``credentials_file`` or via Application Default Credentials as
66
+ described in https://cloud.google.com/docs/authentication/application-default-credentials
63
67
  """
64
68
  super().__init__(
65
69
  capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
@@ -70,10 +74,13 @@ class STT(stt.STT):
70
74
  self._credentials_file = credentials_file
71
75
 
72
76
  if credentials_file is None and credentials_info is None:
73
- creds = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
74
- if not creds:
77
+ try:
78
+ gauth_default()
79
+ except DefaultCredentialsError:
75
80
  raise ValueError(
76
- "GOOGLE_APPLICATION_CREDENTIALS must be set if no credentials is provided"
81
+ "Application default credentials must be available "
82
+ "when using Google STT without explicitly passing "
83
+ "credentials through credentials_info or credentials_file."
77
84
  )
78
85
 
79
86
  if isinstance(languages, str):
@@ -109,7 +116,12 @@ class STT(stt.STT):
109
116
  # recognizers may improve latency https://cloud.google.com/speech-to-text/v2/docs/recognizers#understand_recognizers
110
117
 
111
118
  # TODO(theomonnom): find a better way to access the project_id
112
- project_id = self._ensure_client().transport._credentials.project_id # type: ignore
119
+ try:
120
+ project_id = self._ensure_client().transport._credentials.project_id # type: ignore
121
+ except AttributeError:
122
+ from google.auth import default as ga_default
123
+
124
+ _, project_id = ga_default()
113
125
  return f"projects/{project_id}/locations/global/recognizers/_"
114
126
 
115
127
  def _sanitize_options(self, *, language: str | None = None) -> STTOptions:
@@ -278,22 +290,22 @@ class SpeechStream(stt.SpeechStream):
278
290
  == cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED
279
291
  ):
280
292
  result = resp.results[0]
293
+ speech_data = _streaming_recognize_response_to_speech_data(resp)
294
+ if speech_data is None:
295
+ continue
296
+
281
297
  if not result.is_final:
282
298
  self._event_ch.send_nowait(
283
299
  stt.SpeechEvent(
284
300
  type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
285
- alternatives=[
286
- _streaming_recognize_response_to_speech_data(resp)
287
- ],
301
+ alternatives=[speech_data],
288
302
  )
289
303
  )
290
304
  else:
291
305
  self._event_ch.send_nowait(
292
306
  stt.SpeechEvent(
293
307
  type=stt.SpeechEventType.FINAL_TRANSCRIPT,
294
- alternatives=[
295
- _streaming_recognize_response_to_speech_data(resp)
296
- ],
308
+ alternatives=[speech_data],
297
309
  )
298
310
  )
299
311
 
@@ -337,16 +349,21 @@ def _recognize_response_to_speech_event(
337
349
 
338
350
  def _streaming_recognize_response_to_speech_data(
339
351
  resp: cloud_speech.StreamingRecognizeResponse,
340
- ) -> stt.SpeechData:
352
+ ) -> stt.SpeechData | None:
341
353
  text = ""
342
354
  confidence = 0.0
343
355
  for result in resp.results:
356
+ if len(result.alternatives) == 0:
357
+ continue
344
358
  text += result.alternatives[0].transcript
345
359
  confidence += result.alternatives[0].confidence
346
360
 
347
361
  confidence /= len(resp.results)
348
362
  lg = resp.results[0].language_code
349
363
 
364
+ if text == "":
365
+ return None
366
+
350
367
  data = stt.SpeechData(
351
368
  language=lg, start_time=0, end_time=0, confidence=confidence, text=text
352
369
  )
@@ -51,9 +51,13 @@ class TTS(tts.TTS):
51
51
  credentials_file: str | None = None,
52
52
  ) -> None:
53
53
  """
54
- if no credentials is provided, it will use the credentials on the environment
55
- GOOGLE_APPLICATION_CREDENTIALS (default behavior of Google TextToSpeechAsyncClient)
54
+ Create a new instance of Google TTS.
55
+
56
+ Credentials must be provided, either by using the ``credentials_info`` dict, or reading
57
+ from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
58
+ environment variable.
56
59
  """
60
+
57
61
  super().__init__(
58
62
  capabilities=tts.TTSCapabilities(
59
63
  streaming=False,
@@ -144,6 +148,7 @@ class ChunkedStream(tts.ChunkedStream):
144
148
  )
145
149
  )
146
150
  else:
151
+ data = data[44:] # skip WAV header
147
152
  self._event_ch.send_nowait(
148
153
  tts.SynthesizedAudio(
149
154
  request_id=request_id,
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.6.2"
15
+ __version__ = "0.7.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-google
3
- Version: 0.6.2
3
+ Version: 0.7.0
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
+ Requires-Dist: google-auth <3,>=2
22
23
  Requires-Dist: google-cloud-speech <3,>=2
23
24
  Requires-Dist: google-cloud-texttospeech <3,>=2
24
25
  Requires-Dist: livekit-agents >=0.8.0.dev0
@@ -35,4 +36,4 @@ pip install livekit-plugins-google
35
36
 
36
37
  ## Pre-requisites
37
38
 
38
- For credentials, you'll need a Google Cloud account and obtain the correct credentials. Credentials can be passed directly or set as [GOOGLE_APPLICATION_CREDENTIALS](https://cloud.google.com/docs/authentication/application-default-credentials) environment variable.
39
+ For credentials, you'll need a Google Cloud account and the correct credentials. Credentials can be passed directly or supplied via Application Default Credentials, as described in [How Application Default Credentials works](https://cloud.google.com/docs/authentication/application-default-credentials).
@@ -0,0 +1,11 @@
1
+ livekit/plugins/google/__init__.py,sha256=CYbSmm5fEw71F_r_4pEApGaWQ_r15Y3ZEocH88a4yc8,948
2
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
3
+ livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
4
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/google/stt.py,sha256=XXDOISg-8U1MzVu543xLEB3-mr_NFKJp9qo1-ya2-Hc,13569
6
+ livekit/plugins/google/tts.py,sha256=LQttOY3rI8TQ7w3FT-nBv-PDg5oXwITvFeBZtjwrwJE,5692
7
+ livekit/plugins/google/version.py,sha256=G63knoeV7ai0fH-1DCHqI3a7eSI4LlHqjV64n4GbCGg,600
8
+ livekit_plugins_google-0.7.0.dist-info/METADATA,sha256=cHccq2kH8vnXd7qNkADTDbmH1dOd7haUonMGNvdMbmo,1653
9
+ livekit_plugins_google-0.7.0.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
10
+ livekit_plugins_google-0.7.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
11
+ livekit_plugins_google-0.7.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (74.1.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=CYbSmm5fEw71F_r_4pEApGaWQ_r15Y3ZEocH88a4yc8,948
2
- livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
3
- livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
4
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- livekit/plugins/google/stt.py,sha256=Jt3_uc6F9rBZootKyPYslgCgusQB_k7NQ1Cvj9CwppI,12970
6
- livekit/plugins/google/tts.py,sha256=jvbw-T-JlVxcYWiF-tCox35LZuCN3DBKwDp3zN8JCTw,5549
7
- livekit/plugins/google/version.py,sha256=hj5La4IQz5ccAWt5oJAkV9TnNFuujYmxSTjcRby-kNQ,600
8
- livekit_plugins_google-0.6.2.dist-info/METADATA,sha256=fvHmaZHNW-dAFBr8-G5Mrm1sHs66AY6Ur8rXvD2Q-rg,1584
9
- livekit_plugins_google-0.6.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
10
- livekit_plugins_google-0.6.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
11
- livekit_plugins_google-0.6.2.dist-info/RECORD,,