livekit-plugins-google 1.2.8__py3-none-any.whl → 1.2.11__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their supported registries. The information is provided for informational purposes only.

Potentially problematic release.


This version of livekit-plugins-google might be problematic; see the advisory details for more information.

@@ -202,6 +202,7 @@ class RealtimeModel(llm.RealtimeModel):
202
202
  user_transcription=input_audio_transcription is not None,
203
203
  auto_tool_reply_generation=True,
204
204
  audio_output=types.Modality.AUDIO in modalities,
205
+ manual_function_calls=False,
205
206
  )
206
207
  )
207
208
 
@@ -304,6 +305,10 @@ class RealtimeModel(llm.RealtimeModel):
304
305
  async def aclose(self) -> None:
305
306
  pass
306
307
 
308
+ @property
309
+ def model(self) -> str:
310
+ return self._opts.model
311
+
307
312
 
308
313
  class RealtimeSession(llm.RealtimeSession):
309
314
  def __init__(self, realtime_model: RealtimeModel) -> None:
@@ -775,7 +780,7 @@ class RealtimeSession(llm.RealtimeSession):
775
780
  ),
776
781
  language_code=self._opts.language if is_given(self._opts.language) else None,
777
782
  ),
778
- tools=tools_config, # type: ignore
783
+ tools=tools_config,
779
784
  input_audio_transcription=self._opts.input_audio_transcription,
780
785
  output_audio_transcription=self._opts.output_audio_transcription,
781
786
  session_resumption=types.SessionResumptionConfig(
@@ -829,6 +834,7 @@ class RealtimeSession(llm.RealtimeSession):
829
834
  message_stream=self._current_generation.message_ch,
830
835
  function_stream=self._current_generation.function_ch,
831
836
  user_initiated=False,
837
+ response_id=self._current_generation.response_id,
832
838
  )
833
839
 
834
840
  if self._pending_generation_fut and not self._pending_generation_fut.done():
@@ -969,7 +975,7 @@ class RealtimeSession(llm.RealtimeSession):
969
975
  gen.function_ch.send_nowait(
970
976
  llm.FunctionCall(
971
977
  call_id=fnc_call.id or utils.shortuuid("fnc-call-"),
972
- name=fnc_call.name, # type: ignore
978
+ name=fnc_call.name,
973
979
  arguments=arguments,
974
980
  )
975
981
  )
@@ -1018,7 +1024,8 @@ class RealtimeSession(llm.RealtimeSession):
1018
1024
  return token_details_map
1019
1025
 
1020
1026
  metrics = RealtimeModelMetrics(
1021
- label=self._realtime_model._label,
1027
+ label=self._realtime_model.label,
1028
+ model=self._realtime_model.model,
1022
1029
  request_id=current_gen.response_id,
1023
1030
  timestamp=current_gen._created_timestamp,
1024
1031
  duration=duration,
@@ -423,7 +423,7 @@ class LLMStream(llm.LLMStream):
423
423
  tool_calls=[
424
424
  llm.FunctionToolCall(
425
425
  arguments=json.dumps(part.function_call.args),
426
- name=part.function_call.name, # type: ignore
426
+ name=part.function_call.name,
427
427
  call_id=part.function_call.id or utils.shortuuid("function_call_"),
428
428
  )
429
429
  ],
@@ -70,6 +70,7 @@ class STTOptions:
70
70
  spoken_punctuation: bool
71
71
  enable_word_time_offsets: bool
72
72
  enable_word_confidence: bool
73
+ enable_voice_activity_events: bool
73
74
  model: SpeechModels | str
74
75
  sample_rate: int
75
76
  min_confidence_threshold: float
@@ -103,6 +104,7 @@ class STT(stt.STT):
103
104
  spoken_punctuation: bool = False,
104
105
  enable_word_time_offsets: bool = True,
105
106
  enable_word_confidence: bool = False,
107
+ enable_voice_activity_events: bool = False,
106
108
  model: SpeechModels | str = "latest_long",
107
109
  location: str = "global",
108
110
  sample_rate: int = 16000,
@@ -127,6 +129,7 @@ class STT(stt.STT):
127
129
  spoken_punctuation(bool): whether to use spoken punctuation (default: False)
128
130
  enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
129
131
  enable_word_confidence(bool): whether to enable word confidence (default: False)
132
+ enable_voice_activity_events(bool): whether to enable voice activity events (default: False)
130
133
  model(SpeechModels): the model to use for recognition default: "latest_long"
131
134
  location(str): the location to use for recognition default: "global"
132
135
  sample_rate(int): the sample rate of the audio default: 16000
@@ -168,6 +171,7 @@ class STT(stt.STT):
168
171
  spoken_punctuation=spoken_punctuation,
169
172
  enable_word_time_offsets=enable_word_time_offsets,
170
173
  enable_word_confidence=enable_word_confidence,
174
+ enable_voice_activity_events=enable_voice_activity_events,
171
175
  model=model,
172
176
  sample_rate=sample_rate,
173
177
  min_confidence_threshold=min_confidence_threshold,
@@ -507,6 +511,7 @@ class SpeechStream(stt.SpeechStream):
507
511
  ),
508
512
  streaming_features=cloud_speech.StreamingRecognitionFeatures(
509
513
  interim_results=self._config.interim_results,
514
+ enable_voice_activity_events=self._config.enable_voice_activity_events,
510
515
  ),
511
516
  )
512
517
 
@@ -52,6 +52,7 @@ class _TTSOptions:
52
52
  volume_gain_db: float
53
53
  custom_pronunciations: CustomPronunciations | None
54
54
  enable_ssml: bool
55
+ use_markup: bool
55
56
 
56
57
 
57
58
  class TTS(tts.TTS):
@@ -75,6 +76,7 @@ class TTS(tts.TTS):
75
76
  custom_pronunciations: NotGivenOr[CustomPronunciations] = NOT_GIVEN,
76
77
  use_streaming: bool = True,
77
78
  enable_ssml: bool = False,
79
+ use_markup: bool = False,
78
80
  ) -> None:
79
81
  """
80
82
  Create a new instance of Google TTS.
@@ -100,6 +102,7 @@ class TTS(tts.TTS):
100
102
  custom_pronunciations (CustomPronunciations, optional): Custom pronunciations for the TTS. Default is None.
101
103
  use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
102
104
  enable_ssml (bool, optional): Whether to enable SSML support. Default is False.
105
+ use_markup (bool, optional): Whether to enable markup input for HD voices. Default is False.
103
106
  """ # noqa: E501
104
107
  super().__init__(
105
108
  capabilities=tts.TTSCapabilities(streaming=use_streaming),
@@ -107,8 +110,11 @@ class TTS(tts.TTS):
107
110
  num_channels=1,
108
111
  )
109
112
 
110
- if enable_ssml and use_streaming:
111
- raise ValueError("SSML support is not available for streaming synthesis")
113
+ if enable_ssml:
114
+ if use_streaming:
115
+ raise ValueError("SSML support is not available for streaming synthesis")
116
+ if use_markup:
117
+ raise ValueError("SSML support is not available for markup input")
112
118
 
113
119
  self._client: texttospeech.TextToSpeechAsyncClient | None = None
114
120
  self._credentials_info = credentials_info
@@ -145,6 +151,7 @@ class TTS(tts.TTS):
145
151
  volume_gain_db=volume_gain_db,
146
152
  custom_pronunciations=pronunciations,
147
153
  enable_ssml=enable_ssml,
154
+ use_markup=use_markup,
148
155
  )
149
156
  self._streams = weakref.WeakSet[SynthesizeStream]()
150
157
 
@@ -238,19 +245,21 @@ class ChunkedStream(tts.ChunkedStream):
238
245
 
239
246
  async def _run(self, output_emitter: tts.AudioEmitter) -> None:
240
247
  try:
241
- input = (
242
- texttospeech.SynthesisInput(
243
- ssml=self._build_ssml(),
244
- custom_pronunciations=self._opts.custom_pronunciations,
248
+ if self._opts.use_markup:
249
+ tts_input = texttospeech.SynthesisInput(
250
+ markup=self._input_text, custom_pronunciations=self._opts.custom_pronunciations
245
251
  )
246
- if self._opts.enable_ssml
247
- else texttospeech.SynthesisInput(
248
- text=self._input_text,
249
- custom_pronunciations=self._opts.custom_pronunciations,
252
+ elif self._opts.enable_ssml:
253
+ tts_input = texttospeech.SynthesisInput(
254
+ ssml=self._build_ssml(), custom_pronunciations=self._opts.custom_pronunciations
250
255
  )
251
- )
256
+ else:
257
+ tts_input = texttospeech.SynthesisInput(
258
+ text=self._input_text, custom_pronunciations=self._opts.custom_pronunciations
259
+ )
260
+
252
261
  response: SynthesizeSpeechResponse = await self._tts._ensure_client().synthesize_speech(
253
- input=input,
262
+ input=tts_input,
254
263
  voice=self._opts.voice,
255
264
  audio_config=texttospeech.AudioConfig(
256
265
  audio_encoding=self._opts.encoding,
@@ -355,8 +364,12 @@ class SynthesizeStream(tts.SynthesizeStream):
355
364
 
356
365
  async for input in input_stream:
357
366
  self._mark_started()
358
- yield texttospeech.StreamingSynthesizeRequest(
359
- input=texttospeech.StreamingSynthesisInput(text=input.token)
367
+ yield (
368
+ texttospeech.StreamingSynthesizeRequest(
369
+ input=texttospeech.StreamingSynthesisInput(markup=input.token)
370
+ if self._opts.use_markup
371
+ else texttospeech.StreamingSynthesisInput(text=input.token)
372
+ )
360
373
  )
361
374
 
362
375
  except Exception:
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.8"
15
+ __version__ = "1.2.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.8
3
+ Version: 1.2.11
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.8
25
+ Requires-Dist: livekit-agents>=1.2.11
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -1,18 +1,18 @@
1
1
  livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
2
- livekit/plugins/google/llm.py,sha256=cMlmLX1m3TsrLW0a-k2oj6WQSNWEjj3jv7ob8MUoXCI,18825
2
+ livekit/plugins/google/llm.py,sha256=aeeGqhbEScbEs-GKp1T8rLocNqmvG4UBj6diekYe4FU,18809
3
3
  livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
4
  livekit/plugins/google/models.py,sha256=poOvUBvgpqmmQV5EUQsq0RgNIRAq7nH-_IZIcIfPSBI,2801
5
5
  livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/google/stt.py,sha256=gRhVRsfg3BPNkBJGG78QOxEia0mF1jBnI_Ckq1jxqIs,25938
6
+ livekit/plugins/google/stt.py,sha256=RhbDkCbrGWb8R7feS5w766fwUNJEbg7hHlba7Oq9lBI,26305
7
7
  livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
8
- livekit/plugins/google/tts.py,sha256=3TPHBKJJwIt-hSTAdbI4NUcQNerhV0eDuK_o2rprdqg,16606
8
+ livekit/plugins/google/tts.py,sha256=LBLP3pEq1iCCgfidpRTtpeoDKYmXh8PKeJf1llAsybQ,17302
9
9
  livekit/plugins/google/utils.py,sha256=z0iCP6-hYix3JRm2RM5JOBEJCICehUe5N4FTl-JpXLc,9269
10
- livekit/plugins/google/version.py,sha256=XZ3forlpqz9F1SmliM6XQGe2MyHu60jOjDbNGs_mcRg,600
10
+ livekit/plugins/google/version.py,sha256=xsWwuH5qgJrB3wPPfmZaiEH7zObN2yGboBmyrTHj-b8,601
11
11
  livekit/plugins/google/beta/__init__.py,sha256=RvAUdvEiRN-fe4JrgPcN0Jkw1kZR9wPerGMFVjS1Cc0,270
12
12
  livekit/plugins/google/beta/gemini_tts.py,sha256=esWjr0Xf95tl0_AB7MXiFZ_VCORWgcWjzvLvRa3t0FQ,8515
13
13
  livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
14
14
  livekit/plugins/google/beta/realtime/api_proto.py,sha256=nb_QkVQDEH7h0SKA9vdS3JaL12a6t2Z1ja4SdnxE6a8,814
15
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=p0vEaxQhPLUbGjHo7Za2rbBrCjD_UqPk-thd9ybIiuk,47817
16
- livekit_plugins_google-1.2.8.dist-info/METADATA,sha256=2I3YmnnGQGcd8qxz8AGaxG_KflpdYu-3oUBVCudZfA8,1907
17
- livekit_plugins_google-1.2.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- livekit_plugins_google-1.2.8.dist-info/RECORD,,
15
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=RALLfKWb8c4K8ennINDLeVxKrP5JXvGa_nNGP0_ASlI,48012
16
+ livekit_plugins_google-1.2.11.dist-info/METADATA,sha256=ulwiM6njmKCQG_8e1imKwV6oG0IHuXRahBSB0UI0OBM,1909
17
+ livekit_plugins_google-1.2.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ livekit_plugins_google-1.2.11.dist-info/RECORD,,