livekit-plugins-google 1.2.13__tar.gz → 1.2.15__tar.gz

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)
  1. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/.gitignore +5 -1
  2. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/PKG-INFO +2 -2
  3. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/__init__.py +2 -2
  4. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/beta/__init__.py +2 -2
  5. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/models.py +1 -0
  6. {livekit_plugins_google-1.2.13/livekit/plugins/google/beta → livekit_plugins_google-1.2.15/livekit/plugins/google}/realtime/api_proto.py +33 -1
  7. {livekit_plugins_google-1.2.13/livekit/plugins/google/beta → livekit_plugins_google-1.2.15/livekit/plugins/google}/realtime/realtime_api.py +17 -5
  8. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/stt.py +18 -7
  9. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/version.py +1 -1
  10. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/pyproject.toml +1 -1
  11. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/README.md +0 -0
  12. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/beta/gemini_tts.py +0 -0
  13. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/llm.py +0 -0
  14. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/log.py +0 -0
  15. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/py.typed +0 -0
  16. {livekit_plugins_google-1.2.13/livekit/plugins/google/beta → livekit_plugins_google-1.2.15/livekit/plugins/google}/realtime/__init__.py +0 -0
  17. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/tools.py +0 -0
  18. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/tts.py +0 -0
  19. {livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/utils.py +0 -0
@@ -172,4 +172,8 @@ pyrightconfig.json
172
172
  docs/
173
173
 
174
174
  # Database files
175
- *.db
175
+ *.db
176
+
177
+
178
+ # Examples for development
179
+ examples/dev/*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.13
3
+ Version: 1.2.15
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.13
25
+ Requires-Dist: livekit-agents>=1.2.15
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -19,14 +19,14 @@ Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
19
19
  See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
20
20
  """
21
21
 
22
- from . import beta
22
+ from . import beta, realtime
23
23
  from .llm import LLM
24
24
  from .stt import STT, SpeechStream
25
25
  from .tools import _LLMTool
26
26
  from .tts import TTS
27
27
  from .version import __version__
28
28
 
29
- __all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
29
+ __all__ = ["STT", "TTS", "realtime", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
30
30
  from livekit.agents import Plugin
31
31
 
32
32
  from .log import logger
@@ -1,7 +1,7 @@
1
- from . import realtime
1
+ from .. import realtime
2
2
  from .gemini_tts import TTS as GeminiTTS
3
3
 
4
- __all__ = ["realtime", "GeminiTTS"]
4
+ __all__ = ["GeminiTTS", "realtime"]
5
5
 
6
6
  # Cleanup docs of unexported modules
7
7
  _module = dir()
@@ -10,6 +10,7 @@ SpeechModels = Literal[
10
10
  "medical_conversation",
11
11
  "chirp",
12
12
  "chirp_2",
13
+ "chirp_3",
13
14
  "latest_long",
14
15
  "latest_short",
15
16
  ]
@@ -10,11 +10,43 @@ LiveAPIModels = Literal[
10
10
  # models supported on Gemini API
11
11
  "gemini-2.0-flash-live-001",
12
12
  "gemini-live-2.5-flash-preview",
13
+ "gemini-2.5-flash-native-audio-preview-09-2025",
13
14
  "gemini-2.5-flash-preview-native-audio-dialog",
14
15
  "gemini-2.5-flash-exp-native-audio-thinking-dialog",
15
16
  ]
16
17
 
17
- Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede", "Leda", "Orus", "Zephyr"]
18
+ Voice = Literal[
19
+ "Achernar",
20
+ "Achird",
21
+ "Algenib",
22
+ "Algieba",
23
+ "Alnilam",
24
+ "Aoede",
25
+ "Autonoe",
26
+ "Callirrhoe",
27
+ "Charon",
28
+ "Despina",
29
+ "Enceladus",
30
+ "Erinome",
31
+ "Fenrir",
32
+ "Gacrux",
33
+ "Iapetus",
34
+ "Kore",
35
+ "Laomedeia",
36
+ "Leda",
37
+ "Orus",
38
+ "Pulcherrima",
39
+ "Puck",
40
+ "Rasalgethi",
41
+ "Sadachbia",
42
+ "Sadaltager",
43
+ "Schedar",
44
+ "Sulafat",
45
+ "Umbriel",
46
+ "Vindemiatrix",
47
+ "Zephyr",
48
+ "Zubenelgenubi",
49
+ ]
18
50
 
19
51
 
20
52
  ClientEvents = Union[
@@ -23,11 +23,11 @@ from livekit.agents.types import (
23
23
  NotGivenOr,
24
24
  )
25
25
  from livekit.agents.utils import audio as audio_utils, images, is_given
26
- from livekit.plugins.google.beta.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
26
+ from livekit.plugins.google.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
27
27
 
28
- from ...log import logger
29
- from ...tools import _LLMTool
30
- from ...utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
28
+ from ..log import logger
29
+ from ..tools import _LLMTool
30
+ from ..utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
31
31
 
32
32
  INPUT_AUDIO_SAMPLE_RATE = 16000
33
33
  INPUT_AUDIO_CHANNELS = 1
@@ -78,6 +78,7 @@ class _RealtimeOptions:
78
78
  gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN
79
79
  tool_behavior: NotGivenOr[types.Behavior] = NOT_GIVEN
80
80
  tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN
81
+ thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN
81
82
 
82
83
 
83
84
  @dataclass
@@ -144,6 +145,7 @@ class RealtimeModel(llm.RealtimeModel):
144
145
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
145
146
  http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
146
147
  _gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
148
+ thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN,
147
149
  ) -> None:
148
150
  """
149
151
  Initializes a RealtimeModel instance for interacting with Google's Realtime API.
@@ -180,6 +182,7 @@ class RealtimeModel(llm.RealtimeModel):
180
182
  context_window_compression (ContextWindowCompressionConfig, optional): The configuration for context window compression. Defaults to None.
181
183
  tool_behavior (Behavior, optional): The behavior for tool call. Default behavior is BLOCK in Gemini Realtime API.
182
184
  tool_response_scheduling (FunctionResponseScheduling, optional): The scheduling for tool response. Default scheduling is WHEN_IDLE.
185
+ thinking_config (ThinkingConfig, optional): Native audio thinking configuration.
183
186
  conn_options (APIConnectOptions, optional): The configuration for the API connection. Defaults to DEFAULT_API_CONNECT_OPTIONS.
184
187
  _gemini_tools (list[LLMTool], optional): Gemini-specific tools to use for the session. This parameter is experimental and may change.
185
188
 
@@ -274,6 +277,7 @@ class RealtimeModel(llm.RealtimeModel):
274
277
  tool_behavior=tool_behavior,
275
278
  conn_options=conn_options,
276
279
  http_options=http_options,
280
+ thinking_config=thinking_config,
277
281
  )
278
282
 
279
283
  self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -510,7 +514,12 @@ class RealtimeSession(llm.RealtimeSession):
510
514
  for f in self._resample_audio(frame):
511
515
  for nf in self._bstream.write(f.data.tobytes()):
512
516
  realtime_input = types.LiveClientRealtimeInput(
513
- media_chunks=[types.Blob(data=nf.data.tobytes(), mime_type="audio/pcm")]
517
+ media_chunks=[
518
+ types.Blob(
519
+ data=nf.data.tobytes(),
520
+ mime_type=f"audio/pcm;rate={INPUT_AUDIO_SAMPLE_RATE}",
521
+ )
522
+ ]
514
523
  )
515
524
  self._send_client_event(realtime_input)
516
525
 
@@ -814,6 +823,9 @@ class RealtimeSession(llm.RealtimeSession):
814
823
  frequency_penalty=self._opts.frequency_penalty
815
824
  if is_given(self._opts.frequency_penalty)
816
825
  else None,
826
+ thinking_config=self._opts.thinking_config
827
+ if is_given(self._opts.thinking_config)
828
+ else None,
817
829
  ),
818
830
  system_instruction=types.Content(parts=[types.Part(text=self._opts.instructions)])
819
831
  if is_given(self._opts.instructions)
@@ -618,17 +618,28 @@ def _streaming_recognize_response_to_speech_data(
618
618
  ) -> stt.SpeechData | None:
619
619
  text = ""
620
620
  confidence = 0.0
621
+ final_result = None
621
622
  for result in resp.results:
622
623
  if len(result.alternatives) == 0:
623
624
  continue
624
- text += result.alternatives[0].transcript
625
- confidence += result.alternatives[0].confidence
626
-
627
- confidence /= len(resp.results)
628
- lg = resp.results[0].language_code
625
+ else:
626
+ if result.is_final:
627
+ final_result = result
628
+ break
629
+ else:
630
+ text += result.alternatives[0].transcript
631
+ confidence += result.alternatives[0].confidence
632
+
633
+ if final_result is not None:
634
+ text = final_result.alternatives[0].transcript
635
+ confidence = final_result.alternatives[0].confidence
636
+ lg = final_result.language_code
637
+ else:
638
+ confidence /= len(resp.results)
639
+ if confidence < min_confidence_threshold:
640
+ return None
641
+ lg = resp.results[0].language_code
629
642
 
630
- if confidence < min_confidence_threshold:
631
- return None
632
643
  if text == "":
633
644
  return None
634
645
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.13"
15
+ __version__ = "1.2.15"
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "google-cloud-speech >= 2, < 3",
28
28
  "google-cloud-texttospeech >= 2.27, < 3",
29
29
  "google-genai >= v1.23.0",
30
- "livekit-agents>=1.2.13",
30
+ "livekit-agents>=1.2.15",
31
31
  ]
32
32
 
33
33
  [project.urls]