livekit-plugins-google 0.11.0__tar.gz → 1.0.0.dev4__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. livekit_plugins_google-1.0.0.dev4/.gitignore +168 -0
  2. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/PKG-INFO +12 -22
  3. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/__init__.py +1 -5
  4. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/api_proto.py +2 -1
  5. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/realtime_api.py +21 -46
  6. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/transcriber.py +11 -27
  7. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/llm.py +127 -197
  8. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/stt.py +28 -58
  9. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/tts.py +10 -16
  10. livekit_plugins_google-1.0.0.dev4/livekit/plugins/google/utils.py +213 -0
  11. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/version.py +1 -1
  12. livekit_plugins_google-1.0.0.dev4/pyproject.toml +45 -0
  13. livekit_plugins_google-0.11.0/livekit/plugins/google/_utils.py +0 -199
  14. livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/PKG-INFO +0 -109
  15. livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/SOURCES.txt +0 -22
  16. livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/dependency_links.txt +0 -1
  17. livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/requires.txt +0 -5
  18. livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/top_level.txt +0 -1
  19. livekit_plugins_google-0.11.0/pyproject.toml +0 -3
  20. livekit_plugins_google-0.11.0/setup.cfg +0 -4
  21. livekit_plugins_google-0.11.0/setup.py +0 -63
  22. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/README.md +0 -0
  23. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/__init__.py +0 -0
  24. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/__init__.py +0 -0
  25. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/log.py +0 -0
  26. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/models.py +0 -0
  27. {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/py.typed +0 -0
@@ -0,0 +1,168 @@
1
+ **/.vscode
2
+ **/.DS_Store
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ .idea/
164
+
165
+ node_modules
166
+
167
+ credentials.json
168
+ pyrightconfig.json
@@ -1,39 +1,29 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 0.11.0
3
+ Version: 1.0.0.dev4
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
- Home-page: https://github.com/livekit/agents
6
- License: Apache-2.0
7
5
  Project-URL: Documentation, https://docs.livekit.io
8
6
  Project-URL: Website, https://livekit.io/
9
7
  Project-URL: Source, https://github.com/livekit/agents
10
- Keywords: webrtc,realtime,audio,video,livekit
8
+ Author: LiveKit
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,realtime,video,webrtc
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Topic :: Multimedia :: Sound/Audio
14
- Classifier: Topic :: Multimedia :: Video
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
13
  Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
17
15
  Classifier: Programming Language :: Python :: 3.9
18
16
  Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Description-Content-Type: text/markdown
22
21
  Requires-Dist: google-auth<3,>=2
23
22
  Requires-Dist: google-cloud-speech<3,>=2
24
23
  Requires-Dist: google-cloud-texttospeech<3,>=2
25
- Requires-Dist: google-genai==1.3.0
26
- Requires-Dist: livekit-agents<1.0.0,>=0.12.16
27
- Dynamic: classifier
28
- Dynamic: description
29
- Dynamic: description-content-type
30
- Dynamic: home-page
31
- Dynamic: keywords
32
- Dynamic: license
33
- Dynamic: project-url
34
- Dynamic: requires-dist
35
- Dynamic: requires-python
36
- Dynamic: summary
24
+ Requires-Dist: google-genai==1.5.0
25
+ Requires-Dist: livekit-agents>=1.0.0.dev4
26
+ Description-Content-Type: text/markdown
37
27
 
38
28
  # LiveKit Plugins Google
39
29
 
@@ -1,8 +1,4 @@
1
- from .api_proto import (
2
- ClientEvents,
3
- LiveAPIModels,
4
- Voice,
5
- )
1
+ from .api_proto import ClientEvents, LiveAPIModels, Voice
6
2
  from .realtime_api import RealtimeModel
7
3
 
8
4
  __all__ = [
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Literal, Sequence, Union
3
+ from collections.abc import Sequence
4
+ from typing import Literal, Union
4
5
 
5
6
  from google.genai import types
6
7
 
@@ -3,21 +3,17 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  import json
5
5
  import os
6
+ from collections.abc import AsyncIterable
6
7
  from dataclasses import dataclass
7
- from typing import AsyncIterable, Literal
8
-
9
- from livekit import rtc
10
- from livekit.agents import llm, utils
11
- from livekit.agents.llm.function_context import _create_ai_function_info
12
- from livekit.agents.utils import images
8
+ from typing import Literal
13
9
 
14
10
  from google import genai
11
+ from google.genai._api_client import HttpOptions
15
12
  from google.genai.types import (
16
13
  Blob,
17
14
  Content,
18
15
  FunctionResponse,
19
16
  GenerationConfig,
20
- HttpOptions,
21
17
  LiveClientContent,
22
18
  LiveClientRealtimeInput,
23
19
  LiveClientToolResponse,
@@ -29,15 +25,13 @@ from google.genai.types import (
29
25
  Tool,
30
26
  VoiceConfig,
31
27
  )
28
+ from livekit import rtc
29
+ from livekit.agents import llm, utils
30
+ from livekit.agents.llm.function_context import _create_ai_function_info
31
+ from livekit.agents.utils import images
32
32
 
33
33
  from ...log import logger
34
- from .api_proto import (
35
- ClientEvents,
36
- LiveAPIModels,
37
- Voice,
38
- _build_gemini_ctx,
39
- _build_tools,
40
- )
34
+ from .api_proto import ClientEvents, LiveAPIModels, Voice, _build_gemini_ctx, _build_tools
41
35
  from .transcriber import ModelTranscriber, TranscriberSession, TranscriptionContent
42
36
 
43
37
  EventTypes = Literal[
@@ -108,7 +102,7 @@ class RealtimeModel:
108
102
  model: LiveAPIModels | str = "gemini-2.0-flash-exp",
109
103
  api_key: str | None = None,
110
104
  voice: Voice | str = "Puck",
111
- modalities: list[Modality] = [Modality.AUDIO],
105
+ modalities: list[Modality] = None,
112
106
  enable_user_audio_transcription: bool = True,
113
107
  enable_agent_audio_transcription: bool = True,
114
108
  vertexai: bool = False,
@@ -155,6 +149,8 @@ class RealtimeModel:
155
149
  Raises:
156
150
  ValueError: If the API key is not provided and cannot be found in environment variables.
157
151
  """
152
+ if modalities is None:
153
+ modalities = ["AUDIO"]
158
154
  super().__init__()
159
155
  self._capabilities = Capabilities(
160
156
  supports_truncate=False,
@@ -180,9 +176,7 @@ class RealtimeModel:
180
176
  "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
181
177
  )
182
178
 
183
- instructions_content = (
184
- Content(parts=[Part(text=instructions)]) if instructions else None
185
- )
179
+ instructions_content = Content(parts=[Part(text=instructions)]) if instructions else None
186
180
 
187
181
  self._rt_sessions: list[GeminiRealtimeSession] = []
188
182
  self._opts = ModelOptions(
@@ -259,8 +253,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
259
253
  self._fnc_ctx = fnc_ctx
260
254
  self._fnc_tasks = utils.aio.TaskSet()
261
255
  self._is_interrupted = False
262
- self._playout_complete = asyncio.Event()
263
- self._playout_complete.set()
264
256
 
265
257
  tools = []
266
258
  if self._fnc_ctx is not None:
@@ -281,9 +273,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
281
273
  system_instruction=self._opts.instructions,
282
274
  speech_config=SpeechConfig(
283
275
  voice_config=VoiceConfig(
284
- prebuilt_voice_config=PrebuiltVoiceConfig(
285
- voice_name=self._opts.voice
286
- )
276
+ prebuilt_voice_config=PrebuiltVoiceConfig(voice_name=self._opts.voice)
287
277
  )
288
278
  ),
289
279
  tools=tools,
@@ -295,18 +285,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
295
285
  project=self._opts.project,
296
286
  location=self._opts.location,
297
287
  )
298
- self._main_atask = asyncio.create_task(
299
- self._main_task(), name="gemini-realtime-session"
300
- )
288
+ self._main_atask = asyncio.create_task(self._main_task(), name="gemini-realtime-session")
301
289
  if self._opts.enable_user_audio_transcription:
302
- self._transcriber = TranscriberSession(
303
- client=self._client, model=self._opts.model
304
- )
290
+ self._transcriber = TranscriberSession(client=self._client, model=self._opts.model)
305
291
  self._transcriber.on("input_speech_done", self._on_input_speech_done)
306
292
  if self._opts.enable_agent_audio_transcription:
307
- self._agent_transcriber = ModelTranscriber(
308
- client=self._client, model=self._opts.model
309
- )
293
+ self._agent_transcriber = ModelTranscriber(client=self._client, model=self._opts.model)
310
294
  self._agent_transcriber.on("input_speech_done", self._on_agent_speech_done)
311
295
  # init dummy task
312
296
  self._init_sync_task = asyncio.create_task(asyncio.sleep(0))
@@ -320,10 +304,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
320
304
  self._send_ch.close()
321
305
  await self._main_atask
322
306
 
323
- @property
324
- def playout_complete(self) -> asyncio.Event | None:
325
- return self._playout_complete
326
-
327
307
  @property
328
308
  def fnc_ctx(self) -> llm.FunctionContext | None:
329
309
  return self._fnc_ctx
@@ -341,9 +321,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
341
321
  DEFAULT_ENCODE_OPTIONS = images.EncodeOptions(
342
322
  format="JPEG",
343
323
  quality=75,
344
- resize_options=images.ResizeOptions(
345
- width=1024, height=1024, strategy="scale_aspect_fit"
346
- ),
324
+ resize_options=images.ResizeOptions(width=1024, height=1024, strategy="scale_aspect_fit"),
347
325
  )
348
326
 
349
327
  def push_video(
@@ -393,9 +371,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
393
371
 
394
372
  def create_response(
395
373
  self,
396
- on_duplicate: Literal[
397
- "cancel_existing", "cancel_new", "keep_both"
398
- ] = "keep_both",
374
+ on_duplicate: Literal["cancel_existing", "cancel_new", "keep_both"] = "keep_both",
399
375
  ) -> None:
400
376
  turns, _ = _build_gemini_ctx(self._chat_ctx, id(self))
401
377
  ctx = [self._opts.instructions] + turns if self._opts.instructions else turns
@@ -481,8 +457,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
481
457
  data=part.inline_data.data,
482
458
  sample_rate=24000,
483
459
  num_channels=1,
484
- samples_per_channel=len(part.inline_data.data)
485
- // 2,
460
+ samples_per_channel=len(part.inline_data.data) // 2,
486
461
  )
487
462
  if self._opts.enable_agent_audio_transcription:
488
463
  content.audio.append(frame)
@@ -525,12 +500,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
525
500
  logger.warning(
526
501
  "function call cancelled",
527
502
  extra={
528
- "function_call_ids": response.tool_call_cancellation.ids,
503
+ "function_call_ids": response.tool_call_cancellation.function_call_ids,
529
504
  },
530
505
  )
531
506
  self.emit(
532
507
  "function_calls_cancelled",
533
- response.tool_call_cancellation.ids,
508
+ response.tool_call_cancellation.function_call_ids,
534
509
  )
535
510
 
536
511
  async with self._client.aio.live.connect(
@@ -6,12 +6,12 @@ from dataclasses import dataclass
6
6
  from typing import Literal
7
7
 
8
8
  import websockets
9
- from livekit import rtc
10
- from livekit.agents import APIConnectionError, APIStatusError, utils
11
9
 
12
10
  from google import genai
13
11
  from google.genai import types
14
12
  from google.genai.errors import APIError, ClientError, ServerError
13
+ from livekit import rtc
14
+ from livekit.agents import APIConnectionError, APIStatusError, utils
15
15
 
16
16
  from ...log import logger
17
17
  from .api_proto import ClientEvents, LiveAPIModels
@@ -51,11 +51,9 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
51
51
  self._needed_sr = 16000
52
52
  self._closed = False
53
53
 
54
- system_instructions = types.Content(
55
- parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
56
- )
54
+ system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
57
55
  self._config = types.LiveConnectConfig(
58
- response_modalities=[types.Modality.TEXT],
56
+ response_modalities=["TEXT"],
59
57
  system_instruction=system_instructions,
60
58
  generation_config=types.GenerationConfig(temperature=0.0),
61
59
  )
@@ -81,17 +79,13 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
81
79
  for f in self._resampler.push(frame):
82
80
  self._queue_msg(
83
81
  types.LiveClientRealtimeInput(
84
- media_chunks=[
85
- types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")
86
- ]
82
+ media_chunks=[types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")]
87
83
  )
88
84
  )
89
85
  else:
90
86
  self._queue_msg(
91
87
  types.LiveClientRealtimeInput(
92
- media_chunks=[
93
- types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")
94
- ]
88
+ media_chunks=[types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")]
95
89
  )
96
90
  )
97
91
 
@@ -157,17 +151,11 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
157
151
  logger.exception(f"Uncaught error in transcriber _recv_task: {e}")
158
152
  self._closed = True
159
153
 
160
- async with self._client.aio.live.connect(
161
- model=self._model, config=self._config
162
- ) as session:
154
+ async with self._client.aio.live.connect(model=self._model, config=self._config) as session:
163
155
  self._session = session
164
156
  tasks = [
165
- asyncio.create_task(
166
- _send_task(), name="gemini-realtime-transcriber-send"
167
- ),
168
- asyncio.create_task(
169
- _recv_task(), name="gemini-realtime-transcriber-recv"
170
- ),
157
+ asyncio.create_task(_send_task(), name="gemini-realtime-transcriber-send"),
158
+ asyncio.create_task(_recv_task(), name="gemini-realtime-transcriber-recv"),
171
159
  ]
172
160
 
173
161
  try:
@@ -187,9 +175,7 @@ class ModelTranscriber(utils.EventEmitter[EventTypes]):
187
175
  self._client = client
188
176
  self._model = model
189
177
  self._needed_sr = 16000
190
- self._system_instructions = types.Content(
191
- parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
192
- )
178
+ self._system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
193
179
  self._config = types.GenerateContentConfig(
194
180
  temperature=0.0,
195
181
  system_instruction=self._system_instructions,
@@ -198,9 +184,7 @@ class ModelTranscriber(utils.EventEmitter[EventTypes]):
198
184
  self._resampler: rtc.AudioResampler | None = None
199
185
  self._buffer: rtc.AudioFrame | None = None
200
186
  self._audio_ch = utils.aio.Chan[rtc.AudioFrame]()
201
- self._main_atask = asyncio.create_task(
202
- self._main_task(), name="gemini-model-transcriber"
203
- )
187
+ self._main_atask = asyncio.create_task(self._main_task(), name="gemini-model-transcriber")
204
188
 
205
189
  async def aclose(self) -> None:
206
190
  if self._audio_ch.closed: