livekit-plugins-google 0.11.0__tar.gz → 1.0.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit_plugins_google-1.0.0.dev4/.gitignore +168 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/PKG-INFO +12 -22
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/__init__.py +1 -5
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/api_proto.py +2 -1
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/realtime_api.py +21 -46
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/realtime/transcriber.py +11 -27
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/llm.py +127 -197
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/stt.py +28 -58
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/tts.py +10 -16
- livekit_plugins_google-1.0.0.dev4/livekit/plugins/google/utils.py +213 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/version.py +1 -1
- livekit_plugins_google-1.0.0.dev4/pyproject.toml +45 -0
- livekit_plugins_google-0.11.0/livekit/plugins/google/_utils.py +0 -199
- livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/PKG-INFO +0 -109
- livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/SOURCES.txt +0 -22
- livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/dependency_links.txt +0 -1
- livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/requires.txt +0 -5
- livekit_plugins_google-0.11.0/livekit_plugins_google.egg-info/top_level.txt +0 -1
- livekit_plugins_google-0.11.0/pyproject.toml +0 -3
- livekit_plugins_google-0.11.0/setup.cfg +0 -4
- livekit_plugins_google-0.11.0/setup.py +0 -63
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/README.md +0 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/__init__.py +0 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/beta/__init__.py +0 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/log.py +0 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/models.py +0 -0
- {livekit_plugins_google-0.11.0 → livekit_plugins_google-1.0.0.dev4}/livekit/plugins/google/py.typed +0 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
**/.vscode
|
2
|
+
**/.DS_Store
|
3
|
+
|
4
|
+
# Byte-compiled / optimized / DLL files
|
5
|
+
__pycache__/
|
6
|
+
*.py[cod]
|
7
|
+
*$py.class
|
8
|
+
|
9
|
+
# C extensions
|
10
|
+
*.so
|
11
|
+
|
12
|
+
# Distribution / packaging
|
13
|
+
.Python
|
14
|
+
build/
|
15
|
+
develop-eggs/
|
16
|
+
dist/
|
17
|
+
downloads/
|
18
|
+
eggs/
|
19
|
+
.eggs/
|
20
|
+
lib/
|
21
|
+
lib64/
|
22
|
+
parts/
|
23
|
+
sdist/
|
24
|
+
var/
|
25
|
+
wheels/
|
26
|
+
share/python-wheels/
|
27
|
+
*.egg-info/
|
28
|
+
.installed.cfg
|
29
|
+
*.egg
|
30
|
+
MANIFEST
|
31
|
+
|
32
|
+
# PyInstaller
|
33
|
+
# Usually these files are written by a python script from a template
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
35
|
+
*.manifest
|
36
|
+
*.spec
|
37
|
+
|
38
|
+
# Installer logs
|
39
|
+
pip-log.txt
|
40
|
+
pip-delete-this-directory.txt
|
41
|
+
|
42
|
+
# Unit test / coverage reports
|
43
|
+
htmlcov/
|
44
|
+
.tox/
|
45
|
+
.nox/
|
46
|
+
.coverage
|
47
|
+
.coverage.*
|
48
|
+
.cache
|
49
|
+
nosetests.xml
|
50
|
+
coverage.xml
|
51
|
+
*.cover
|
52
|
+
*.py,cover
|
53
|
+
.hypothesis/
|
54
|
+
.pytest_cache/
|
55
|
+
cover/
|
56
|
+
|
57
|
+
# Translations
|
58
|
+
*.mo
|
59
|
+
*.pot
|
60
|
+
|
61
|
+
# Django stuff:
|
62
|
+
*.log
|
63
|
+
local_settings.py
|
64
|
+
db.sqlite3
|
65
|
+
db.sqlite3-journal
|
66
|
+
|
67
|
+
# Flask stuff:
|
68
|
+
instance/
|
69
|
+
.webassets-cache
|
70
|
+
|
71
|
+
# Scrapy stuff:
|
72
|
+
.scrapy
|
73
|
+
|
74
|
+
# Sphinx documentation
|
75
|
+
docs/_build/
|
76
|
+
|
77
|
+
# PyBuilder
|
78
|
+
.pybuilder/
|
79
|
+
target/
|
80
|
+
|
81
|
+
# Jupyter Notebook
|
82
|
+
.ipynb_checkpoints
|
83
|
+
|
84
|
+
# IPython
|
85
|
+
profile_default/
|
86
|
+
ipython_config.py
|
87
|
+
|
88
|
+
# pyenv
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
91
|
+
# .python-version
|
92
|
+
|
93
|
+
# pipenv
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
97
|
+
# install all needed dependencies.
|
98
|
+
#Pipfile.lock
|
99
|
+
|
100
|
+
# poetry
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
103
|
+
# commonly ignored for libraries.
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
105
|
+
#poetry.lock
|
106
|
+
|
107
|
+
# pdm
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
109
|
+
#pdm.lock
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
111
|
+
# in version control.
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
113
|
+
.pdm.toml
|
114
|
+
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
116
|
+
__pypackages__/
|
117
|
+
|
118
|
+
# Celery stuff
|
119
|
+
celerybeat-schedule
|
120
|
+
celerybeat.pid
|
121
|
+
|
122
|
+
# SageMath parsed files
|
123
|
+
*.sage.py
|
124
|
+
|
125
|
+
# Environments
|
126
|
+
.env
|
127
|
+
.venv
|
128
|
+
env/
|
129
|
+
venv/
|
130
|
+
ENV/
|
131
|
+
env.bak/
|
132
|
+
venv.bak/
|
133
|
+
|
134
|
+
# Spyder project settings
|
135
|
+
.spyderproject
|
136
|
+
.spyproject
|
137
|
+
|
138
|
+
# Rope project settings
|
139
|
+
.ropeproject
|
140
|
+
|
141
|
+
# mkdocs documentation
|
142
|
+
/site
|
143
|
+
|
144
|
+
# mypy
|
145
|
+
.mypy_cache/
|
146
|
+
.dmypy.json
|
147
|
+
dmypy.json
|
148
|
+
|
149
|
+
# Pyre type checker
|
150
|
+
.pyre/
|
151
|
+
|
152
|
+
# pytype static type analyzer
|
153
|
+
.pytype/
|
154
|
+
|
155
|
+
# Cython debug symbols
|
156
|
+
cython_debug/
|
157
|
+
|
158
|
+
# PyCharm
|
159
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
160
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
161
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
162
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
163
|
+
.idea/
|
164
|
+
|
165
|
+
node_modules
|
166
|
+
|
167
|
+
credentials.json
|
168
|
+
pyrightconfig.json
|
@@ -1,39 +1,29 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.11.0
|
3
|
+
Version: 1.0.0.dev4
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
8
6
|
Project-URL: Website, https://livekit.io/
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
|
8
|
+
Author: LiveKit
|
9
|
+
License-Expression: Apache-2.0
|
10
|
+
Keywords: audio,livekit,realtime,video,webrtc
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
15
|
Classifier: Programming Language :: Python :: 3.9
|
18
16
|
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier:
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
-
Description-Content-Type: text/markdown
|
22
21
|
Requires-Dist: google-auth<3,>=2
|
23
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
24
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
25
|
-
Requires-Dist: google-genai==1.
|
26
|
-
Requires-Dist: livekit-agents
|
27
|
-
|
28
|
-
Dynamic: description
|
29
|
-
Dynamic: description-content-type
|
30
|
-
Dynamic: home-page
|
31
|
-
Dynamic: keywords
|
32
|
-
Dynamic: license
|
33
|
-
Dynamic: project-url
|
34
|
-
Dynamic: requires-dist
|
35
|
-
Dynamic: requires-python
|
36
|
-
Dynamic: summary
|
24
|
+
Requires-Dist: google-genai==1.5.0
|
25
|
+
Requires-Dist: livekit-agents>=1.0.0.dev4
|
26
|
+
Description-Content-Type: text/markdown
|
37
27
|
|
38
28
|
# LiveKit Plugins Google
|
39
29
|
|
@@ -3,21 +3,17 @@ from __future__ import annotations
|
|
3
3
|
import asyncio
|
4
4
|
import json
|
5
5
|
import os
|
6
|
+
from collections.abc import AsyncIterable
|
6
7
|
from dataclasses import dataclass
|
7
|
-
from typing import
|
8
|
-
|
9
|
-
from livekit import rtc
|
10
|
-
from livekit.agents import llm, utils
|
11
|
-
from livekit.agents.llm.function_context import _create_ai_function_info
|
12
|
-
from livekit.agents.utils import images
|
8
|
+
from typing import Literal
|
13
9
|
|
14
10
|
from google import genai
|
11
|
+
from google.genai._api_client import HttpOptions
|
15
12
|
from google.genai.types import (
|
16
13
|
Blob,
|
17
14
|
Content,
|
18
15
|
FunctionResponse,
|
19
16
|
GenerationConfig,
|
20
|
-
HttpOptions,
|
21
17
|
LiveClientContent,
|
22
18
|
LiveClientRealtimeInput,
|
23
19
|
LiveClientToolResponse,
|
@@ -29,15 +25,13 @@ from google.genai.types import (
|
|
29
25
|
Tool,
|
30
26
|
VoiceConfig,
|
31
27
|
)
|
28
|
+
from livekit import rtc
|
29
|
+
from livekit.agents import llm, utils
|
30
|
+
from livekit.agents.llm.function_context import _create_ai_function_info
|
31
|
+
from livekit.agents.utils import images
|
32
32
|
|
33
33
|
from ...log import logger
|
34
|
-
from .api_proto import (
|
35
|
-
ClientEvents,
|
36
|
-
LiveAPIModels,
|
37
|
-
Voice,
|
38
|
-
_build_gemini_ctx,
|
39
|
-
_build_tools,
|
40
|
-
)
|
34
|
+
from .api_proto import ClientEvents, LiveAPIModels, Voice, _build_gemini_ctx, _build_tools
|
41
35
|
from .transcriber import ModelTranscriber, TranscriberSession, TranscriptionContent
|
42
36
|
|
43
37
|
EventTypes = Literal[
|
@@ -108,7 +102,7 @@ class RealtimeModel:
|
|
108
102
|
model: LiveAPIModels | str = "gemini-2.0-flash-exp",
|
109
103
|
api_key: str | None = None,
|
110
104
|
voice: Voice | str = "Puck",
|
111
|
-
modalities: list[Modality] = ["AUDIO"],
|
105
|
+
modalities: list[Modality] = None,
|
112
106
|
enable_user_audio_transcription: bool = True,
|
113
107
|
enable_agent_audio_transcription: bool = True,
|
114
108
|
vertexai: bool = False,
|
@@ -155,6 +149,8 @@ class RealtimeModel:
|
|
155
149
|
Raises:
|
156
150
|
ValueError: If the API key is not provided and cannot be found in environment variables.
|
157
151
|
"""
|
152
|
+
if modalities is None:
|
153
|
+
modalities = ["AUDIO"]
|
158
154
|
super().__init__()
|
159
155
|
self._capabilities = Capabilities(
|
160
156
|
supports_truncate=False,
|
@@ -180,9 +176,7 @@ class RealtimeModel:
|
|
180
176
|
"API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
|
181
177
|
)
|
182
178
|
|
183
|
-
instructions_content = (
|
184
|
-
Content(parts=[Part(text=instructions)]) if instructions else None
|
185
|
-
)
|
179
|
+
instructions_content = Content(parts=[Part(text=instructions)]) if instructions else None
|
186
180
|
|
187
181
|
self._rt_sessions: list[GeminiRealtimeSession] = []
|
188
182
|
self._opts = ModelOptions(
|
@@ -259,8 +253,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
259
253
|
self._fnc_ctx = fnc_ctx
|
260
254
|
self._fnc_tasks = utils.aio.TaskSet()
|
261
255
|
self._is_interrupted = False
|
262
|
-
self._playout_complete = asyncio.Event()
|
263
|
-
self._playout_complete.set()
|
264
256
|
|
265
257
|
tools = []
|
266
258
|
if self._fnc_ctx is not None:
|
@@ -281,9 +273,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
281
273
|
system_instruction=self._opts.instructions,
|
282
274
|
speech_config=SpeechConfig(
|
283
275
|
voice_config=VoiceConfig(
|
284
|
-
prebuilt_voice_config=PrebuiltVoiceConfig(
|
285
|
-
voice_name=self._opts.voice
|
286
|
-
)
|
276
|
+
prebuilt_voice_config=PrebuiltVoiceConfig(voice_name=self._opts.voice)
|
287
277
|
)
|
288
278
|
),
|
289
279
|
tools=tools,
|
@@ -295,18 +285,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
295
285
|
project=self._opts.project,
|
296
286
|
location=self._opts.location,
|
297
287
|
)
|
298
|
-
self._main_atask = asyncio.create_task(
|
299
|
-
self._main_task(), name="gemini-realtime-session"
|
300
|
-
)
|
288
|
+
self._main_atask = asyncio.create_task(self._main_task(), name="gemini-realtime-session")
|
301
289
|
if self._opts.enable_user_audio_transcription:
|
302
|
-
self._transcriber = TranscriberSession(
|
303
|
-
client=self._client, model=self._opts.model
|
304
|
-
)
|
290
|
+
self._transcriber = TranscriberSession(client=self._client, model=self._opts.model)
|
305
291
|
self._transcriber.on("input_speech_done", self._on_input_speech_done)
|
306
292
|
if self._opts.enable_agent_audio_transcription:
|
307
|
-
self._agent_transcriber = ModelTranscriber(
|
308
|
-
client=self._client, model=self._opts.model
|
309
|
-
)
|
293
|
+
self._agent_transcriber = ModelTranscriber(client=self._client, model=self._opts.model)
|
310
294
|
self._agent_transcriber.on("input_speech_done", self._on_agent_speech_done)
|
311
295
|
# init dummy task
|
312
296
|
self._init_sync_task = asyncio.create_task(asyncio.sleep(0))
|
@@ -320,10 +304,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
320
304
|
self._send_ch.close()
|
321
305
|
await self._main_atask
|
322
306
|
|
323
|
-
@property
|
324
|
-
def playout_complete(self) -> asyncio.Event | None:
|
325
|
-
return self._playout_complete
|
326
|
-
|
327
307
|
@property
|
328
308
|
def fnc_ctx(self) -> llm.FunctionContext | None:
|
329
309
|
return self._fnc_ctx
|
@@ -341,9 +321,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
341
321
|
DEFAULT_ENCODE_OPTIONS = images.EncodeOptions(
|
342
322
|
format="JPEG",
|
343
323
|
quality=75,
|
344
|
-
resize_options=images.ResizeOptions(
|
345
|
-
width=1024, height=1024, strategy="scale_aspect_fit"
|
346
|
-
),
|
324
|
+
resize_options=images.ResizeOptions(width=1024, height=1024, strategy="scale_aspect_fit"),
|
347
325
|
)
|
348
326
|
|
349
327
|
def push_video(
|
@@ -393,9 +371,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
393
371
|
|
394
372
|
def create_response(
|
395
373
|
self,
|
396
|
-
on_duplicate: Literal[
|
397
|
-
"cancel_existing", "cancel_new", "keep_both"
|
398
|
-
] = "keep_both",
|
374
|
+
on_duplicate: Literal["cancel_existing", "cancel_new", "keep_both"] = "keep_both",
|
399
375
|
) -> None:
|
400
376
|
turns, _ = _build_gemini_ctx(self._chat_ctx, id(self))
|
401
377
|
ctx = [self._opts.instructions] + turns if self._opts.instructions else turns
|
@@ -481,8 +457,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
481
457
|
data=part.inline_data.data,
|
482
458
|
sample_rate=24000,
|
483
459
|
num_channels=1,
|
484
|
-
samples_per_channel=len(part.inline_data.data)
|
485
|
-
// 2,
|
460
|
+
samples_per_channel=len(part.inline_data.data) // 2,
|
486
461
|
)
|
487
462
|
if self._opts.enable_agent_audio_transcription:
|
488
463
|
content.audio.append(frame)
|
@@ -525,12 +500,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
525
500
|
logger.warning(
|
526
501
|
"function call cancelled",
|
527
502
|
extra={
|
528
|
-
"function_call_ids": response.tool_call_cancellation.
|
503
|
+
"function_call_ids": response.tool_call_cancellation.function_call_ids,
|
529
504
|
},
|
530
505
|
)
|
531
506
|
self.emit(
|
532
507
|
"function_calls_cancelled",
|
533
|
-
response.tool_call_cancellation.
|
508
|
+
response.tool_call_cancellation.function_call_ids,
|
534
509
|
)
|
535
510
|
|
536
511
|
async with self._client.aio.live.connect(
|
@@ -6,12 +6,12 @@ from dataclasses import dataclass
|
|
6
6
|
from typing import Literal
|
7
7
|
|
8
8
|
import websockets
|
9
|
-
from livekit import rtc
|
10
|
-
from livekit.agents import APIConnectionError, APIStatusError, utils
|
11
9
|
|
12
10
|
from google import genai
|
13
11
|
from google.genai import types
|
14
12
|
from google.genai.errors import APIError, ClientError, ServerError
|
13
|
+
from livekit import rtc
|
14
|
+
from livekit.agents import APIConnectionError, APIStatusError, utils
|
15
15
|
|
16
16
|
from ...log import logger
|
17
17
|
from .api_proto import ClientEvents, LiveAPIModels
|
@@ -51,11 +51,9 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
51
51
|
self._needed_sr = 16000
|
52
52
|
self._closed = False
|
53
53
|
|
54
|
-
system_instructions = types.Content(
|
55
|
-
parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
|
56
|
-
)
|
54
|
+
system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
|
57
55
|
self._config = types.LiveConnectConfig(
|
58
|
-
response_modalities=[
|
56
|
+
response_modalities=["TEXT"],
|
59
57
|
system_instruction=system_instructions,
|
60
58
|
generation_config=types.GenerationConfig(temperature=0.0),
|
61
59
|
)
|
@@ -81,17 +79,13 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
81
79
|
for f in self._resampler.push(frame):
|
82
80
|
self._queue_msg(
|
83
81
|
types.LiveClientRealtimeInput(
|
84
|
-
media_chunks=[
|
85
|
-
types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")
|
86
|
-
]
|
82
|
+
media_chunks=[types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")]
|
87
83
|
)
|
88
84
|
)
|
89
85
|
else:
|
90
86
|
self._queue_msg(
|
91
87
|
types.LiveClientRealtimeInput(
|
92
|
-
media_chunks=[
|
93
|
-
types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")
|
94
|
-
]
|
88
|
+
media_chunks=[types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")]
|
95
89
|
)
|
96
90
|
)
|
97
91
|
|
@@ -157,17 +151,11 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
157
151
|
logger.exception(f"Uncaught error in transcriber _recv_task: {e}")
|
158
152
|
self._closed = True
|
159
153
|
|
160
|
-
async with self._client.aio.live.connect(
|
161
|
-
model=self._model, config=self._config
|
162
|
-
) as session:
|
154
|
+
async with self._client.aio.live.connect(model=self._model, config=self._config) as session:
|
163
155
|
self._session = session
|
164
156
|
tasks = [
|
165
|
-
asyncio.create_task(
|
166
|
-
_send_task(), name="gemini-realtime-transcriber-send"
|
167
|
-
),
|
168
|
-
asyncio.create_task(
|
169
|
-
_recv_task(), name="gemini-realtime-transcriber-recv"
|
170
|
-
),
|
157
|
+
asyncio.create_task(_send_task(), name="gemini-realtime-transcriber-send"),
|
158
|
+
asyncio.create_task(_recv_task(), name="gemini-realtime-transcriber-recv"),
|
171
159
|
]
|
172
160
|
|
173
161
|
try:
|
@@ -187,9 +175,7 @@ class ModelTranscriber(utils.EventEmitter[EventTypes]):
|
|
187
175
|
self._client = client
|
188
176
|
self._model = model
|
189
177
|
self._needed_sr = 16000
|
190
|
-
self._system_instructions = types.Content(
|
191
|
-
parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
|
192
|
-
)
|
178
|
+
self._system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
|
193
179
|
self._config = types.GenerateContentConfig(
|
194
180
|
temperature=0.0,
|
195
181
|
system_instruction=self._system_instructions,
|
@@ -198,9 +184,7 @@ class ModelTranscriber(utils.EventEmitter[EventTypes]):
|
|
198
184
|
self._resampler: rtc.AudioResampler | None = None
|
199
185
|
self._buffer: rtc.AudioFrame | None = None
|
200
186
|
self._audio_ch = utils.aio.Chan[rtc.AudioFrame]()
|
201
|
-
self._main_atask = asyncio.create_task(
|
202
|
-
self._main_task(), name="gemini-model-transcriber"
|
203
|
-
)
|
187
|
+
self._main_atask = asyncio.create_task(self._main_task(), name="gemini-model-transcriber")
|
204
188
|
|
205
189
|
async def aclose(self) -> None:
|
206
190
|
if self._audio_ch.closed:
|