google-genai 1.7.0__py3-none-any.whl → 1.53.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. google/genai/__init__.py +4 -2
  2. google/genai/_adapters.py +55 -0
  3. google/genai/_api_client.py +1301 -299
  4. google/genai/_api_module.py +1 -1
  5. google/genai/_automatic_function_calling_util.py +54 -33
  6. google/genai/_base_transformers.py +26 -0
  7. google/genai/_base_url.py +50 -0
  8. google/genai/_common.py +560 -59
  9. google/genai/_extra_utils.py +371 -38
  10. google/genai/_live_converters.py +1467 -0
  11. google/genai/_local_tokenizer_loader.py +214 -0
  12. google/genai/_mcp_utils.py +117 -0
  13. google/genai/_operations_converters.py +394 -0
  14. google/genai/_replay_api_client.py +204 -92
  15. google/genai/_test_api_client.py +1 -1
  16. google/genai/_tokens_converters.py +520 -0
  17. google/genai/_transformers.py +633 -233
  18. google/genai/batches.py +1733 -538
  19. google/genai/caches.py +678 -1012
  20. google/genai/chats.py +48 -38
  21. google/genai/client.py +142 -15
  22. google/genai/documents.py +532 -0
  23. google/genai/errors.py +141 -35
  24. google/genai/file_search_stores.py +1296 -0
  25. google/genai/files.py +312 -744
  26. google/genai/live.py +617 -367
  27. google/genai/live_music.py +197 -0
  28. google/genai/local_tokenizer.py +395 -0
  29. google/genai/models.py +3598 -3116
  30. google/genai/operations.py +201 -362
  31. google/genai/pagers.py +23 -7
  32. google/genai/py.typed +1 -0
  33. google/genai/tokens.py +362 -0
  34. google/genai/tunings.py +1274 -496
  35. google/genai/types.py +14535 -5454
  36. google/genai/version.py +2 -2
  37. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info}/METADATA +736 -234
  38. google_genai-1.53.0.dist-info/RECORD +41 -0
  39. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info}/WHEEL +1 -1
  40. google_genai-1.7.0.dist-info/RECORD +0 -27
  41. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info/licenses}/LICENSE +0 -0
  42. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info}/top_level.txt +0 -0
google/genai/live.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Google LLC
1
+ # Copyright 2025 Google LLC
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,14 +13,16 @@
13
13
  # limitations under the License.
14
14
  #
15
15
 
16
- """Live client. The live module is experimental."""
16
+ """[Preview] Live API client."""
17
17
 
18
18
  import asyncio
19
19
  import base64
20
20
  import contextlib
21
21
  import json
22
22
  import logging
23
- from typing import Any, AsyncIterator, Dict, Optional, Sequence, Union, get_args
23
+ import typing
24
+ from typing import Any, AsyncIterator, Optional, Sequence, Union, get_args
25
+ import warnings
24
26
 
25
27
  import google.auth
26
28
  import pydantic
@@ -28,34 +30,50 @@ from websockets import ConnectionClosed
28
30
 
29
31
  from . import _api_module
30
32
  from . import _common
33
+ from . import _live_converters as live_converters
34
+ from . import _mcp_utils
31
35
  from . import _transformers as t
32
- from . import client
33
36
  from . import errors
34
37
  from . import types
35
38
  from ._api_client import BaseApiClient
36
- from ._common import experimental_warning
37
39
  from ._common import get_value_by_path as getv
38
40
  from ._common import set_value_by_path as setv
39
- from .models import _Content_from_mldev
40
- from .models import _Content_from_vertex
41
+ from .live_music import AsyncLiveMusic
41
42
  from .models import _Content_to_mldev
42
- from .models import _Content_to_vertex
43
- from .models import _GenerateContentConfig_to_mldev
44
- from .models import _GenerateContentConfig_to_vertex
45
- from .models import _SafetySetting_to_mldev
46
- from .models import _SafetySetting_to_vertex
47
- from .models import _SpeechConfig_to_mldev
48
- from .models import _SpeechConfig_to_vertex
49
- from .models import _Tool_to_mldev
50
- from .models import _Tool_to_vertex
43
+
51
44
 
52
45
  try:
53
- from websockets.asyncio.client import ClientConnection # type: ignore
54
- from websockets.asyncio.client import connect # type: ignore
46
+ from websockets.asyncio.client import ClientConnection
47
+ from websockets.asyncio.client import connect as ws_connect
55
48
  except ModuleNotFoundError:
56
49
  # This try/except is for TAP, mypy complains about it which is why we have the type: ignore
57
50
  from websockets.client import ClientConnection # type: ignore
58
- from websockets.client import connect # type: ignore
51
+ from websockets.client import connect as ws_connect # type: ignore
52
+
53
+ try:
54
+ from google.auth.transport import requests
55
+ except ImportError:
56
+ requests = None # type: ignore[assignment]
57
+
58
+ if typing.TYPE_CHECKING:
59
+ from mcp import ClientSession as McpClientSession
60
+ from mcp.types import Tool as McpTool
61
+ from ._adapters import McpToGenAiToolAdapter
62
+ from ._mcp_utils import mcp_to_gemini_tool
63
+ else:
64
+ McpClientSession: typing.Type = Any
65
+ McpTool: typing.Type = Any
66
+ McpToGenAiToolAdapter: typing.Type = Any
67
+ try:
68
+ from mcp import ClientSession as McpClientSession
69
+ from mcp.types import Tool as McpTool
70
+ from ._adapters import McpToGenAiToolAdapter
71
+ from ._mcp_utils import mcp_to_gemini_tool
72
+ except ImportError:
73
+ McpClientSession = None
74
+ McpTool = None
75
+ McpToGenAiToolAdapter = None
76
+ mcp_to_gemini_tool = None
59
77
 
60
78
  logger = logging.getLogger('google_genai.live')
61
79
 
@@ -66,13 +84,17 @@ _FUNCTION_RESPONSE_REQUIRES_ID = (
66
84
 
67
85
 
68
86
  class AsyncSession:
69
- """AsyncSession. The live module is experimental."""
87
+ """[Preview] AsyncSession."""
70
88
 
71
89
  def __init__(
72
- self, api_client: client.BaseApiClient, websocket: ClientConnection
90
+ self,
91
+ api_client: BaseApiClient,
92
+ websocket: ClientConnection,
93
+ session_id: Optional[str] = None,
73
94
  ):
74
95
  self._api_client = api_client
75
96
  self._ws = websocket
97
+ self.session_id = session_id
76
98
 
77
99
  async def send(
78
100
  self,
@@ -89,8 +111,13 @@ class AsyncSession:
89
111
  ]
90
112
  ] = None,
91
113
  end_of_turn: Optional[bool] = False,
92
- ):
93
- """Send input to the model.
114
+ ) -> None:
115
+ """[Deprecated] Send input to the model.
116
+
117
+ > **Warning**: This method is deprecated and will be removed in a future
118
+ version (not before Q3 2025). Please use one of the more specific methods:
119
+ `send_client_content`, `send_realtime_input`, or `send_tool_response`
120
+ instead.
94
121
 
95
122
  The method will send the input request to the server.
96
123
 
@@ -109,9 +136,297 @@ class AsyncSession:
109
136
  async for message in session.receive():
110
137
  print(message)
111
138
  """
139
+ warnings.warn(
140
+ 'The `session.send` method is deprecated and will be removed in a '
141
+ 'future version (not before Q3 2025).\n'
142
+ 'Please use one of the more specific methods: `send_client_content`, '
143
+ '`send_realtime_input`, or `send_tool_response` instead.',
144
+ DeprecationWarning,
145
+ stacklevel=2,
146
+ )
112
147
  client_message = self._parse_client_message(input, end_of_turn)
113
148
  await self._ws.send(json.dumps(client_message))
114
149
 
150
+ async def send_client_content(
151
+ self,
152
+ *,
153
+ turns: Optional[
154
+ Union[
155
+ types.Content,
156
+ types.ContentDict,
157
+ list[Union[types.Content, types.ContentDict]],
158
+ ]
159
+ ] = None,
160
+ turn_complete: bool = True,
161
+ ) -> None:
162
+ """Send non-realtime, turn based content to the model.
163
+
164
+ There are two ways to send messages to the live API:
165
+ `send_client_content` and `send_realtime_input`.
166
+
167
+ `send_client_content` messages are added to the model context **in order**.
168
+ Having a conversation using `send_client_content` messages is roughly
169
+ equivalent to using the `Chat.send_message_stream` method, except that the
170
+ state of the `chat` history is stored on the API server.
171
+
172
+ Because of `send_client_content`'s order guarantee, the model cannot
173
+ respond as quickly to `send_client_content` messages as to
174
+ `send_realtime_input` messages. This makes the biggest difference when
175
+ sending objects that have significant preprocessing time (typically images).
176
+
177
+ The `send_client_content` message sends a list of `Content` objects,
178
+ which has more options than the `media:Blob` sent by `send_realtime_input`.
179
+
180
+ The main use-cases for `send_client_content` over `send_realtime_input` are:
181
+
182
+ - Prefilling a conversation context (including sending anything that can't
183
+ be represented as a realtime message), before starting a realtime
184
+ conversation.
185
+ - Conducting a non-realtime conversation, similar to `client.chat`, using
186
+ the live api.
187
+
188
+ Caution: Interleaving `send_client_content` and `send_realtime_input`
189
+ in the same conversation is not recommended and can lead to unexpected
190
+ results.
191
+
192
+ Args:
193
+ turns: A `Content` object or list of `Content` objects (or equivalent
194
+ dicts).
195
+ turn_complete: if true (the default) the model will reply immediately. If
196
+ false, the model will wait for you to send additional client_content,
197
+ and will not return until you send `turn_complete=True`.
198
+
199
+ Example:
200
+
201
+ .. code-block:: python
202
+
203
+ from google import genai
204
+ from google.genai import types
205
+ import os
206
+
207
+ if os.environ.get('GOOGLE_GENAI_USE_VERTEXAI'):
208
+ MODEL_NAME = 'gemini-2.0-flash-live-preview-04-09'
209
+ else:
210
+ MODEL_NAME = 'gemini-live-2.5-flash-preview'
211
+
212
+ client = genai.Client()
213
+ async with client.aio.live.connect(
214
+ model=MODEL_NAME,
215
+ config={"response_modalities": ["TEXT"]}
216
+ ) as session:
217
+ await session.send_client_content(
218
+ turns=types.Content(
219
+ role='user',
220
+ parts=[types.Part(text="Hello world!")]))
221
+ async for msg in session.receive():
222
+ if msg.text:
223
+ print(msg.text)
224
+ """
225
+ client_content = t.t_client_content(turns, turn_complete).model_dump(
226
+ mode='json', exclude_none=True
227
+ )
228
+
229
+ if self._api_client.vertexai:
230
+ client_content_dict = _common.convert_to_dict(
231
+ client_content, convert_keys=True
232
+ )
233
+ else:
234
+ client_content_dict = live_converters._LiveClientContent_to_mldev(
235
+ from_object=client_content
236
+ )
237
+
238
+ await self._ws.send(json.dumps({'client_content': client_content_dict}))
239
+
240
+ async def send_realtime_input(
241
+ self,
242
+ *,
243
+ media: Optional[types.BlobImageUnionDict] = None,
244
+ audio: Optional[types.BlobOrDict] = None,
245
+ audio_stream_end: Optional[bool] = None,
246
+ video: Optional[types.BlobImageUnionDict] = None,
247
+ text: Optional[str] = None,
248
+ activity_start: Optional[types.ActivityStartOrDict] = None,
249
+ activity_end: Optional[types.ActivityEndOrDict] = None,
250
+ ) -> None:
251
+ """Send realtime input to the model, only send one argument per call.
252
+
253
+ Use `send_realtime_input` for realtime audio chunks and video
254
+ frames (images).
255
+
256
+ With `send_realtime_input` the api will respond to audio automatically
257
+ based on voice activity detection (VAD).
258
+
259
+ `send_realtime_input` is optimized for responsiveness at the expense of
260
+ deterministic ordering. Audio and video tokens are added to the
261
+ context when they become available.
262
+
263
+ Args:
264
+ media: A `Blob`-like object, the realtime media to send.
265
+
266
+ Example:
267
+
268
+ .. code-block:: python
269
+
270
+ from pathlib import Path
271
+
272
+ from google import genai
273
+ from google.genai import types
274
+
275
+ import PIL.Image
276
+
277
+ import os
278
+
279
+ if os.environ.get('GOOGLE_GENAI_USE_VERTEXAI'):
280
+ MODEL_NAME = 'gemini-2.0-flash-live-preview-04-09'
281
+ else:
282
+ MODEL_NAME = 'gemini-live-2.5-flash-preview'
283
+
284
+
285
+ client = genai.Client()
286
+
287
+ async with client.aio.live.connect(
288
+ model=MODEL_NAME,
289
+ config={"response_modalities": ["TEXT"]},
290
+ ) as session:
291
+ await session.send_realtime_input(
292
+ media=PIL.Image.open('image.jpg'))
293
+
294
+ audio_bytes = Path('audio.pcm').read_bytes()
295
+ await session.send_realtime_input(
296
+ media=types.Blob(data=audio_bytes, mime_type='audio/pcm;rate=16000'))
297
+
298
+ async for msg in session.receive():
299
+ if msg.text is not None:
300
+ print(f'{msg.text}')
301
+ """
302
+ kwargs: _common.StringDict = {}
303
+ if media is not None:
304
+ kwargs['media'] = media
305
+ if audio is not None:
306
+ kwargs['audio'] = audio
307
+ if audio_stream_end is not None:
308
+ kwargs['audio_stream_end'] = audio_stream_end
309
+ if video is not None:
310
+ kwargs['video'] = video
311
+ if text is not None:
312
+ kwargs['text'] = text
313
+ if activity_start is not None:
314
+ kwargs['activity_start'] = activity_start
315
+ if activity_end is not None:
316
+ kwargs['activity_end'] = activity_end
317
+
318
+ if len(kwargs) != 1:
319
+ raise ValueError(
320
+ f'Only one argument can be set, got {len(kwargs)}:'
321
+ f' {list(kwargs.keys())}'
322
+ )
323
+ realtime_input = types.LiveSendRealtimeInputParameters.model_validate(
324
+ kwargs
325
+ )
326
+
327
+ if self._api_client.vertexai:
328
+ realtime_input_dict = (
329
+ live_converters._LiveSendRealtimeInputParameters_to_vertex(
330
+ from_object=realtime_input
331
+ )
332
+ )
333
+ else:
334
+ realtime_input_dict = (
335
+ live_converters._LiveSendRealtimeInputParameters_to_mldev(
336
+ from_object=realtime_input
337
+ )
338
+ )
339
+ realtime_input_dict = _common.convert_to_dict(realtime_input_dict)
340
+ realtime_input_dict = _common.encode_unserializable_types(
341
+ realtime_input_dict
342
+ )
343
+ await self._ws.send(json.dumps({'realtime_input': realtime_input_dict}))
344
+
345
+ async def send_tool_response(
346
+ self,
347
+ *,
348
+ function_responses: Union[
349
+ types.FunctionResponseOrDict,
350
+ Sequence[types.FunctionResponseOrDict],
351
+ ],
352
+ ) -> None:
353
+ """Send a tool response to the session.
354
+
355
+ Use `send_tool_response` to reply to `LiveServerToolCall` messages
356
+ from the server.
357
+
358
+ To set the available tools, use the `config.tools` argument
359
+ when you connect to the session (`client.live.connect`).
360
+
361
+ Args:
362
+ function_responses: A `FunctionResponse`-like object or list of
363
+ `FunctionResponse`-like objects.
364
+
365
+ Example:
366
+
367
+ .. code-block:: python
368
+
369
+ from google import genai
370
+ from google.genai import types
371
+
372
+ import os
373
+
374
+ if os.environ.get('GOOGLE_GENAI_USE_VERTEXAI'):
375
+ MODEL_NAME = 'gemini-2.0-flash-live-preview-04-09'
376
+ else:
377
+ MODEL_NAME = 'gemini-live-2.5-flash-preview'
378
+
379
+ client = genai.Client()
380
+
381
+ tools = [{'function_declarations': [{'name': 'turn_on_the_lights'}]}]
382
+ config = {
383
+ "tools": tools,
384
+ "response_modalities": ['TEXT']
385
+ }
386
+
387
+ async with client.aio.live.connect(
388
+ model='models/gemini-live-2.5-flash-preview',
389
+ config=config
390
+ ) as session:
391
+ prompt = "Turn on the lights please"
392
+ await session.send_client_content(
393
+ turns={"parts": [{'text': prompt}]}
394
+ )
395
+
396
+ async for chunk in session.receive():
397
+ if chunk.server_content:
398
+ if chunk.text is not None:
399
+ print(chunk.text)
400
+ elif chunk.tool_call:
401
+ print(chunk.tool_call)
402
+ print('_'*80)
403
+ function_response=types.FunctionResponse(
404
+ name='turn_on_the_lights',
405
+ response={'result': 'ok'},
406
+ id=chunk.tool_call.function_calls[0].id,
407
+ )
408
+ print(function_response)
409
+ await session.send_tool_response(
410
+ function_responses=function_response
411
+ )
412
+
413
+ print('_'*80)
414
+ """
415
+ tool_response = t.t_tool_response(function_responses)
416
+ if self._api_client.vertexai:
417
+ tool_response_dict = _common.convert_to_dict(
418
+ tool_response, convert_keys=True
419
+ )
420
+ else:
421
+ tool_response_dict = _common.convert_to_dict(
422
+ tool_response, convert_keys=True
423
+ )
424
+ for response in tool_response_dict.get('functionResponses', []):
425
+ if response.get('id') is None:
426
+ raise ValueError(_FUNCTION_RESPONSE_REQUIRES_ID)
427
+
428
+ await self._ws.send(json.dumps({'tool_response': tool_response_dict}))
429
+
115
430
  async def receive(self) -> AsyncIterator[types.LiveServerMessage]:
116
431
  """Receive model responses from the server.
117
432
 
@@ -120,8 +435,6 @@ class AsyncSession:
120
435
  is function call, user must call `send` with the function response to
121
436
  continue the turn.
122
437
 
123
- The live module is experimental.
124
-
125
438
  Yields:
126
439
  The model responses from the server.
127
440
 
@@ -146,15 +459,18 @@ class AsyncSession:
146
459
  async def start_stream(
147
460
  self, *, stream: AsyncIterator[bytes], mime_type: str
148
461
  ) -> AsyncIterator[types.LiveServerMessage]:
149
- """start a live session from a data stream.
462
+ """[Deprecated] Start a live session from a data stream.
463
+
464
+ > **Warning**: This method is deprecated and will be removed in a future
465
+ version (not before Q3 2025). Please use one of the more specific methods:
466
+ `send_client_content`, `send_realtime_input`, or `send_tool_response`
467
+ instead.
150
468
 
151
469
  The interaction terminates when the input stream is complete.
152
470
  This method will start two async tasks. One task will be used to send the
153
471
  input stream to the model and the other task will be used to receive the
154
472
  responses from the model.
155
473
 
156
- The live module is experimental.
157
-
158
474
  Args:
159
475
  stream: An iterator that yields the model response.
160
476
  mime_type: The MIME type of the data in the stream.
@@ -177,6 +493,13 @@ class AsyncSession:
177
493
  mime_type = 'audio/pcm'):
178
494
  play_audio_chunk(audio.data)
179
495
  """
496
+ warnings.warn(
497
+ 'Setting `AsyncSession.start_stream` is deprecated, '
498
+ 'and will be removed in a future release (not before Q3 2025). '
499
+ 'Please use the `receive`, and `send_realtime_input`, methods instead.',
500
+ DeprecationWarning,
501
+ stacklevel=4,
502
+ )
180
503
  stop_event = asyncio.Event()
181
504
  # Start the send loop. When stream is complete stop_event is set.
182
505
  asyncio.create_task(self._send_loop(stream, mime_type, stop_event))
@@ -207,7 +530,10 @@ class AsyncSession:
207
530
 
208
531
  async def _receive(self) -> types.LiveServerMessage:
209
532
  parameter_model = types.LiveServerMessage()
210
- raw_response = await self._ws.recv(decode=False)
533
+ try:
534
+ raw_response = await self._ws.recv(decode=False)
535
+ except TypeError:
536
+ raw_response = await self._ws.recv() # type: ignore[assignment]
211
537
  if raw_response:
212
538
  try:
213
539
  response = json.loads(raw_response)
@@ -215,10 +541,11 @@ class AsyncSession:
215
541
  raise ValueError(f'Failed to parse response: {raw_response!r}')
216
542
  else:
217
543
  response = {}
544
+
218
545
  if self._api_client.vertexai:
219
- response_dict = self._LiveServerMessage_from_vertex(response)
546
+ response_dict = live_converters._LiveServerMessage_from_vertex(response)
220
547
  else:
221
- response_dict = self._LiveServerMessage_from_mldev(response)
548
+ response_dict = response
222
549
 
223
550
  return types.LiveServerMessage._from_response(
224
551
  response=response_dict, kwargs=parameter_model.model_dump()
@@ -229,10 +556,10 @@ class AsyncSession:
229
556
  data_stream: AsyncIterator[bytes],
230
557
  mime_type: str,
231
558
  stop_event: asyncio.Event,
232
- ):
559
+ ) -> None:
233
560
  async for data in data_stream:
234
561
  model_input = types.LiveClientRealtimeInput(
235
- media_chunks=[types.Blob(data=data, mime_type=mime_type)]
562
+ media_chunks=[types.Blob(data=data, mime_type=mime_type)]
236
563
  )
237
564
  await self.send(input=model_input)
238
565
  # Give a chance for the receive loop to process responses.
@@ -240,127 +567,6 @@ class AsyncSession:
240
567
  # Give a chance for the receiver to process the last response.
241
568
  stop_event.set()
242
569
 
243
- def _LiveServerContent_from_mldev(
244
- self,
245
- from_object: Union[dict, object],
246
- ) -> Dict[str, Any]:
247
- to_object: dict[str, Any] = {}
248
- if getv(from_object, ['modelTurn']) is not None:
249
- setv(
250
- to_object,
251
- ['model_turn'],
252
- _Content_from_mldev(
253
- self._api_client,
254
- getv(from_object, ['modelTurn']),
255
- ),
256
- )
257
- if getv(from_object, ['turnComplete']) is not None:
258
- setv(to_object, ['turn_complete'], getv(from_object, ['turnComplete']))
259
- if getv(from_object, ['interrupted']) is not None:
260
- setv(to_object, ['interrupted'], getv(from_object, ['interrupted']))
261
- return to_object
262
-
263
- def _LiveToolCall_from_mldev(
264
- self,
265
- from_object: Union[dict, object],
266
- ) -> Dict[str, Any]:
267
- to_object: dict[str, Any] = {}
268
- if getv(from_object, ['functionCalls']) is not None:
269
- setv(
270
- to_object,
271
- ['function_calls'],
272
- getv(from_object, ['functionCalls']),
273
- )
274
- return to_object
275
-
276
- def _LiveToolCall_from_vertex(
277
- self,
278
- from_object: Union[dict, object],
279
- ) -> Dict[str, Any]:
280
- to_object: dict[str, Any] = {}
281
- if getv(from_object, ['functionCalls']) is not None:
282
- setv(
283
- to_object,
284
- ['function_calls'],
285
- getv(from_object, ['functionCalls']),
286
- )
287
- return to_object
288
-
289
- def _LiveServerMessage_from_mldev(
290
- self,
291
- from_object: Union[dict, object],
292
- ) -> Dict[str, Any]:
293
- to_object: dict[str, Any] = {}
294
- if getv(from_object, ['serverContent']) is not None:
295
- setv(
296
- to_object,
297
- ['server_content'],
298
- self._LiveServerContent_from_mldev(
299
- getv(from_object, ['serverContent'])
300
- ),
301
- )
302
- if getv(from_object, ['toolCall']) is not None:
303
- setv(
304
- to_object,
305
- ['tool_call'],
306
- self._LiveToolCall_from_mldev(getv(from_object, ['toolCall'])),
307
- )
308
- if getv(from_object, ['toolCallCancellation']) is not None:
309
- setv(
310
- to_object,
311
- ['tool_call_cancellation'],
312
- getv(from_object, ['toolCallCancellation']),
313
- )
314
- return to_object
315
-
316
- def _LiveServerContent_from_vertex(
317
- self,
318
- from_object: Union[dict, object],
319
- ) -> Dict[str, Any]:
320
- to_object: dict[str, Any] = {}
321
- if getv(from_object, ['modelTurn']) is not None:
322
- setv(
323
- to_object,
324
- ['model_turn'],
325
- _Content_from_vertex(
326
- self._api_client,
327
- getv(from_object, ['modelTurn']),
328
- ),
329
- )
330
- if getv(from_object, ['turnComplete']) is not None:
331
- setv(to_object, ['turn_complete'], getv(from_object, ['turnComplete']))
332
- if getv(from_object, ['interrupted']) is not None:
333
- setv(to_object, ['interrupted'], getv(from_object, ['interrupted']))
334
- return to_object
335
-
336
- def _LiveServerMessage_from_vertex(
337
- self,
338
- from_object: Union[dict, object],
339
- ) -> Dict[str, Any]:
340
- to_object: dict[str, Any] = {}
341
- if getv(from_object, ['serverContent']) is not None:
342
- setv(
343
- to_object,
344
- ['server_content'],
345
- self._LiveServerContent_from_vertex(
346
- getv(from_object, ['serverContent'])
347
- ),
348
- )
349
-
350
- if getv(from_object, ['toolCall']) is not None:
351
- setv(
352
- to_object,
353
- ['tool_call'],
354
- self._LiveToolCall_from_vertex(getv(from_object, ['toolCall'])),
355
- )
356
- if getv(from_object, ['toolCallCancellation']) is not None:
357
- setv(
358
- to_object,
359
- ['tool_call_cancellation'],
360
- getv(from_object, ['toolCallCancellation']),
361
- )
362
- return to_object
363
-
364
570
  def _parse_client_message(
365
571
  self,
366
572
  input: Optional[
@@ -391,9 +597,8 @@ class AsyncSession:
391
597
  raise ValueError(
392
598
  f'Unsupported input type "{type(input)}" or input content "{input}"'
393
599
  )
394
- if (
395
- isinstance(blob_input, types.Blob)
396
- and isinstance(blob_input.data, bytes)
600
+ if isinstance(blob_input, types.Blob) and isinstance(
601
+ blob_input.data, bytes
397
602
  ):
398
603
  formatted_input = [
399
604
  blob_input.model_dump(mode='json', exclude_none=True)
@@ -447,20 +652,20 @@ class AsyncSession:
447
652
  elif isinstance(formatted_input, Sequence) and any(
448
653
  isinstance(c, str) for c in formatted_input
449
654
  ):
450
- to_object: dict[str, Any] = {}
655
+ to_object: _common.StringDict = {}
451
656
  content_input_parts: list[types.PartUnion] = []
452
657
  for item in formatted_input:
453
658
  if isinstance(item, get_args(types.PartUnion)):
454
659
  content_input_parts.append(item)
455
660
  if self._api_client.vertexai:
456
661
  contents = [
457
- _Content_to_vertex(self._api_client, item, to_object)
458
- for item in t.t_contents(self._api_client, content_input_parts)
662
+ _common.convert_to_dict(item, convert_keys=True)
663
+ for item in t.t_contents(content_input_parts)
459
664
  ]
460
665
  else:
461
666
  contents = [
462
- _Content_to_mldev(self._api_client, item, to_object)
463
- for item in t.t_contents(self._api_client, content_input_parts)
667
+ _Content_to_mldev(item, to_object)
668
+ for item in t.t_contents(content_input_parts)
464
669
  ]
465
670
 
466
671
  content_dict_list: list[types.ContentDict] = []
@@ -664,169 +869,22 @@ class AsyncSession:
664
869
 
665
870
  return client_message
666
871
 
667
- async def close(self):
872
+ async def close(self) -> None:
668
873
  # Close the websocket connection.
669
874
  await self._ws.close()
670
875
 
671
876
 
672
877
  class AsyncLive(_api_module.BaseModule):
673
- """AsyncLive. The live module is experimental."""
878
+ """[Preview] AsyncLive."""
674
879
 
675
- def _LiveSetup_to_mldev(
676
- self, model: str, config: Optional[types.LiveConnectConfig] = None
677
- ):
678
-
679
- to_object: dict[str, Any] = {}
680
- if getv(config, ['generation_config']) is not None:
681
- setv(
682
- to_object,
683
- ['generationConfig'],
684
- _GenerateContentConfig_to_mldev(
685
- self._api_client,
686
- getv(config, ['generation_config']),
687
- to_object,
688
- ),
689
- )
690
- if getv(config, ['response_modalities']) is not None:
691
- if getv(to_object, ['generationConfig']) is not None:
692
- to_object['generationConfig']['responseModalities'] = getv(
693
- config, ['response_modalities']
694
- )
695
- else:
696
- to_object['generationConfig'] = {
697
- 'responseModalities': getv(config, ['response_modalities'])
698
- }
699
- if getv(config, ['speech_config']) is not None:
700
- if getv(to_object, ['generationConfig']) is not None:
701
- to_object['generationConfig']['speechConfig'] = _SpeechConfig_to_mldev(
702
- self._api_client,
703
- t.t_speech_config(
704
- self._api_client, getv(config, ['speech_config'])
705
- ),
706
- to_object,
707
- )
708
- else:
709
- to_object['generationConfig'] = {
710
- 'speechConfig': _SpeechConfig_to_mldev(
711
- self._api_client,
712
- t.t_speech_config(
713
- self._api_client, getv(config, ['speech_config'])
714
- ),
715
- to_object,
716
- )
717
- }
718
-
719
- if getv(config, ['system_instruction']) is not None:
720
- setv(
721
- to_object,
722
- ['systemInstruction'],
723
- _Content_to_mldev(
724
- self._api_client,
725
- t.t_content(
726
- self._api_client, getv(config, ['system_instruction'])
727
- ),
728
- to_object,
729
- ),
730
- )
731
- if getv(config, ['tools']) is not None:
732
- setv(
733
- to_object,
734
- ['tools'],
735
- [
736
- _Tool_to_mldev(
737
- self._api_client, t.t_tool(self._api_client, item), to_object
738
- )
739
- for item in t.t_tools(self._api_client, getv(config, ['tools']))
740
- ],
741
- )
880
+ def __init__(self, api_client: BaseApiClient):
881
+ super().__init__(api_client)
882
+ self._music = AsyncLiveMusic(api_client)
742
883
 
743
- return_value = {'setup': {'model': model}}
744
- return_value['setup'].update(to_object)
745
- return return_value
884
+ @property
885
+ def music(self) -> AsyncLiveMusic:
886
+ return self._music
746
887
 
747
- def _LiveSetup_to_vertex(
748
- self, model: str, config: Optional[types.LiveConnectConfig] = None
749
- ):
750
-
751
- to_object: dict[str, Any] = {}
752
-
753
- if getv(config, ['generation_config']) is not None:
754
- setv(
755
- to_object,
756
- ['generationConfig'],
757
- _GenerateContentConfig_to_vertex(
758
- self._api_client,
759
- getv(config, ['generation_config']),
760
- to_object,
761
- ),
762
- )
763
- if getv(config, ['response_modalities']) is not None:
764
- if getv(to_object, ['generationConfig']) is not None:
765
- to_object['generationConfig']['responseModalities'] = getv(
766
- config, ['response_modalities']
767
- )
768
- else:
769
- to_object['generationConfig'] = {
770
- 'responseModalities': getv(config, ['response_modalities'])
771
- }
772
- else:
773
- # Set default to AUDIO to align with MLDev API.
774
- if getv(to_object, ['generationConfig']) is not None:
775
- to_object['generationConfig'].update({'responseModalities': ['AUDIO']})
776
- else:
777
- to_object.update(
778
- {'generationConfig': {'responseModalities': ['AUDIO']}}
779
- )
780
- if getv(config, ['speech_config']) is not None:
781
- if getv(to_object, ['generationConfig']) is not None:
782
- to_object['generationConfig']['speechConfig'] = _SpeechConfig_to_vertex(
783
- self._api_client,
784
- t.t_speech_config(
785
- self._api_client, getv(config, ['speech_config'])
786
- ),
787
- to_object,
788
- )
789
- else:
790
- to_object['generationConfig'] = {
791
- 'speechConfig': _SpeechConfig_to_vertex(
792
- self._api_client,
793
- t.t_speech_config(
794
- self._api_client, getv(config, ['speech_config'])
795
- ),
796
- to_object,
797
- )
798
- }
799
- if getv(config, ['system_instruction']) is not None:
800
- setv(
801
- to_object,
802
- ['systemInstruction'],
803
- _Content_to_vertex(
804
- self._api_client,
805
- t.t_content(
806
- self._api_client, getv(config, ['system_instruction'])
807
- ),
808
- to_object,
809
- ),
810
- )
811
- if getv(config, ['tools']) is not None:
812
- setv(
813
- to_object,
814
- ['tools'],
815
- [
816
- _Tool_to_vertex(
817
- self._api_client, t.t_tool(self._api_client, item), to_object
818
- )
819
- for item in t.t_tools(self._api_client, getv(config, ['tools']))
820
- ],
821
- )
822
-
823
- return_value = {'setup': {'model': model}}
824
- return_value['setup'].update(to_object)
825
- return return_value
826
-
827
- @experimental_warning(
828
- 'The live API is experimental and may change in future versions.',
829
- )
830
888
  @contextlib.asynccontextmanager
831
889
  async def connect(
832
890
  self,
@@ -834,9 +892,9 @@ class AsyncLive(_api_module.BaseModule):
834
892
  model: str,
835
893
  config: Optional[types.LiveConnectConfigOrDict] = None,
836
894
  ) -> AsyncIterator[AsyncSession]:
837
- """Connect to the live server.
895
+ """[Preview] Connect to the live server.
838
896
 
839
- The live module is experimental.
897
+ Note: the live API is currently in preview.
840
898
 
841
899
  Usage:
842
900
 
@@ -845,78 +903,270 @@ class AsyncLive(_api_module.BaseModule):
845
903
  client = genai.Client(api_key=API_KEY)
846
904
  config = {}
847
905
  async with client.aio.live.connect(model='...', config=config) as session:
848
- await session.send(input='Hello world!', end_of_turn=True)
906
+ await session.send_client_content(
907
+ turns=types.Content(
908
+ role='user',
909
+ parts=[types.Part(text='hello!')]
910
+ ),
911
+ turn_complete=True
912
+ )
849
913
  async for message in session.receive():
850
914
  print(message)
915
+
916
+ Args:
917
+ model: The model to use for the live session.
918
+ config: The configuration for the live session.
919
+ **kwargs: additional keyword arguments.
920
+
921
+ Yields:
922
+ An AsyncSession object.
851
923
  """
924
+ # TODO(b/404946570): Support per request http options.
925
+ if isinstance(config, dict):
926
+ config = types.LiveConnectConfig(**config)
927
+ if config and config.http_options:
928
+ raise ValueError(
929
+ 'google.genai.client.aio.live.connect() does not support'
930
+ ' http_options at request-level in LiveConnectConfig yet. Please use'
931
+ ' the client-level http_options configuration instead.'
932
+ )
933
+
852
934
  base_url = self._api_client._websocket_base_url()
853
- transformed_model = t.t_model(self._api_client, model)
854
- # Ensure the config is a LiveConnectConfig.
855
- if config is None:
856
- parameter_model = types.LiveConnectConfig()
857
- elif isinstance(config, dict):
858
- if config.get('system_instruction') is None:
859
- system_instruction = None
860
- else:
861
- system_instruction = t.t_content(
862
- self._api_client, config.get('system_instruction')
935
+ if isinstance(base_url, bytes):
936
+ base_url = base_url.decode('utf-8')
937
+ transformed_model = t.t_model(self._api_client, model) # type: ignore
938
+
939
+ parameter_model = await _t_live_connect_config(self._api_client, config)
940
+
941
+ if self._api_client.api_key and not self._api_client.vertexai:
942
+ version = self._api_client._http_options.api_version
943
+ api_key = self._api_client.api_key
944
+ method = 'BidiGenerateContent'
945
+ original_headers = self._api_client._http_options.headers
946
+ headers = original_headers.copy() if original_headers is not None else {}
947
+ if api_key.startswith('auth_tokens/'):
948
+ warnings.warn(
949
+ message=(
950
+ "The SDK's ephemeral token support is experimental, and may"
951
+ ' change in future versions.'
952
+ ),
953
+ category=errors.ExperimentalWarning,
863
954
  )
864
- parameter_model = types.LiveConnectConfig(
865
- generation_config=config.get('generation_config'),
866
- response_modalities=config.get('response_modalities'),
867
- speech_config=config.get('speech_config'),
868
- system_instruction=system_instruction,
869
- tools=config.get('tools'),
955
+ method = 'BidiGenerateContentConstrained'
956
+ headers['Authorization'] = f'Token {api_key}'
957
+ if version != 'v1alpha':
958
+ warnings.warn(
959
+ message=(
960
+ "The SDK's ephemeral token support is in v1alpha only."
961
+ 'Please use client = genai.Client(api_key=token.name, '
962
+ 'http_options=types.HttpOptions(api_version="v1alpha"))'
963
+ ' before session connection.'
964
+ ),
965
+ category=errors.ExperimentalWarning,
966
+ )
967
+ uri = f'{base_url}/ws/google.ai.generativelanguage.{version}.GenerativeService.{method}'
968
+
969
+ request_dict = _common.convert_to_dict(
970
+ live_converters._LiveConnectParameters_to_mldev(
971
+ api_client=self._api_client,
972
+ from_object=types.LiveConnectParameters(
973
+ model=transformed_model,
974
+ config=parameter_model,
975
+ ).model_dump(exclude_none=True),
976
+ )
870
977
  )
871
- else:
872
- parameter_model = config
978
+ del request_dict['config']
979
+
980
+ setv(request_dict, ['setup', 'model'], transformed_model)
873
981
 
874
- if self._api_client.api_key:
982
+ request = json.dumps(request_dict)
983
+ elif self._api_client.api_key and self._api_client.vertexai:
984
+ # Headers already contains api key for express mode.
875
985
  api_key = self._api_client.api_key
876
- version = self._api_client._http_options['api_version']
877
- uri = f'{base_url}/ws/google.ai.generativelanguage.{version}.GenerativeService.BidiGenerateContent?key={api_key}'
878
- headers = self._api_client._http_options['headers']
986
+ version = self._api_client._http_options.api_version
987
+ uri = f'{base_url}/ws/google.cloud.aiplatform.{version}.LlmBidiService/BidiGenerateContent'
988
+ original_headers = self._api_client._http_options.headers
989
+ headers = original_headers.copy() if original_headers is not None else {}
990
+
879
991
  request_dict = _common.convert_to_dict(
880
- self._LiveSetup_to_mldev(
881
- model=transformed_model,
882
- config=parameter_model,
992
+ live_converters._LiveConnectParameters_to_vertex(
993
+ api_client=self._api_client,
994
+ from_object=types.LiveConnectParameters(
995
+ model=transformed_model,
996
+ config=parameter_model,
997
+ ).model_dump(exclude_none=True),
883
998
  )
884
999
  )
1000
+ del request_dict['config']
1001
+
1002
+ setv(request_dict, ['setup', 'model'], transformed_model)
1003
+
885
1004
  request = json.dumps(request_dict)
886
1005
  else:
887
- # Get bearer token through Application Default Credentials.
888
- creds, _ = google.auth.default(
889
- scopes=['https://www.googleapis.com/auth/cloud-platform']
1006
+ version = self._api_client._http_options.api_version
1007
+ has_sufficient_auth = (
1008
+ self._api_client.project and self._api_client.location
890
1009
  )
1010
+ if self._api_client.custom_base_url and not has_sufficient_auth:
1011
+ # API gateway proxy can use the auth in custom headers, not url.
1012
+ # Enable custom url if auth is not sufficient.
1013
+ uri = self._api_client.custom_base_url
1014
+ # Keep the model as is.
1015
+ transformed_model = model
1016
+ # Do not get credentials for custom url.
1017
+ original_headers = self._api_client._http_options.headers
1018
+ headers = (
1019
+ original_headers.copy() if original_headers is not None else {}
1020
+ )
1021
+
1022
+ else:
1023
+ uri = f'{base_url}/ws/google.cloud.aiplatform.{version}.LlmBidiService/BidiGenerateContent'
1024
+
1025
+ if not self._api_client._credentials:
1026
+ # Get bearer token through Application Default Credentials.
1027
+ creds, _ = google.auth.default( # type: ignore
1028
+ scopes=['https://www.googleapis.com/auth/cloud-platform']
1029
+ )
1030
+ else:
1031
+ creds = self._api_client._credentials
1032
+ # creds.valid is False, and creds.token is None
1033
+ # Need to refresh credentials to populate those
1034
+ if not (creds.token and creds.valid):
1035
+ if requests is None:
1036
+ raise ValueError('The requests module is required to refresh google-auth credentials. Please install with `pip install google-auth[requests]`')
1037
+ auth_req = requests.Request() # type: ignore
1038
+ creds.refresh(auth_req)
1039
+ bearer_token = creds.token
1040
+
1041
+ original_headers = self._api_client._http_options.headers
1042
+ headers = (
1043
+ original_headers.copy() if original_headers is not None else {}
1044
+ )
1045
+ if not headers.get('Authorization'):
1046
+ headers['Authorization'] = f'Bearer {bearer_token}'
891
1047
 
892
- # creds.valid is False, and creds.token is None
893
- # Need to refresh credentials to populate those
894
- auth_req = google.auth.transport.requests.Request()
895
- creds.refresh(auth_req)
896
- bearer_token = creds.token
897
- headers = self._api_client._http_options['headers']
898
- if headers is not None:
899
- headers.update({
900
- 'Authorization': 'Bearer {}'.format(bearer_token),
901
- })
902
- version = self._api_client._http_options['api_version']
903
- uri = f'{base_url}/ws/google.cloud.aiplatform.{version}.LlmBidiService/BidiGenerateContent'
904
1048
  location = self._api_client.location
905
1049
  project = self._api_client.project
906
- if transformed_model.startswith('publishers/'):
1050
+ if transformed_model.startswith('publishers/') and project and location:
907
1051
  transformed_model = (
908
1052
  f'projects/{project}/locations/{location}/' + transformed_model
909
1053
  )
910
1054
  request_dict = _common.convert_to_dict(
911
- self._LiveSetup_to_vertex(
912
- model=transformed_model,
913
- config=parameter_model,
1055
+ live_converters._LiveConnectParameters_to_vertex(
1056
+ api_client=self._api_client,
1057
+ from_object=types.LiveConnectParameters(
1058
+ model=transformed_model,
1059
+ config=parameter_model,
1060
+ ).model_dump(exclude_none=True),
914
1061
  )
915
1062
  )
1063
+ del request_dict['config']
1064
+
1065
+ if (
1066
+ getv(
1067
+ request_dict, ['setup', 'generationConfig', 'responseModalities']
1068
+ )
1069
+ is None
1070
+ ):
1071
+ setv(
1072
+ request_dict,
1073
+ ['setup', 'generationConfig', 'responseModalities'],
1074
+ ['AUDIO'],
1075
+ )
1076
+
916
1077
  request = json.dumps(request_dict)
917
1078
 
918
- async with connect(uri, additional_headers=headers) as ws:
1079
+ if parameter_model.tools and _mcp_utils.has_mcp_tool_usage(
1080
+ parameter_model.tools
1081
+ ):
1082
+ if headers is None:
1083
+ headers = {}
1084
+ _mcp_utils.set_mcp_usage_header(headers)
1085
+
1086
+ async with ws_connect(
1087
+ uri, additional_headers=headers, **self._api_client._websocket_ssl_ctx
1088
+ ) as ws:
919
1089
  await ws.send(request)
920
- logger.info(await ws.recv(decode=False))
1090
+ try:
1091
+ # websockets 14.0+
1092
+ raw_response = await ws.recv(decode=False)
1093
+ except TypeError:
1094
+ raw_response = await ws.recv() # type: ignore[assignment]
1095
+ if raw_response:
1096
+ try:
1097
+ response = json.loads(raw_response)
1098
+ except json.decoder.JSONDecodeError:
1099
+ raise ValueError(f'Failed to parse response: {raw_response!r}')
1100
+ else:
1101
+ response = {}
1102
+
1103
+ if self._api_client.vertexai:
1104
+ response_dict = live_converters._LiveServerMessage_from_vertex(response)
1105
+ else:
1106
+ response_dict = response
1107
+
1108
+ setup_response = types.LiveServerMessage._from_response(
1109
+ response=response_dict, kwargs=parameter_model.model_dump()
1110
+ )
1111
+ if setup_response.setup_complete:
1112
+ session_id = setup_response.setup_complete.session_id
1113
+ else:
1114
+ session_id = None
1115
+ yield AsyncSession(
1116
+ api_client=self._api_client,
1117
+ websocket=ws,
1118
+ session_id=session_id,
1119
+ )
1120
+
1121
+
1122
+ async def _t_live_connect_config(
1123
+ api_client: BaseApiClient,
1124
+ config: Optional[types.LiveConnectConfigOrDict],
1125
+ ) -> types.LiveConnectConfig:
1126
+ # Ensure the config is a LiveConnectConfig.
1127
+ if config is None:
1128
+ parameter_model = types.LiveConnectConfig()
1129
+ elif isinstance(config, dict):
1130
+ if getv(config, ['system_instruction']) is not None:
1131
+ converted_system_instruction = t.t_content(
1132
+ getv(config, ['system_instruction'])
1133
+ )
1134
+ else:
1135
+ converted_system_instruction = None
1136
+ parameter_model = types.LiveConnectConfig(**config)
1137
+ parameter_model.system_instruction = converted_system_instruction
1138
+ else:
1139
+ if config.system_instruction is None:
1140
+ system_instruction = None
1141
+ else:
1142
+ system_instruction = t.t_content(getv(config, ['system_instruction']))
1143
+ parameter_model = config
1144
+ parameter_model.system_instruction = system_instruction
1145
+
1146
+ # Create a copy of the config model with the tools field cleared as they will
1147
+ # be replaced with the MCP tools converted to GenAI tools.
1148
+ parameter_model_copy = parameter_model.model_copy(update={'tools': None})
1149
+ if parameter_model.tools:
1150
+ parameter_model_copy.tools = []
1151
+ for tool in parameter_model.tools:
1152
+ if McpClientSession is not None and isinstance(tool, McpClientSession):
1153
+ mcp_to_genai_tool_adapter = McpToGenAiToolAdapter(
1154
+ tool, await tool.list_tools()
1155
+ )
1156
+ # Extend the config with the MCP session tools converted to GenAI tools.
1157
+ parameter_model_copy.tools.extend(mcp_to_genai_tool_adapter.tools)
1158
+ elif McpTool is not None and isinstance(tool, McpTool):
1159
+ parameter_model_copy.tools.append(mcp_to_gemini_tool(tool))
1160
+ else:
1161
+ parameter_model_copy.tools.append(tool)
1162
+
1163
+ if parameter_model_copy.generation_config is not None:
1164
+ warnings.warn(
1165
+ 'Setting `LiveConnectConfig.generation_config` is deprecated, '
1166
+ 'please set the fields on `LiveConnectConfig` directly. This will '
1167
+ 'become an error in a future version (not before Q3 2025)',
1168
+ DeprecationWarning,
1169
+ stacklevel=4,
1170
+ )
921
1171
 
922
- yield AsyncSession(api_client=self._api_client, websocket=ws)
1172
+ return parameter_model_copy