google-genai 1.30.0__py3-none-any.whl → 1.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google/genai/_api_client.py +32 -32
- google/genai/_automatic_function_calling_util.py +12 -0
- google/genai/_live_converters.py +1 -0
- google/genai/_tokens_converters.py +1 -0
- google/genai/batches.py +141 -0
- google/genai/caches.py +1 -0
- google/genai/files.py +1 -0
- google/genai/models.py +374 -0
- google/genai/operations.py +1 -0
- google/genai/tunings.py +1 -0
- google/genai/types.py +469 -180
- google/genai/version.py +1 -1
- {google_genai-1.30.0.dist-info → google_genai-1.31.0.dist-info}/METADATA +1 -1
- {google_genai-1.30.0.dist-info → google_genai-1.31.0.dist-info}/RECORD +17 -17
- {google_genai-1.30.0.dist-info → google_genai-1.31.0.dist-info}/WHEEL +0 -0
- {google_genai-1.30.0.dist-info → google_genai-1.31.0.dist-info}/licenses/LICENSE +0 -0
- {google_genai-1.30.0.dist-info → google_genai-1.31.0.dist-info}/top_level.txt +0 -0
google/genai/types.py
CHANGED
@@ -429,6 +429,22 @@ class AdapterSize(_common.CaseInSensitiveEnum):
   """Adapter size 32."""


+class JSONSchemaType(Enum):
+  """The type of the data supported by JSON Schema.
+
+  The values of the enums are lower case strings, while the values of the enums
+  for the Type class are upper case strings.
+  """
+
+  NULL = 'null'
+  BOOLEAN = 'boolean'
+  OBJECT = 'object'
+  ARRAY = 'array'
+  NUMBER = 'number'
+  INTEGER = 'integer'
+  STRING = 'string'
+
+
 class FeatureSelectionPreference(_common.CaseInSensitiveEnum):
   """Options for feature selection preference."""

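The new `JSONSchemaType` enum replaces the copy that previously sat next to `JSONSchema` further down the file (see the hunk at line 1355 below). A minimal sketch of how the enum might be used with the `JSONSchema` model, assuming `JSONSchema` keeps its `type` and `properties` fields from earlier releases (neither field is shown in this hunk):

```python
# Hedged sketch: assumes JSONSchema still exposes `type` and `properties`,
# as in earlier google-genai releases; only JSONSchemaType is new here.
from google.genai import types

person_schema = types.JSONSchema(
    type=types.JSONSchemaType.OBJECT,  # lower-case 'object', unlike Type.OBJECT
    properties={
        'name': types.JSONSchema(type=types.JSONSchemaType.STRING),
        'age': types.JSONSchema(type=types.JSONSchemaType.INTEGER),
    },
)
```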
@@ -564,6 +580,16 @@ class EditMode(_common.CaseInSensitiveEnum):
   EDIT_MODE_PRODUCT_IMAGE = 'EDIT_MODE_PRODUCT_IMAGE'


+class SegmentMode(_common.CaseInSensitiveEnum):
+  """Enum that represents the segmentation mode."""
+
+  FOREGROUND = 'FOREGROUND'
+  BACKGROUND = 'BACKGROUND'
+  PROMPT = 'PROMPT'
+  SEMANTIC = 'SEMANTIC'
+  INTERACTIVE = 'INTERACTIVE'
+
+
 class VideoCompressionQuality(_common.CaseInSensitiveEnum):
   """Enum that controls the compression quality of the generated videos."""

@@ -609,6 +635,19 @@ class MediaModality(_common.CaseInSensitiveEnum):
   """Document, e.g. PDF."""


+class FunctionResponseScheduling(_common.CaseInSensitiveEnum):
+  """Specifies how the response should be scheduled in the conversation."""
+
+  SCHEDULING_UNSPECIFIED = 'SCHEDULING_UNSPECIFIED'
+  """This value is unused."""
+  SILENT = 'SILENT'
+  """Only add the result to the conversation context, do not interrupt or trigger generation."""
+  WHEN_IDLE = 'WHEN_IDLE'
+  """Add the result to the conversation context, and prompt to generate output without interrupting ongoing generation."""
+  INTERRUPT = 'INTERRUPT'
+  """Add the result to the conversation context, interrupt ongoing generation and prompt to generate output."""
+
+
 class StartSensitivity(_common.CaseInSensitiveEnum):
   """Start of speech sensitivity."""

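`FunctionResponseScheduling` is only being moved up in the module (it is removed from its old position in the next hunk). A hedged sketch of how it is typically attached to a function response in the Live API, assuming `FunctionResponse` exposes a `scheduling` field (that field is not part of this hunk):

```python
# Hedged sketch: assumes FunctionResponse has a `scheduling` field, as used in
# the Live API tool-calling flow; only the enum itself is shown in this hunk.
from google.genai import types

function_response = types.FunctionResponse(
    name='get_weather',
    response={'temperature_c': 21},
    scheduling=types.FunctionResponseScheduling.WHEN_IDLE,  # reply when idle
)
```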
@@ -653,19 +692,6 @@ class TurnCoverage(_common.CaseInSensitiveEnum):
   """The users turn includes all realtime input since the last turn, including inactivity (e.g. silence on the audio stream)."""


-class FunctionResponseScheduling(_common.CaseInSensitiveEnum):
-  """Specifies how the response should be scheduled in the conversation."""
-
-  SCHEDULING_UNSPECIFIED = 'SCHEDULING_UNSPECIFIED'
-  """This value is unused."""
-  SILENT = 'SILENT'
-  """Only add the result to the conversation context, do not interrupt or trigger generation."""
-  WHEN_IDLE = 'WHEN_IDLE'
-  """Add the result to the conversation context, and prompt to generate output without interrupting ongoing generation."""
-  INTERRUPT = 'INTERRUPT'
-  """Add the result to the conversation context, interrupt ongoing generation and prompt to generate output."""
-
-
 class Scale(_common.CaseInSensitiveEnum):
   """Scale of the generated music."""

@@ -1152,67 +1178,6 @@ class Content(_common.BaseModel):
   )


-class UserContent(Content):
-  """UserContent facilitates the creation of a Content object with a user role.
-
-  Example usages:
-
-
-  - Create a user Content object with a string:
-    user_content = UserContent("Why is the sky blue?")
-  - Create a user Content object with a file data Part object:
-    user_content = UserContent(Part.from_uri(file_uril="gs://bucket/file.txt",
-    mime_type="text/plain"))
-  - Create a user Content object with byte data Part object:
-    user_content = UserContent(Part.from_bytes(data=b"Hello, World!",
-    mime_type="text/plain"))
-
-  You can create a user Content object using other classmethods in the Part
-  class as well.
-  You can also create a user Content using a list of Part objects or strings.
-  """
-
-  role: Literal['user'] = Field(default='user', init=False, frozen=True)
-  parts: list[Part] = Field()
-
-  def __init__(
-      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
-  ):
-    from . import _transformers as t
-
-    super().__init__(parts=t.t_parts(parts=parts))
-
-
-class ModelContent(Content):
-  """ModelContent facilitates the creation of a Content object with a model role.
-
-  Example usages:
-
-  - Create a model Content object with a string:
-    model_content = ModelContent("Why is the sky blue?")
-  - Create a model Content object with a file data Part object:
-    model_content = ModelContent(Part.from_uri(file_uril="gs://bucket/file.txt",
-    mime_type="text/plain"))
-  - Create a model Content object with byte data Part object:
-    model_content = ModelContent(Part.from_bytes(data=b"Hello, World!",
-    mime_type="text/plain"))
-
-  You can create a model Content object using other classmethods in the Part
-  class as well.
-  You can also create a model Content using a list of Part objects or strings.
-  """
-
-  role: Literal['model'] = Field(default='model', init=False, frozen=True)
-  parts: list[Part] = Field()
-
-  def __init__(
-      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
-  ):
-    from . import _transformers as t
-
-    super().__init__(parts=t.t_parts(parts=parts))
-
-
 class ContentDict(TypedDict, total=False):
   """Contains the multi-part content of a message."""

@@ -1355,23 +1320,7 @@ class HttpOptionsDict(TypedDict, total=False):
 HttpOptionsOrDict = Union[HttpOptions, HttpOptionsDict]


-class JSONSchemaType(Enum):
-  """The type of the data supported by JSON Schema.
-
-  The values of the enums are lower case strings, while the values of the enums
-  for the Type class are upper case strings.
-  """
-
-  NULL = 'null'
-  BOOLEAN = 'boolean'
-  OBJECT = 'object'
-  ARRAY = 'array'
-  NUMBER = 'number'
-  INTEGER = 'integer'
-  STRING = 'string'
-
-
-class JSONSchema(pydantic.BaseModel):
+class JSONSchema(_common.BaseModel):
   """A subset of JSON Schema according to 2020-12 JSON Schema draft.

   Represents a subset of a JSON Schema object that is used by the Gemini model.
@@ -7241,6 +7190,236 @@ RecontextImageResponseOrDict = Union[
 ]


+class ScribbleImage(_common.BaseModel):
+  """An image mask representing a brush scribble."""
+
+  image: Optional[Image] = Field(
+      default=None,
+      description="""The brush scribble to guide segmentation. Valid for the interactive mode.""",
+  )
+
+
+class ScribbleImageDict(TypedDict, total=False):
+  """An image mask representing a brush scribble."""
+
+  image: Optional[ImageDict]
+  """The brush scribble to guide segmentation. Valid for the interactive mode."""
+
+
+ScribbleImageOrDict = Union[ScribbleImage, ScribbleImageDict]
+
+
+class SegmentImageSource(_common.BaseModel):
+  """A set of source input(s) for image segmentation."""
+
+  prompt: Optional[str] = Field(
+      default=None,
+      description="""A text prompt for guiding the model during image segmentation.
+      Required for prompt mode and semantic mode, disallowed for other modes.""",
+  )
+  image: Optional[Image] = Field(
+      default=None, description="""The image to be segmented."""
+  )
+  scribble_image: Optional[ScribbleImage] = Field(
+      default=None,
+      description="""The brush scribble to guide segmentation.
+      Required for the interactive mode, disallowed for other modes.""",
+  )
+
+
+class SegmentImageSourceDict(TypedDict, total=False):
+  """A set of source input(s) for image segmentation."""
+
+  prompt: Optional[str]
+  """A text prompt for guiding the model during image segmentation.
+      Required for prompt mode and semantic mode, disallowed for other modes."""
+
+  image: Optional[ImageDict]
+  """The image to be segmented."""
+
+  scribble_image: Optional[ScribbleImageDict]
+  """The brush scribble to guide segmentation.
+      Required for the interactive mode, disallowed for other modes."""
+
+
+SegmentImageSourceOrDict = Union[SegmentImageSource, SegmentImageSourceDict]
+
+
+class SegmentImageConfig(_common.BaseModel):
+  """Configuration for segmenting an image."""
+
+  http_options: Optional[HttpOptions] = Field(
+      default=None, description="""Used to override HTTP request options."""
+  )
+  mode: Optional[SegmentMode] = Field(
+      default=None, description="""The segmentation mode to use."""
+  )
+  max_predictions: Optional[int] = Field(
+      default=None,
+      description="""The maximum number of predictions to return up to, by top
+      confidence score.""",
+  )
+  confidence_threshold: Optional[float] = Field(
+      default=None,
+      description="""The confidence score threshold for the detections as a decimal
+      value. Only predictions with a confidence score higher than this
+      threshold will be returned.""",
+  )
+  mask_dilation: Optional[float] = Field(
+      default=None,
+      description="""A decimal value representing how much dilation to apply to the
+      masks. 0 for no dilation. 1.0 means the masked area covers the whole
+      image.""",
+  )
+  binary_color_threshold: Optional[float] = Field(
+      default=None,
+      description="""The binary color threshold to apply to the masks. The threshold
+      can be set to a decimal value between 0 and 255 non-inclusive.
+      Set to -1 for no binary color thresholding.""",
+  )
+
+
+class SegmentImageConfigDict(TypedDict, total=False):
+  """Configuration for segmenting an image."""
+
+  http_options: Optional[HttpOptionsDict]
+  """Used to override HTTP request options."""
+
+  mode: Optional[SegmentMode]
+  """The segmentation mode to use."""
+
+  max_predictions: Optional[int]
+  """The maximum number of predictions to return up to, by top
+      confidence score."""
+
+  confidence_threshold: Optional[float]
+  """The confidence score threshold for the detections as a decimal
+      value. Only predictions with a confidence score higher than this
+      threshold will be returned."""
+
+  mask_dilation: Optional[float]
+  """A decimal value representing how much dilation to apply to the
+      masks. 0 for no dilation. 1.0 means the masked area covers the whole
+      image."""
+
+  binary_color_threshold: Optional[float]
+  """The binary color threshold to apply to the masks. The threshold
+      can be set to a decimal value between 0 and 255 non-inclusive.
+      Set to -1 for no binary color thresholding."""
+
+
+SegmentImageConfigOrDict = Union[SegmentImageConfig, SegmentImageConfigDict]
+
+
+class _SegmentImageParameters(_common.BaseModel):
+  """The parameters for segmenting an image."""
+
+  model: Optional[str] = Field(
+      default=None,
+      description="""ID of the model to use. For a list of models, see `Google models
+      <https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models>`_.""",
+  )
+  source: Optional[SegmentImageSource] = Field(
+      default=None,
+      description="""A set of source input(s) for image segmentation.""",
+  )
+  config: Optional[SegmentImageConfig] = Field(
+      default=None, description="""Configuration for image segmentation."""
+  )
+
+
+class _SegmentImageParametersDict(TypedDict, total=False):
+  """The parameters for segmenting an image."""
+
+  model: Optional[str]
+  """ID of the model to use. For a list of models, see `Google models
+      <https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models>`_."""
+
+  source: Optional[SegmentImageSourceDict]
+  """A set of source input(s) for image segmentation."""
+
+  config: Optional[SegmentImageConfigDict]
+  """Configuration for image segmentation."""
+
+
+_SegmentImageParametersOrDict = Union[
+    _SegmentImageParameters, _SegmentImageParametersDict
+]
+
+
+class EntityLabel(_common.BaseModel):
+  """An entity representing the segmented area."""
+
+  label: Optional[str] = Field(
+      default=None, description="""The label of the segmented entity."""
+  )
+  score: Optional[float] = Field(
+      default=None,
+      description="""The confidence score of the detected label.""",
+  )
+
+
+class EntityLabelDict(TypedDict, total=False):
+  """An entity representing the segmented area."""
+
+  label: Optional[str]
+  """The label of the segmented entity."""
+
+  score: Optional[float]
+  """The confidence score of the detected label."""
+
+
+EntityLabelOrDict = Union[EntityLabel, EntityLabelDict]
+
+
+class GeneratedImageMask(_common.BaseModel):
+  """A generated image mask."""
+
+  mask: Optional[Image] = Field(
+      default=None, description="""The generated image mask."""
+  )
+  labels: Optional[list[EntityLabel]] = Field(
+      default=None,
+      description="""The detected entities on the segmented area.""",
+  )
+
+
+class GeneratedImageMaskDict(TypedDict, total=False):
+  """A generated image mask."""
+
+  mask: Optional[ImageDict]
+  """The generated image mask."""
+
+  labels: Optional[list[EntityLabelDict]]
+  """The detected entities on the segmented area."""
+
+
+GeneratedImageMaskOrDict = Union[GeneratedImageMask, GeneratedImageMaskDict]
+
+
+class SegmentImageResponse(_common.BaseModel):
+  """The output images response."""
+
+  generated_masks: Optional[list[GeneratedImageMask]] = Field(
+      default=None,
+      description="""List of generated image masks.
+      """,
+  )
+
+
+class SegmentImageResponseDict(TypedDict, total=False):
+  """The output images response."""
+
+  generated_masks: Optional[list[GeneratedImageMaskDict]]
+  """List of generated image masks.
+      """
+
+
+SegmentImageResponseOrDict = Union[
+    SegmentImageResponse, SegmentImageResponseDict
+]
+
+
 class GetModelConfig(_common.BaseModel):
   """Optional parameters for models.get method."""

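Together with the `SegmentMode` enum added earlier, these types describe the full request/response surface for image segmentation. A hedged sketch of assembling a request (the image file is a placeholder, and the `models.py` entry point that ultimately consumes `_SegmentImageParameters` is not shown in this diff):

```python
# Hedged sketch using only the types shown in this diff; the client method
# that sends these parameters is added in models.py and is not shown here.
from google.genai import types

source = types.SegmentImageSource(
    # A text prompt is required for PROMPT and SEMANTIC modes.
    prompt='the dog in the foreground',
    image=types.Image.from_file(location='dog.png'),  # placeholder path
)

config = types.SegmentImageConfig(
    mode=types.SegmentMode.PROMPT,
    max_predictions=3,         # keep at most the 3 highest-confidence masks
    confidence_threshold=0.5,  # drop detections below this score
)

# The response type, SegmentImageResponse, carries generated_masks: each
# GeneratedImageMask holds an Image mask plus EntityLabel (label, score) entries.
```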
@@ -8175,6 +8354,40 @@ class VideoDict(TypedDict, total=False):
 VideoOrDict = Union[Video, VideoDict]


+class VideoGenerationReferenceImage(_common.BaseModel):
+  """A reference image for video generation."""
+
+  image: Optional[Image] = Field(
+      default=None,
+      description="""The reference image.
+      """,
+  )
+  reference_type: Optional[str] = Field(
+      default=None,
+      description="""The type of the reference image, which defines how the reference
+      image will be used to generate the video. Supported values are 'asset'
+      or 'style'.""",
+  )
+
+
+class VideoGenerationReferenceImageDict(TypedDict, total=False):
+  """A reference image for video generation."""
+
+  image: Optional[ImageDict]
+  """The reference image.
+      """
+
+  reference_type: Optional[str]
+  """The type of the reference image, which defines how the reference
+      image will be used to generate the video. Supported values are 'asset'
+      or 'style'."""
+
+
+VideoGenerationReferenceImageOrDict = Union[
+    VideoGenerationReferenceImage, VideoGenerationReferenceImageDict
+]
+
+
 class GenerateVideosConfig(_common.BaseModel):
   """Configuration for generating videos."""

@@ -8230,6 +8443,14 @@ class GenerateVideosConfig(_common.BaseModel):
       default=None,
       description="""Image to use as the last frame of generated videos. Only supported for image to video use cases.""",
   )
+  reference_images: Optional[list[VideoGenerationReferenceImage]] = Field(
+      default=None,
+      description="""The images to use as the references to generate the videos.
+      If this field is provided, the text prompt field must also be provided.
+      The image, video, or last_frame field are not supported. Each image must
+      be associated with a type. Veo 2 supports up to 3 asset images *or* 1
+      style image.""",
+  )
   compression_quality: Optional[VideoCompressionQuality] = Field(
       default=None,
       description="""Compression quality of the generated videos.""",
@@ -8281,6 +8502,13 @@ class GenerateVideosConfigDict(TypedDict, total=False):
   last_frame: Optional[ImageDict]
   """Image to use as the last frame of generated videos. Only supported for image to video use cases."""

+  reference_images: Optional[list[VideoGenerationReferenceImageDict]]
+  """The images to use as the references to generate the videos.
+      If this field is provided, the text prompt field must also be provided.
+      The image, video, or last_frame field are not supported. Each image must
+      be associated with a type. Veo 2 supports up to 3 asset images *or* 1
+      style image."""
+
   compression_quality: Optional[VideoCompressionQuality]
   """Compression quality of the generated videos."""

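A hedged sketch of how the new `reference_images` field might be populated (the image path is a placeholder; the requirement for an accompanying text prompt and the Veo 2 limit of 3 asset images or 1 style image come from the field description above):

```python
# Hedged sketch: constructs only the config types added in this diff.
from google.genai import types

reference = types.VideoGenerationReferenceImage(
    image=types.Image.from_file(location='character.png'),  # placeholder path
    reference_type='asset',  # 'asset' or 'style', per the field description
)

video_config = types.GenerateVideosConfig(
    # Per the description: a text prompt must also be provided, and image,
    # video, and last_frame may not be combined with reference_images.
    reference_images=[reference],
)
```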
@@ -12964,95 +13192,6 @@ class LiveServerMessageDict(TypedDict, total=False):
 LiveServerMessageOrDict = Union[LiveServerMessage, LiveServerMessageDict]


-class AutomaticActivityDetection(_common.BaseModel):
-  """Configures automatic detection of activity."""
-
-  disabled: Optional[bool] = Field(
-      default=None,
-      description="""If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals.""",
-  )
-  start_of_speech_sensitivity: Optional[StartSensitivity] = Field(
-      default=None,
-      description="""Determines how likely speech is to be detected.""",
-  )
-  end_of_speech_sensitivity: Optional[EndSensitivity] = Field(
-      default=None,
-      description="""Determines how likely detected speech is ended.""",
-  )
-  prefix_padding_ms: Optional[int] = Field(
-      default=None,
-      description="""The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives.""",
-  )
-  silence_duration_ms: Optional[int] = Field(
-      default=None,
-      description="""The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency.""",
-  )
-
-
-class AutomaticActivityDetectionDict(TypedDict, total=False):
-  """Configures automatic detection of activity."""
-
-  disabled: Optional[bool]
-  """If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals."""
-
-  start_of_speech_sensitivity: Optional[StartSensitivity]
-  """Determines how likely speech is to be detected."""
-
-  end_of_speech_sensitivity: Optional[EndSensitivity]
-  """Determines how likely detected speech is ended."""
-
-  prefix_padding_ms: Optional[int]
-  """The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives."""
-
-  silence_duration_ms: Optional[int]
-  """The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency."""
-
-
-AutomaticActivityDetectionOrDict = Union[
-    AutomaticActivityDetection, AutomaticActivityDetectionDict
-]
-
-
-class RealtimeInputConfig(_common.BaseModel):
-  """Marks the end of user activity.
-
-  This can only be sent if automatic (i.e. server-side) activity detection is
-  disabled.
-  """
-
-  automatic_activity_detection: Optional[AutomaticActivityDetection] = Field(
-      default=None,
-      description="""If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals.""",
-  )
-  activity_handling: Optional[ActivityHandling] = Field(
-      default=None, description="""Defines what effect activity has."""
-  )
-  turn_coverage: Optional[TurnCoverage] = Field(
-      default=None,
-      description="""Defines which input is included in the user's turn.""",
-  )
-
-
-class RealtimeInputConfigDict(TypedDict, total=False):
-  """Marks the end of user activity.
-
-  This can only be sent if automatic (i.e. server-side) activity detection is
-  disabled.
-  """
-
-  automatic_activity_detection: Optional[AutomaticActivityDetectionDict]
-  """If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals."""
-
-  activity_handling: Optional[ActivityHandling]
-  """Defines what effect activity has."""
-
-  turn_coverage: Optional[TurnCoverage]
-  """Defines which input is included in the user's turn."""
-
-
-RealtimeInputConfigOrDict = Union[RealtimeInputConfig, RealtimeInputConfigDict]
-
-
 class SessionResumptionConfig(_common.BaseModel):
   """Configuration of session resumption mechanism.

@@ -13191,6 +13330,95 @@ class ProactivityConfigDict(TypedDict, total=False):
 ProactivityConfigOrDict = Union[ProactivityConfig, ProactivityConfigDict]


+class AutomaticActivityDetection(_common.BaseModel):
+  """Configures automatic detection of activity."""
+
+  disabled: Optional[bool] = Field(
+      default=None,
+      description="""If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals.""",
+  )
+  start_of_speech_sensitivity: Optional[StartSensitivity] = Field(
+      default=None,
+      description="""Determines how likely speech is to be detected.""",
+  )
+  end_of_speech_sensitivity: Optional[EndSensitivity] = Field(
+      default=None,
+      description="""Determines how likely detected speech is ended.""",
+  )
+  prefix_padding_ms: Optional[int] = Field(
+      default=None,
+      description="""The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives.""",
+  )
+  silence_duration_ms: Optional[int] = Field(
+      default=None,
+      description="""The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency.""",
+  )
+
+
+class AutomaticActivityDetectionDict(TypedDict, total=False):
+  """Configures automatic detection of activity."""
+
+  disabled: Optional[bool]
+  """If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals."""
+
+  start_of_speech_sensitivity: Optional[StartSensitivity]
+  """Determines how likely speech is to be detected."""
+
+  end_of_speech_sensitivity: Optional[EndSensitivity]
+  """Determines how likely detected speech is ended."""
+
+  prefix_padding_ms: Optional[int]
+  """The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives."""
+
+  silence_duration_ms: Optional[int]
+  """The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency."""
+
+
+AutomaticActivityDetectionOrDict = Union[
+    AutomaticActivityDetection, AutomaticActivityDetectionDict
+]
+
+
+class RealtimeInputConfig(_common.BaseModel):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  automatic_activity_detection: Optional[AutomaticActivityDetection] = Field(
+      default=None,
+      description="""If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals.""",
+  )
+  activity_handling: Optional[ActivityHandling] = Field(
+      default=None, description="""Defines what effect activity has."""
+  )
+  turn_coverage: Optional[TurnCoverage] = Field(
+      default=None,
+      description="""Defines which input is included in the user's turn.""",
+  )
+
+
+class RealtimeInputConfigDict(TypedDict, total=False):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  automatic_activity_detection: Optional[AutomaticActivityDetectionDict]
+  """If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals."""
+
+  activity_handling: Optional[ActivityHandling]
+  """Defines what effect activity has."""
+
+  turn_coverage: Optional[TurnCoverage]
+  """Defines which input is included in the user's turn."""
+
+
+RealtimeInputConfigOrDict = Union[RealtimeInputConfig, RealtimeInputConfigDict]
+
+
 class LiveClientSetup(_common.BaseModel):
   """Message contains configuration that will apply for the duration of the streaming session."""

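`AutomaticActivityDetection` and `RealtimeInputConfig` are unchanged apart from their position in the module, but since their fields are shown in full above, a small hedged sketch of composing them (values are illustrative only):

```python
# Hedged sketch using only the fields visible in this hunk; the values are
# illustrative, not recommended settings.
from google.genai import types

realtime_input = types.RealtimeInputConfig(
    automatic_activity_detection=types.AutomaticActivityDetection(
        disabled=False,           # keep server-side activity detection enabled
        prefix_padding_ms=300,    # speech needed before start-of-speech commits
        silence_duration_ms=800,  # silence needed before end-of-speech commits
    ),
)
```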
@@ -14543,6 +14771,67 @@ CreateTuningJobParametersOrDict = Union[
 ]


+class UserContent(Content):
+  """UserContent facilitates the creation of a Content object with a user role.
+
+  Example usages:
+
+
+  - Create a user Content object with a string:
+    user_content = UserContent("Why is the sky blue?")
+  - Create a user Content object with a file data Part object:
+    user_content = UserContent(Part.from_uri(file_uril="gs://bucket/file.txt",
+    mime_type="text/plain"))
+  - Create a user Content object with byte data Part object:
+    user_content = UserContent(Part.from_bytes(data=b"Hello, World!",
+    mime_type="text/plain"))
+
+  You can create a user Content object using other classmethods in the Part
+  class as well.
+  You can also create a user Content using a list of Part objects or strings.
+  """
+
+  role: Literal['user'] = Field(default='user', init=False, frozen=True)
+  parts: list[Part] = Field()
+
+  def __init__(
+      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
+  ):
+    from . import _transformers as t
+
+    super().__init__(parts=t.t_parts(parts=parts))
+
+
+class ModelContent(Content):
+  """ModelContent facilitates the creation of a Content object with a model role.
+
+  Example usages:
+
+  - Create a model Content object with a string:
+    model_content = ModelContent("Why is the sky blue?")
+  - Create a model Content object with a file data Part object:
+    model_content = ModelContent(Part.from_uri(file_uril="gs://bucket/file.txt",
+    mime_type="text/plain"))
+  - Create a model Content object with byte data Part object:
+    model_content = ModelContent(Part.from_bytes(data=b"Hello, World!",
+    mime_type="text/plain"))
+
+  You can create a model Content object using other classmethods in the Part
+  class as well.
+  You can also create a model Content using a list of Part objects or strings.
+  """
+
+  role: Literal['model'] = Field(default='model', init=False, frozen=True)
+  parts: list[Part] = Field()
+
+  def __init__(
+      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
+  ):
+    from . import _transformers as t
+
+    super().__init__(parts=t.t_parts(parts=parts))
+
+
 class CustomOutputFormatConfig(_common.BaseModel):
   """Config for custom output format."""
