google-genai 1.30.0__py3-none-any.whl → 1.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
google/genai/types.py CHANGED
@@ -429,6 +429,22 @@ class AdapterSize(_common.CaseInSensitiveEnum):
   """Adapter size 32."""
 
 
+class JSONSchemaType(Enum):
+  """The type of the data supported by JSON Schema.
+
+  The values of the enums are lower case strings, while the values of the enums
+  for the Type class are upper case strings.
+  """
+
+  NULL = 'null'
+  BOOLEAN = 'boolean'
+  OBJECT = 'object'
+  ARRAY = 'array'
+  NUMBER = 'number'
+  INTEGER = 'integer'
+  STRING = 'string'
+
+
 class FeatureSelectionPreference(_common.CaseInSensitiveEnum):
   """Options for feature selection preference."""
 
@@ -564,6 +580,16 @@ class EditMode(_common.CaseInSensitiveEnum):
   EDIT_MODE_PRODUCT_IMAGE = 'EDIT_MODE_PRODUCT_IMAGE'
 
 
+class SegmentMode(_common.CaseInSensitiveEnum):
+  """Enum that represents the segmentation mode."""
+
+  FOREGROUND = 'FOREGROUND'
+  BACKGROUND = 'BACKGROUND'
+  PROMPT = 'PROMPT'
+  SEMANTIC = 'SEMANTIC'
+  INTERACTIVE = 'INTERACTIVE'
+
+
 class VideoCompressionQuality(_common.CaseInSensitiveEnum):
   """Enum that controls the compression quality of the generated videos."""
 
@@ -609,6 +635,19 @@ class MediaModality(_common.CaseInSensitiveEnum):
   """Document, e.g. PDF."""
 
 
+class FunctionResponseScheduling(_common.CaseInSensitiveEnum):
+  """Specifies how the response should be scheduled in the conversation."""
+
+  SCHEDULING_UNSPECIFIED = 'SCHEDULING_UNSPECIFIED'
+  """This value is unused."""
+  SILENT = 'SILENT'
+  """Only add the result to the conversation context, do not interrupt or trigger generation."""
+  WHEN_IDLE = 'WHEN_IDLE'
+  """Add the result to the conversation context, and prompt to generate output without interrupting ongoing generation."""
+  INTERRUPT = 'INTERRUPT'
+  """Add the result to the conversation context, interrupt ongoing generation and prompt to generate output."""
+
+
 class StartSensitivity(_common.CaseInSensitiveEnum):
   """Start of speech sensitivity."""
 
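This hunk is a straight move of the enum up to the other module-level enums (its old location after TurnCoverage is deleted in the next hunk). For orientation, a minimal sketch of attaching a scheduling value to a Live API function response; the FunctionResponse.scheduling field and the tool name are assumptions from the existing surface, not shown in this diff:

    from google.genai import types

    # Sketch only: `scheduling` on FunctionResponse is assumed from the
    # existing Live API surface; this diff only relocates the enum itself.
    function_response = types.FunctionResponse(
        id='fc-123',             # id of the tool call being answered (hypothetical)
        name='get_weather',      # hypothetical tool name
        response={'temp_c': 21},
        scheduling=types.FunctionResponseScheduling.WHEN_IDLE,
    )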
@@ -653,19 +692,6 @@ class TurnCoverage(_common.CaseInSensitiveEnum):
   """The users turn includes all realtime input since the last turn, including inactivity (e.g. silence on the audio stream)."""
 
 
-class FunctionResponseScheduling(_common.CaseInSensitiveEnum):
-  """Specifies how the response should be scheduled in the conversation."""
-
-  SCHEDULING_UNSPECIFIED = 'SCHEDULING_UNSPECIFIED'
-  """This value is unused."""
-  SILENT = 'SILENT'
-  """Only add the result to the conversation context, do not interrupt or trigger generation."""
-  WHEN_IDLE = 'WHEN_IDLE'
-  """Add the result to the conversation context, and prompt to generate output without interrupting ongoing generation."""
-  INTERRUPT = 'INTERRUPT'
-  """Add the result to the conversation context, interrupt ongoing generation and prompt to generate output."""
-
-
 class Scale(_common.CaseInSensitiveEnum):
   """Scale of the generated music."""
 
@@ -1152,67 +1178,6 @@ class Content(_common.BaseModel):
   )
 
 
-class UserContent(Content):
-  """UserContent facilitates the creation of a Content object with a user role.
-
-  Example usages:
-
-
-  - Create a user Content object with a string:
-    user_content = UserContent("Why is the sky blue?")
-  - Create a user Content object with a file data Part object:
-    user_content = UserContent(Part.from_uri(file_uri="gs://bucket/file.txt",
-    mime_type="text/plain"))
-  - Create a user Content object with a byte data Part object:
-    user_content = UserContent(Part.from_bytes(data=b"Hello, World!",
-    mime_type="text/plain"))
-
-  You can create a user Content object using other classmethods in the Part
-  class as well.
-  You can also create a user Content using a list of Part objects or strings.
-  """
-
-  role: Literal['user'] = Field(default='user', init=False, frozen=True)
-  parts: list[Part] = Field()
-
-  def __init__(
-      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
-  ):
-    from . import _transformers as t
-
-    super().__init__(parts=t.t_parts(parts=parts))
-
-
-class ModelContent(Content):
-  """ModelContent facilitates the creation of a Content object with a model role.
-
-  Example usages:
-
-  - Create a model Content object with a string:
-    model_content = ModelContent("Why is the sky blue?")
-  - Create a model Content object with a file data Part object:
-    model_content = ModelContent(Part.from_uri(file_uri="gs://bucket/file.txt",
-    mime_type="text/plain"))
-  - Create a model Content object with a byte data Part object:
-    model_content = ModelContent(Part.from_bytes(data=b"Hello, World!",
-    mime_type="text/plain"))
-
-  You can create a model Content object using other classmethods in the Part
-  class as well.
-  You can also create a model Content using a list of Part objects or strings.
-  """
-
-  role: Literal['model'] = Field(default='model', init=False, frozen=True)
-  parts: list[Part] = Field()
-
-  def __init__(
-      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
-  ):
-    from . import _transformers as t
-
-    super().__init__(parts=t.t_parts(parts=parts))
-
-
 class ContentDict(TypedDict, total=False):
   """Contains the multi-part content of a message."""
 
@@ -1355,23 +1320,7 @@ class HttpOptionsDict(TypedDict, total=False):
 HttpOptionsOrDict = Union[HttpOptions, HttpOptionsDict]
 
 
-class JSONSchemaType(Enum):
-  """The type of the data supported by JSON Schema.
-
-  The values of the enums are lower case strings, while the values of the enums
-  for the Type class are upper case strings.
-  """
-
-  NULL = 'null'
-  BOOLEAN = 'boolean'
-  OBJECT = 'object'
-  ARRAY = 'array'
-  NUMBER = 'number'
-  INTEGER = 'integer'
-  STRING = 'string'
-
-
-class JSONSchema(pydantic.BaseModel):
+class JSONSchema(_common.BaseModel):
   """A subset of JSON Schema according to 2020-12 JSON Schema draft.
 
   Represents a subset of a JSON Schema object that is used by the Gemini model.
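Net effect of the two JSONSchema hunks: JSONSchemaType moves up with the other enums, and JSONSchema itself now derives from _common.BaseModel rather than pydantic.BaseModel. A small sketch of the casing contrast the docstring calls out (illustrative only; it uses just names defined in this file, plus pydantic's standard model_dump):

    from google.genai import types

    # JSON Schema type names are lower case; the Type enum used elsewhere in
    # this module keeps upper case values.
    assert types.JSONSchemaType.STRING.value == 'string'
    assert types.Type.STRING.value == 'STRING'

    # With JSONSchema now a _common.BaseModel, it serializes like the
    # module's other types.
    schema = types.JSONSchema(
        type=types.JSONSchemaType.OBJECT,
        properties={'name': types.JSONSchema(type=types.JSONSchemaType.STRING)},
    )
    print(schema.model_dump(exclude_none=True))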
@@ -7241,6 +7190,236 @@ RecontextImageResponseOrDict = Union[
 ]
 
 
+class ScribbleImage(_common.BaseModel):
+  """An image mask representing a brush scribble."""
+
+  image: Optional[Image] = Field(
+      default=None,
+      description="""The brush scribble to guide segmentation. Valid for the interactive mode.""",
+  )
+
+
+class ScribbleImageDict(TypedDict, total=False):
+  """An image mask representing a brush scribble."""
+
+  image: Optional[ImageDict]
+  """The brush scribble to guide segmentation. Valid for the interactive mode."""
+
+
+ScribbleImageOrDict = Union[ScribbleImage, ScribbleImageDict]
+
+
+class SegmentImageSource(_common.BaseModel):
+  """A set of source input(s) for image segmentation."""
+
+  prompt: Optional[str] = Field(
+      default=None,
+      description="""A text prompt for guiding the model during image segmentation.
+      Required for prompt mode and semantic mode, disallowed for other modes.""",
+  )
+  image: Optional[Image] = Field(
+      default=None, description="""The image to be segmented."""
+  )
+  scribble_image: Optional[ScribbleImage] = Field(
+      default=None,
+      description="""The brush scribble to guide segmentation.
+      Required for the interactive mode, disallowed for other modes.""",
+  )
+
+
+class SegmentImageSourceDict(TypedDict, total=False):
+  """A set of source input(s) for image segmentation."""
+
+  prompt: Optional[str]
+  """A text prompt for guiding the model during image segmentation.
+  Required for prompt mode and semantic mode, disallowed for other modes."""
+
+  image: Optional[ImageDict]
+  """The image to be segmented."""
+
+  scribble_image: Optional[ScribbleImageDict]
+  """The brush scribble to guide segmentation.
+  Required for the interactive mode, disallowed for other modes."""
+
+
+SegmentImageSourceOrDict = Union[SegmentImageSource, SegmentImageSourceDict]
+
+
+class SegmentImageConfig(_common.BaseModel):
+  """Configuration for segmenting an image."""
+
+  http_options: Optional[HttpOptions] = Field(
+      default=None, description="""Used to override HTTP request options."""
+  )
+  mode: Optional[SegmentMode] = Field(
+      default=None, description="""The segmentation mode to use."""
+  )
+  max_predictions: Optional[int] = Field(
+      default=None,
+      description="""The maximum number of predictions to return up to, by top
+      confidence score.""",
+  )
+  confidence_threshold: Optional[float] = Field(
+      default=None,
+      description="""The confidence score threshold for the detections as a decimal
+      value. Only predictions with a confidence score higher than this
+      threshold will be returned.""",
+  )
+  mask_dilation: Optional[float] = Field(
+      default=None,
+      description="""A decimal value representing how much dilation to apply to the
+      masks. 0 for no dilation. 1.0 means the masked area covers the whole
+      image.""",
+  )
+  binary_color_threshold: Optional[float] = Field(
+      default=None,
+      description="""The binary color threshold to apply to the masks. The threshold
+      can be set to a decimal value between 0 and 255 non-inclusive.
+      Set to -1 for no binary color thresholding.""",
+  )
+
+
+class SegmentImageConfigDict(TypedDict, total=False):
+  """Configuration for segmenting an image."""
+
+  http_options: Optional[HttpOptionsDict]
+  """Used to override HTTP request options."""
+
+  mode: Optional[SegmentMode]
+  """The segmentation mode to use."""
+
+  max_predictions: Optional[int]
+  """The maximum number of predictions to return up to, by top
+  confidence score."""
+
+  confidence_threshold: Optional[float]
+  """The confidence score threshold for the detections as a decimal
+  value. Only predictions with a confidence score higher than this
+  threshold will be returned."""
+
+  mask_dilation: Optional[float]
+  """A decimal value representing how much dilation to apply to the
+  masks. 0 for no dilation. 1.0 means the masked area covers the whole
+  image."""
+
+  binary_color_threshold: Optional[float]
+  """The binary color threshold to apply to the masks. The threshold
+  can be set to a decimal value between 0 and 255 non-inclusive.
+  Set to -1 for no binary color thresholding."""
+
+
+SegmentImageConfigOrDict = Union[SegmentImageConfig, SegmentImageConfigDict]
+
+
+class _SegmentImageParameters(_common.BaseModel):
+  """The parameters for segmenting an image."""
+
+  model: Optional[str] = Field(
+      default=None,
+      description="""ID of the model to use. For a list of models, see `Google models
+      <https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models>`_.""",
+  )
+  source: Optional[SegmentImageSource] = Field(
+      default=None,
+      description="""A set of source input(s) for image segmentation.""",
+  )
+  config: Optional[SegmentImageConfig] = Field(
+      default=None, description="""Configuration for image segmentation."""
+  )
+
+
+class _SegmentImageParametersDict(TypedDict, total=False):
+  """The parameters for segmenting an image."""
+
+  model: Optional[str]
+  """ID of the model to use. For a list of models, see `Google models
+  <https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models>`_."""
+
+  source: Optional[SegmentImageSourceDict]
+  """A set of source input(s) for image segmentation."""
+
+  config: Optional[SegmentImageConfigDict]
+  """Configuration for image segmentation."""
+
+
+_SegmentImageParametersOrDict = Union[
+    _SegmentImageParameters, _SegmentImageParametersDict
+]
+
+
+class EntityLabel(_common.BaseModel):
+  """An entity representing the segmented area."""
+
+  label: Optional[str] = Field(
+      default=None, description="""The label of the segmented entity."""
+  )
+  score: Optional[float] = Field(
+      default=None,
+      description="""The confidence score of the detected label.""",
+  )
+
+
+class EntityLabelDict(TypedDict, total=False):
+  """An entity representing the segmented area."""
+
+  label: Optional[str]
+  """The label of the segmented entity."""
+
+  score: Optional[float]
+  """The confidence score of the detected label."""
+
+
+EntityLabelOrDict = Union[EntityLabel, EntityLabelDict]
+
+
+class GeneratedImageMask(_common.BaseModel):
+  """A generated image mask."""
+
+  mask: Optional[Image] = Field(
+      default=None, description="""The generated image mask."""
+  )
+  labels: Optional[list[EntityLabel]] = Field(
+      default=None,
+      description="""The detected entities on the segmented area.""",
+  )
+
+
+class GeneratedImageMaskDict(TypedDict, total=False):
+  """A generated image mask."""
+
+  mask: Optional[ImageDict]
+  """The generated image mask."""
+
+  labels: Optional[list[EntityLabelDict]]
+  """The detected entities on the segmented area."""
+
+
+GeneratedImageMaskOrDict = Union[GeneratedImageMask, GeneratedImageMaskDict]
+
+
+class SegmentImageResponse(_common.BaseModel):
+  """The output images response."""
+
+  generated_masks: Optional[list[GeneratedImageMask]] = Field(
+      default=None,
+      description="""List of generated image masks.
+      """,
+  )
+
+
+class SegmentImageResponseDict(TypedDict, total=False):
+  """The output images response."""
+
+  generated_masks: Optional[list[GeneratedImageMaskDict]]
+  """List of generated image masks.
+  """
+
+
+SegmentImageResponseOrDict = Union[
+    SegmentImageResponse, SegmentImageResponseDict
+]
+
+
 class GetModelConfig(_common.BaseModel):
   """Optional parameters for models.get method."""
 
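A minimal sketch of building the new segmentation inputs. The types and field constraints come straight from this hunk; the Image.from_file helper and the models method that would consume _SegmentImageParameters are assumptions, not shown in this diff:

    from google.genai import types

    # PROMPT mode requires a text prompt; scribble_image is only for INTERACTIVE.
    source = types.SegmentImageSource(
        prompt='the dog with a red collar',
        image=types.Image.from_file(location='dog.jpg'),  # assumed helper
    )
    config = types.SegmentImageConfig(
        mode=types.SegmentMode.PROMPT,
        max_predictions=3,          # keep the top 3 masks by confidence
        confidence_threshold=0.5,   # drop low-confidence detections
        binary_color_threshold=-1,  # -1 disables binary thresholding
    )
    # A models.segment_image-style method consuming _SegmentImageParameters is
    # implied by this file but lives outside this diff; the response would be
    # a SegmentImageResponse whose generated_masks carry masks plus labels.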
@@ -8175,6 +8354,40 @@ class VideoDict(TypedDict, total=False):
 VideoOrDict = Union[Video, VideoDict]
 
 
+class VideoGenerationReferenceImage(_common.BaseModel):
+  """A reference image for video generation."""
+
+  image: Optional[Image] = Field(
+      default=None,
+      description="""The reference image.
+      """,
+  )
+  reference_type: Optional[str] = Field(
+      default=None,
+      description="""The type of the reference image, which defines how the reference
+      image will be used to generate the video. Supported values are 'asset'
+      or 'style'.""",
+  )
+
+
+class VideoGenerationReferenceImageDict(TypedDict, total=False):
+  """A reference image for video generation."""
+
+  image: Optional[ImageDict]
+  """The reference image.
+  """
+
+  reference_type: Optional[str]
+  """The type of the reference image, which defines how the reference
+  image will be used to generate the video. Supported values are 'asset'
+  or 'style'."""
+
+
+VideoGenerationReferenceImageOrDict = Union[
+    VideoGenerationReferenceImage, VideoGenerationReferenceImageDict
+]
+
+
 class GenerateVideosConfig(_common.BaseModel):
   """Configuration for generating videos."""
 
@@ -8230,6 +8443,14 @@ class GenerateVideosConfig(_common.BaseModel):
       default=None,
       description="""Image to use as the last frame of generated videos. Only supported for image to video use cases.""",
   )
+  reference_images: Optional[list[VideoGenerationReferenceImage]] = Field(
+      default=None,
+      description="""The images to use as the references to generate the videos.
+      If this field is provided, the text prompt field must also be provided.
+      The image, video, and last_frame fields are not supported. Each image
+      must be associated with a type. Veo 2 supports up to 3 asset images *or*
+      1 style image.""",
+  )
   compression_quality: Optional[VideoCompressionQuality] = Field(
       default=None,
       description="""Compression quality of the generated videos.""",
@@ -8281,6 +8502,13 @@ class GenerateVideosConfigDict(TypedDict, total=False):
   last_frame: Optional[ImageDict]
   """Image to use as the last frame of generated videos. Only supported for image to video use cases."""
 
+  reference_images: Optional[list[VideoGenerationReferenceImageDict]]
+  """The images to use as the references to generate the videos.
+  If this field is provided, the text prompt field must also be provided.
+  The image, video, and last_frame fields are not supported. Each image
+  must be associated with a type. Veo 2 supports up to 3 asset images *or*
+  1 style image."""
+
   compression_quality: Optional[VideoCompressionQuality]
   """Compression quality of the generated videos."""
 
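A sketch of the new reference_images field per its own documentation: a text prompt is required, image/video/last_frame must be unset, and Veo 2 accepts up to 3 'asset' images or 1 'style' image. The generate_videos call that consumes this config is an assumption, not part of this diff:

    from google.genai import types

    config = types.GenerateVideosConfig(
        reference_images=[
            types.VideoGenerationReferenceImage(
                image=types.Image.from_file(location='subject.png'),  # assumed helper
                reference_type='asset',  # 'asset' or 'style'
            )
        ],
    )
    # e.g. client.models.generate_videos(model=..., prompt='...', config=config)
    # (the prompt is mandatory whenever reference_images is set)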
@@ -12964,95 +13192,6 @@ class LiveServerMessageDict(TypedDict, total=False):
 LiveServerMessageOrDict = Union[LiveServerMessage, LiveServerMessageDict]
 
 
-class AutomaticActivityDetection(_common.BaseModel):
-  """Configures automatic detection of activity."""
-
-  disabled: Optional[bool] = Field(
-      default=None,
-      description="""If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals.""",
-  )
-  start_of_speech_sensitivity: Optional[StartSensitivity] = Field(
-      default=None,
-      description="""Determines how likely speech is to be detected.""",
-  )
-  end_of_speech_sensitivity: Optional[EndSensitivity] = Field(
-      default=None,
-      description="""Determines how likely detected speech is ended.""",
-  )
-  prefix_padding_ms: Optional[int] = Field(
-      default=None,
-      description="""The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives.""",
-  )
-  silence_duration_ms: Optional[int] = Field(
-      default=None,
-      description="""The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency.""",
-  )
-
-
-class AutomaticActivityDetectionDict(TypedDict, total=False):
-  """Configures automatic detection of activity."""
-
-  disabled: Optional[bool]
-  """If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals."""
-
-  start_of_speech_sensitivity: Optional[StartSensitivity]
-  """Determines how likely speech is to be detected."""
-
-  end_of_speech_sensitivity: Optional[EndSensitivity]
-  """Determines how likely detected speech is ended."""
-
-  prefix_padding_ms: Optional[int]
-  """The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives."""
-
-  silence_duration_ms: Optional[int]
-  """The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency."""
-
-
-AutomaticActivityDetectionOrDict = Union[
-    AutomaticActivityDetection, AutomaticActivityDetectionDict
-]
-
-
-class RealtimeInputConfig(_common.BaseModel):
-  """Marks the end of user activity.
-
-  This can only be sent if automatic (i.e. server-side) activity detection is
-  disabled.
-  """
-
-  automatic_activity_detection: Optional[AutomaticActivityDetection] = Field(
-      default=None,
-      description="""If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals.""",
-  )
-  activity_handling: Optional[ActivityHandling] = Field(
-      default=None, description="""Defines what effect activity has."""
-  )
-  turn_coverage: Optional[TurnCoverage] = Field(
-      default=None,
-      description="""Defines which input is included in the user's turn.""",
-  )
-
-
-class RealtimeInputConfigDict(TypedDict, total=False):
-  """Marks the end of user activity.
-
-  This can only be sent if automatic (i.e. server-side) activity detection is
-  disabled.
-  """
-
-  automatic_activity_detection: Optional[AutomaticActivityDetectionDict]
-  """If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals."""
-
-  activity_handling: Optional[ActivityHandling]
-  """Defines what effect activity has."""
-
-  turn_coverage: Optional[TurnCoverage]
-  """Defines which input is included in the user's turn."""
-
-
-RealtimeInputConfigOrDict = Union[RealtimeInputConfig, RealtimeInputConfigDict]
-
-
 class SessionResumptionConfig(_common.BaseModel):
   """Configuration of session resumption mechanism.
 
@@ -13191,6 +13330,95 @@ class ProactivityConfigDict(TypedDict, total=False):
 ProactivityConfigOrDict = Union[ProactivityConfig, ProactivityConfigDict]
 
 
+class AutomaticActivityDetection(_common.BaseModel):
+  """Configures automatic detection of activity."""
+
+  disabled: Optional[bool] = Field(
+      default=None,
+      description="""If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals.""",
+  )
+  start_of_speech_sensitivity: Optional[StartSensitivity] = Field(
+      default=None,
+      description="""Determines how likely speech is to be detected.""",
+  )
+  end_of_speech_sensitivity: Optional[EndSensitivity] = Field(
+      default=None,
+      description="""Determines how likely detected speech is ended.""",
+  )
+  prefix_padding_ms: Optional[int] = Field(
+      default=None,
+      description="""The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives.""",
+  )
+  silence_duration_ms: Optional[int] = Field(
+      default=None,
+      description="""The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency.""",
+  )
+
+
+class AutomaticActivityDetectionDict(TypedDict, total=False):
+  """Configures automatic detection of activity."""
+
+  disabled: Optional[bool]
+  """If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals."""
+
+  start_of_speech_sensitivity: Optional[StartSensitivity]
+  """Determines how likely speech is to be detected."""
+
+  end_of_speech_sensitivity: Optional[EndSensitivity]
+  """Determines how likely detected speech is ended."""
+
+  prefix_padding_ms: Optional[int]
+  """The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives."""
+
+  silence_duration_ms: Optional[int]
+  """The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency."""
+
+
+AutomaticActivityDetectionOrDict = Union[
+    AutomaticActivityDetection, AutomaticActivityDetectionDict
+]
+
+
+class RealtimeInputConfig(_common.BaseModel):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  automatic_activity_detection: Optional[AutomaticActivityDetection] = Field(
+      default=None,
+      description="""If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals.""",
+  )
+  activity_handling: Optional[ActivityHandling] = Field(
+      default=None, description="""Defines what effect activity has."""
+  )
+  turn_coverage: Optional[TurnCoverage] = Field(
+      default=None,
+      description="""Defines which input is included in the user's turn.""",
+  )
+
+
+class RealtimeInputConfigDict(TypedDict, total=False):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  automatic_activity_detection: Optional[AutomaticActivityDetectionDict]
+  """If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals."""
+
+  activity_handling: Optional[ActivityHandling]
+  """Defines what effect activity has."""
+
+  turn_coverage: Optional[TurnCoverage]
+  """Defines which input is included in the user's turn."""
+
+
+RealtimeInputConfigOrDict = Union[RealtimeInputConfig, RealtimeInputConfigDict]
+
+
 class LiveClientSetup(_common.BaseModel):
   """Message contains configuration that will apply for the duration of the streaming session."""
 
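The activity-detection classes above are byte-for-byte the blocks deleted earlier, reordered to follow ProactivityConfig. A minimal configuration sketch; attaching it via LiveConnectConfig.realtime_input_config is an assumption from the existing live API, not shown in this diff:

    from google.genai import types

    realtime_config = types.RealtimeInputConfig(
        automatic_activity_detection=types.AutomaticActivityDetection(
            disabled=False,  # keep server-side voice activity detection on
            start_of_speech_sensitivity=types.StartSensitivity.START_SENSITIVITY_LOW,
            silence_duration_ms=800,  # 800 ms of silence before end-of-speech
        ),
        turn_coverage=types.TurnCoverage.TURN_INCLUDES_ALL_INPUT,
    )
    # Assumed wiring: types.LiveConnectConfig(realtime_input_config=realtime_config)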
@@ -14543,6 +14771,67 @@ CreateTuningJobParametersOrDict = Union[
 ]
 
 
+class UserContent(Content):
+  """UserContent facilitates the creation of a Content object with a user role.
+
+  Example usages:
+
+
+  - Create a user Content object with a string:
+    user_content = UserContent("Why is the sky blue?")
+  - Create a user Content object with a file data Part object:
+    user_content = UserContent(Part.from_uri(file_uri="gs://bucket/file.txt",
+    mime_type="text/plain"))
+  - Create a user Content object with a byte data Part object:
+    user_content = UserContent(Part.from_bytes(data=b"Hello, World!",
+    mime_type="text/plain"))
+
+  You can create a user Content object using other classmethods in the Part
+  class as well.
+  You can also create a user Content using a list of Part objects or strings.
+  """
+
+  role: Literal['user'] = Field(default='user', init=False, frozen=True)
+  parts: list[Part] = Field()
+
+  def __init__(
+      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
+  ):
+    from . import _transformers as t
+
+    super().__init__(parts=t.t_parts(parts=parts))
+
+
+class ModelContent(Content):
+  """ModelContent facilitates the creation of a Content object with a model role.
+
+  Example usages:
+
+  - Create a model Content object with a string:
+    model_content = ModelContent("Why is the sky blue?")
+  - Create a model Content object with a file data Part object:
+    model_content = ModelContent(Part.from_uri(file_uri="gs://bucket/file.txt",
+    mime_type="text/plain"))
+  - Create a model Content object with a byte data Part object:
+    model_content = ModelContent(Part.from_bytes(data=b"Hello, World!",
+    mime_type="text/plain"))
+
+  You can create a model Content object using other classmethods in the Part
+  class as well.
+  You can also create a model Content using a list of Part objects or strings.
+  """
+
+  role: Literal['model'] = Field(default='model', init=False, frozen=True)
+  parts: list[Part] = Field()
+
+  def __init__(
+      self, parts: Union['PartUnionDict', list['PartUnionDict'], list['Part']]
+  ):
+    from . import _transformers as t
+
+    super().__init__(parts=t.t_parts(parts=parts))
+
+
 class CustomOutputFormatConfig(_common.BaseModel):
   """Config for custom output format."""
 