google-genai 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- google/genai/_api_client.py +117 -28
- google/genai/_automatic_function_calling_util.py +1 -1
- google/genai/_extra_utils.py +1 -1
- google/genai/_replay_api_client.py +32 -8
- google/genai/_transformers.py +101 -61
- google/genai/batches.py +1 -1
- google/genai/caches.py +1 -1
- google/genai/errors.py +1 -1
- google/genai/files.py +23 -7
- google/genai/live.py +996 -43
- google/genai/models.py +24 -10
- google/genai/operations.py +18 -10
- google/genai/tunings.py +1 -4
- google/genai/types.py +742 -81
- google/genai/version.py +1 -1
- {google_genai-1.8.0.dist-info → google_genai-1.10.0.dist-info}/METADATA +1 -1
- google_genai-1.10.0.dist-info/RECORD +27 -0
- google_genai-1.8.0.dist-info/RECORD +0 -27
- {google_genai-1.8.0.dist-info → google_genai-1.10.0.dist-info}/WHEEL +0 -0
- {google_genai-1.8.0.dist-info → google_genai-1.10.0.dist-info}/licenses/LICENSE +0 -0
- {google_genai-1.8.0.dist-info → google_genai-1.10.0.dist-info}/top_level.txt +0 -0
google/genai/types.py
CHANGED
@@ -128,14 +128,6 @@ class Mode(_common.CaseInSensitiveEnum):
   MODE_DYNAMIC = 'MODE_DYNAMIC'
 
 
-class State(_common.CaseInSensitiveEnum):
-  """Output only. RagFile state."""
-
-  STATE_UNSPECIFIED = 'STATE_UNSPECIFIED'
-  ACTIVE = 'ACTIVE'
-  ERROR = 'ERROR'
-
-
 class FinishReason(_common.CaseInSensitiveEnum):
   """Output only. The reason why the model stopped generating tokens.
 
@@ -185,6 +177,18 @@ class BlockedReason(_common.CaseInSensitiveEnum):
   PROHIBITED_CONTENT = 'PROHIBITED_CONTENT'
 
 
+class TrafficType(_common.CaseInSensitiveEnum):
+  """Output only.
+
+  Traffic type. This shows whether a request consumes Pay-As-You-Go or
+  Provisioned Throughput quota.
+  """
+
+  TRAFFIC_TYPE_UNSPECIFIED = 'TRAFFIC_TYPE_UNSPECIFIED'
+  ON_DEMAND = 'ON_DEMAND'
+  PROVISIONED_THROUGHPUT = 'PROVISIONED_THROUGHPUT'
+
+
 class Modality(_common.CaseInSensitiveEnum):
   """Server content modalities."""
 
@@ -194,15 +198,13 @@ class Modality(_common.CaseInSensitiveEnum):
   AUDIO = 'AUDIO'
 
 
-class 
-  """"""
+class MediaResolution(_common.CaseInSensitiveEnum):
+  """The media resolution to use."""
 
-
-
-
-
-  AUTOMATIC_RESOURCES = 'AUTOMATIC_RESOURCES'
-  SHARED_RESOURCES = 'SHARED_RESOURCES'
+  MEDIA_RESOLUTION_UNSPECIFIED = 'MEDIA_RESOLUTION_UNSPECIFIED'
+  MEDIA_RESOLUTION_LOW = 'MEDIA_RESOLUTION_LOW'
+  MEDIA_RESOLUTION_MEDIUM = 'MEDIA_RESOLUTION_MEDIUM'
+  MEDIA_RESOLUTION_HIGH = 'MEDIA_RESOLUTION_HIGH'
 
 
 class JobState(_common.CaseInSensitiveEnum):
@@ -227,6 +229,7 @@ class AdapterSize(_common.CaseInSensitiveEnum):
 
   ADAPTER_SIZE_UNSPECIFIED = 'ADAPTER_SIZE_UNSPECIFIED'
   ADAPTER_SIZE_ONE = 'ADAPTER_SIZE_ONE'
+  ADAPTER_SIZE_TWO = 'ADAPTER_SIZE_TWO'
   ADAPTER_SIZE_FOUR = 'ADAPTER_SIZE_FOUR'
   ADAPTER_SIZE_EIGHT = 'ADAPTER_SIZE_EIGHT'
   ADAPTER_SIZE_SIXTEEN = 'ADAPTER_SIZE_SIXTEEN'
@@ -249,15 +252,6 @@ class FunctionCallingConfigMode(_common.CaseInSensitiveEnum):
   NONE = 'NONE'
 
 
-class MediaResolution(_common.CaseInSensitiveEnum):
-  """The media resolution to use."""
-
-  MEDIA_RESOLUTION_UNSPECIFIED = 'MEDIA_RESOLUTION_UNSPECIFIED'
-  MEDIA_RESOLUTION_LOW = 'MEDIA_RESOLUTION_LOW'
-  MEDIA_RESOLUTION_MEDIUM = 'MEDIA_RESOLUTION_MEDIUM'
-  MEDIA_RESOLUTION_HIGH = 'MEDIA_RESOLUTION_HIGH'
-
-
 class SafetyFilterLevel(_common.CaseInSensitiveEnum):
   """Enum that controls the safety filter level for objectionable content."""
 
@@ -354,6 +348,38 @@ class MediaModality(_common.CaseInSensitiveEnum):
   DOCUMENT = 'DOCUMENT'
 
 
+class StartSensitivity(_common.CaseInSensitiveEnum):
+  """Start of speech sensitivity."""
+
+  START_SENSITIVITY_UNSPECIFIED = 'START_SENSITIVITY_UNSPECIFIED'
+  START_SENSITIVITY_HIGH = 'START_SENSITIVITY_HIGH'
+  START_SENSITIVITY_LOW = 'START_SENSITIVITY_LOW'
+
+
+class EndSensitivity(_common.CaseInSensitiveEnum):
+  """End of speech sensitivity."""
+
+  END_SENSITIVITY_UNSPECIFIED = 'END_SENSITIVITY_UNSPECIFIED'
+  END_SENSITIVITY_HIGH = 'END_SENSITIVITY_HIGH'
+  END_SENSITIVITY_LOW = 'END_SENSITIVITY_LOW'
+
+
+class ActivityHandling(_common.CaseInSensitiveEnum):
+  """The different ways of handling user activity."""
+
+  ACTIVITY_HANDLING_UNSPECIFIED = 'ACTIVITY_HANDLING_UNSPECIFIED'
+  START_OF_ACTIVITY_INTERRUPTS = 'START_OF_ACTIVITY_INTERRUPTS'
+  NO_INTERRUPTION = 'NO_INTERRUPTION'
+
+
+class TurnCoverage(_common.CaseInSensitiveEnum):
+  """Options about which input is included in the user's turn."""
+
+  TURN_COVERAGE_UNSPECIFIED = 'TURN_COVERAGE_UNSPECIFIED'
+  TURN_INCLUDES_ONLY_ACTIVITY = 'TURN_INCLUDES_ONLY_ACTIVITY'
+  TURN_INCLUDES_ALL_INPUT = 'TURN_INCLUDES_ALL_INPUT'
+
+
 class VideoMetadata(_common.BaseModel):
   """Metadata describes the input video content."""
 
@@ -632,18 +658,6 @@ class Part(_common.BaseModel):
     function_response = FunctionResponse(name=name, response=response)
     return cls(function_response=function_response)
 
-  @classmethod
-  def from_video_metadata(cls, *, start_offset: str, end_offset: str) -> 'Part':
-    logger.warning("""Part.from_video_metadata will be deprecated soon.
-        Because a Part instance needs to include at least one of the fields:
-        text, file_data, inline_data, function_call, function_response, executable_code or code_execution_result.
-        A Part instance contains only video_metadata is not a valid Part.
-        """)
-    video_metadata = VideoMetadata(
-        end_offset=end_offset, start_offset=start_offset
-    )
-    return cls(video_metadata=video_metadata)
-
   @classmethod
   def from_executable_code(cls, *, code: str, language: Language) -> 'Part':
     executable_code = ExecutableCode(code=code, language=language)
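With `Part.from_video_metadata` removed, video offsets now ride along on a media part rather than forming a `Part` on their own, which is what the old deprecation warning was pointing at. A minimal sketch (the file URI is illustrative):

```python
from google.genai import types

# Hypothetical GCS URI; video_metadata accompanies real media content.
part = types.Part.from_uri(
    file_uri='gs://my-bucket/clip.mp4', mime_type='video/mp4'
)
part.video_metadata = types.VideoMetadata(start_offset='0s', end_offset='10s')
```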
@@ -708,7 +722,7 @@ class Content(_common.BaseModel):
       default=None,
       description="""Optional. The producer of the content. Must be either 'user' or
       'model'. Useful to set for multi-turn conversations, otherwise can be
-      left blank or unset.""",
+      empty. If role is not specified, SDK will determine the role.""",
   )
 
 
@@ -783,7 +797,7 @@ class ContentDict(TypedDict, total=False):
   role: Optional[str]
   """Optional. The producer of the content. Must be either 'user' or
   'model'. Useful to set for multi-turn conversations, otherwise can be
-  left blank or unset."""
+  empty. If role is not specified, SDK will determine the role."""
 
 
 ContentOrDict = Union[Content, ContentDict]
@@ -1433,6 +1447,11 @@ ToolOrDict = Union[Tool, ToolDict]
 ToolListUnion = list[Union[Tool, Callable]]
 ToolListUnionDict = list[Union[ToolDict, Callable]]
 
+SchemaUnion = Union[
+    dict, type, Schema, builtin_types.GenericAlias, VersionedUnionType  # type: ignore[valid-type]
+]
+SchemaUnionDict = Union[SchemaUnion, SchemaDict]
+
 
 class FunctionCallingConfig(_common.BaseModel):
   """Function calling config."""
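`SchemaUnion` (only moved earlier in the module here) is the set of things `response_schema` accepts: a plain `dict`, a `Schema`, a Python `type` (including generic aliases like `list[str]`), or a union. A minimal sketch of structured output driven by a Pydantic model:

```python
from pydantic import BaseModel

from google import genai
from google.genai import types


class Recipe(BaseModel):
    name: str
    minutes: int


client = genai.Client()  # picks up the API key from the environment
response = client.models.generate_content(
    model='gemini-2.0-flash',  # illustrative model name
    contents='Give me one quick pasta recipe as JSON.',
    config=types.GenerateContentConfig(
        response_mime_type='application/json',
        response_schema=Recipe,  # a `type` is one arm of SchemaUnion
    ),
)
print(response.parsed)  # parsed into a Recipe instance
```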
@@ -1615,6 +1634,11 @@ class ThinkingConfig(_common.BaseModel):
       description="""Indicates whether to include thoughts in the response. If true, thoughts are returned only if the model supports thought and thoughts are available.
       """,
   )
+  thinking_budget: Optional[int] = Field(
+      default=None,
+      description="""Indicates the thinking budget in tokens.
+      """,
+  )
 
 
 class ThinkingConfigDict(TypedDict, total=False):
@@ -1624,6 +1648,10 @@ class ThinkingConfigDict(TypedDict, total=False):
   """Indicates whether to include thoughts in the response. If true, thoughts are returned only if the model supports thought and thoughts are available.
   """
 
+  thinking_budget: Optional[int]
+  """Indicates the thinking budget in tokens.
+  """
+
 
 ThinkingConfigOrDict = Union[ThinkingConfig, ThinkingConfigDict]
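The new `thinking_budget` slots into the existing request config. A minimal sketch, assuming a thinking-capable model (the model name is illustrative):

```python
from google import genai
from google.genai import types

client = genai.Client()

response = client.models.generate_content(
    model='gemini-2.5-flash-preview-04-17',  # illustrative thinking model
    contents='How many prime numbers are there below 50?',
    config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(
            include_thoughts=True,
            thinking_budget=1024,  # cap internal reasoning at ~1024 tokens
        ),
    ),
)
print(response.text)
```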
@@ -1778,14 +1806,6 @@ ContentUnion = Union[Content, list[PartUnion], PartUnion]
 ContentUnionDict = Union[ContentUnion, ContentDict]
 
 
-SchemaUnion = Union[
-    dict, type, Schema, builtin_types.GenericAlias, VersionedUnionType
-]
-
-
-SchemaUnionDict = Union[SchemaUnion, SchemaDict]
-
-
 class GenerationConfigRoutingConfigAutoRoutingMode(_common.BaseModel):
   """When automated routing is specified, the routing will be determined by the pretrained routing model and customer provided model routing preference."""
 
@@ -2397,6 +2417,9 @@ GroundingChunkRetrievedContextOrDict = Union[
 class GroundingChunkWeb(_common.BaseModel):
   """Chunk from the web."""
 
+  domain: Optional[str] = Field(
+      default=None, description="""Domain of the (original) URI."""
+  )
   title: Optional[str] = Field(
       default=None, description="""Title of the chunk."""
   )
@@ -2408,6 +2431,9 @@ class GroundingChunkWeb(_common.BaseModel):
 class GroundingChunkWebDict(TypedDict, total=False):
   """Chunk from the web."""
 
+  domain: Optional[str]
+  """Domain of the (original) URI."""
+
   title: Optional[str]
   """Title of the chunk."""
 
@@ -2936,6 +2962,10 @@ class GenerateContentResponseUsageMetadata(_common.BaseModel):
       default=None,
       description="""Total token count for prompt, response candidates, and tool-use prompts (if present).""",
   )
+  traffic_type: Optional[TrafficType] = Field(
+      default=None,
+      description="""Output only. Traffic type. This shows whether a request consumes Pay-As-You-Go or Provisioned Throughput quota.""",
+  )
 
 
 class GenerateContentResponseUsageMetadataDict(TypedDict, total=False):
@@ -2971,6 +3001,9 @@ class GenerateContentResponseUsageMetadataDict(TypedDict, total=False):
   total_token_count: Optional[int]
   """Total token count for prompt, response candidates, and tool-use prompts (if present)."""
 
+  traffic_type: Optional[TrafficType]
+  """Output only. Traffic type. This shows whether a request consumes Pay-As-You-Go or Provisioned Throughput quota."""
+
 
 GenerateContentResponseUsageMetadataOrDict = Union[
     GenerateContentResponseUsageMetadata,
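The quota pool that served a request can now be read off the usage metadata (this is an output-only field, populated on Vertex AI). A minimal sketch, reusing the `response` from the example above:

```python
usage = response.usage_metadata
if usage is not None and usage.traffic_type is not None:
    if usage.traffic_type == types.TrafficType.PROVISIONED_THROUGHPUT:
        print('Served from Provisioned Throughput quota')
    else:
        print('Served from Pay-As-You-Go (on-demand) quota')
```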
@@ -4878,6 +4911,10 @@ class GenerationConfig(_common.BaseModel):
       default=None,
       description="""Optional. The maximum number of output tokens to generate per message.""",
   )
+  media_resolution: Optional[MediaResolution] = Field(
+      default=None,
+      description="""Optional. If specified, the media resolution specified will be used.""",
+  )
   presence_penalty: Optional[float] = Field(
       default=None, description="""Optional. Positive penalties."""
   )
@@ -4932,6 +4969,9 @@ class GenerationConfigDict(TypedDict, total=False):
   max_output_tokens: Optional[int]
   """Optional. The maximum number of output tokens to generate per message."""
 
+  media_resolution: Optional[MediaResolution]
+  """Optional. If specified, the media resolution specified will be used."""
+
   presence_penalty: Optional[float]
   """Optional. Positive penalties."""
 
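A minimal sketch of trading image/video token cost for detail with the relocated `MediaResolution` enum, assuming the user-facing `GenerateContentConfig` mirrors this field as it does in current releases:

```python
config = types.GenerateContentConfig(
    media_resolution=types.MediaResolution.MEDIA_RESOLUTION_LOW,  # cheaper, coarser media tokens
)
```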
@@ -5471,9 +5511,8 @@ class GenerateVideosOperation(_common.BaseModel):
       default=None,
       description="""The error result of the operation in case of failure or cancellation.""",
   )
-  response: Optional[dict[str, Any]] = Field(
-      default=None,
-      description="""The normal response of the operation in case of success.""",
+  response: Optional[GenerateVideosResponse] = Field(
+      default=None, description="""The generated videos."""
   )
   result: Optional[GenerateVideosResponse] = Field(
       default=None, description="""The generated videos."""
@@ -5495,8 +5534,8 @@ class GenerateVideosOperationDict(TypedDict, total=False):
   error: Optional[dict[str, Any]]
   """The error result of the operation in case of failure or cancellation."""
 
-  response: Optional[dict[str, Any]]
-  """The normal response of the operation in case of success."""
+  response: Optional[GenerateVideosResponseDict]
+  """The generated videos."""
 
   result: Optional[GenerateVideosResponseDict]
   """The generated videos."""
@@ -6693,10 +6732,6 @@ class Operation(_common.BaseModel):
       default=None,
       description="""The error result of the operation in case of failure or cancellation.""",
   )
-  response: Optional[dict[str, Any]] = Field(
-      default=None,
-      description="""The normal response of the operation in case of success.""",
-  )
 
 
 class OperationDict(TypedDict, total=False):
@@ -6714,9 +6749,6 @@ class OperationDict(TypedDict, total=False):
   error: Optional[dict[str, Any]]
   """The error result of the operation in case of failure or cancellation."""
 
-  response: Optional[dict[str, Any]]
-  """The normal response of the operation in case of success."""
-
 
 OperationOrDict = Union[Operation, OperationDict]
 
@@ -8774,6 +8806,36 @@ LiveServerSetupCompleteOrDict = Union[
 ]
 
 
+class Transcription(_common.BaseModel):
+  """Audio transcription in Server Conent."""
+
+  text: Optional[str] = Field(
+      default=None,
+      description="""Transcription text.
+      """,
+  )
+  finished: Optional[bool] = Field(
+      default=None,
+      description="""The bool indicates the end of the transcription.
+      """,
+  )
+
+
+class TranscriptionDict(TypedDict, total=False):
+  """Audio transcription in Server Conent."""
+
+  text: Optional[str]
+  """Transcription text.
+  """
+
+  finished: Optional[bool]
+  """The bool indicates the end of the transcription.
+  """
+
+
+TranscriptionOrDict = Union[Transcription, TranscriptionDict]
+
+
 class LiveServerContent(_common.BaseModel):
   """Incremental server update generated by the model in response to client messages.
 
@@ -8793,6 +8855,30 @@ class LiveServerContent(_common.BaseModel):
       default=None,
       description="""If true, indicates that a client message has interrupted current model generation. If the client is playing out the content in realtime, this is a good signal to stop and empty the current queue.""",
   )
+  generation_complete: Optional[bool] = Field(
+      default=None,
+      description="""If true, indicates that the model is done generating. When model is
+      interrupted while generating there will be no generation_complete message
+      in interrupted turn, it will go through interrupted > turn_complete.
+      When model assumes realtime playback there will be delay between
+      generation_complete and turn_complete that is caused by model
+      waiting for playback to finish. If true, indicates that the model
+      has finished generating all content. This is a signal to the client
+      that it can stop sending messages.""",
+  )
+  input_transcription: Optional[Transcription] = Field(
+      default=None,
+      description="""Input transcription. The transcription is independent to the model
+      turn which means it doesn’t imply any ordering between transcription and
+      model turn.""",
+  )
+  output_transcription: Optional[Transcription] = Field(
+      default=None,
+      description="""Output transcription. The transcription is independent to the model
+      turn which means it doesn’t imply any ordering between transcription and
+      model turn.
+      """,
+  )
 
 
 class LiveServerContentDict(TypedDict, total=False):
@@ -8811,6 +8897,27 @@ class LiveServerContentDict(TypedDict, total=False):
   interrupted: Optional[bool]
   """If true, indicates that a client message has interrupted current model generation. If the client is playing out the content in realtime, this is a good signal to stop and empty the current queue."""
 
+  generation_complete: Optional[bool]
+  """If true, indicates that the model is done generating. When model is
+  interrupted while generating there will be no generation_complete message
+  in interrupted turn, it will go through interrupted > turn_complete.
+  When model assumes realtime playback there will be delay between
+  generation_complete and turn_complete that is caused by model
+  waiting for playback to finish. If true, indicates that the model
+  has finished generating all content. This is a signal to the client
+  that it can stop sending messages."""
+
+  input_transcription: Optional[TranscriptionDict]
+  """Input transcription. The transcription is independent to the model
+  turn which means it doesn’t imply any ordering between transcription and
+  model turn."""
+
+  output_transcription: Optional[TranscriptionDict]
+  """Output transcription. The transcription is independent to the model
+  turn which means it doesn’t imply any ordering between transcription and
+  model turn.
+  """
+
 
 LiveServerContentOrDict = Union[LiveServerContent, LiveServerContentDict]
 
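A minimal sketch of a receive loop watching the new server-content fields, assuming `session` is a live session opened with input and output transcription enabled (see `LiveConnectConfig` further down):

```python
async def drain(session):
    # Each message from a live session is a LiveServerMessage.
    async for message in session.receive():
        content = message.server_content
        if content is None:
            continue
        if content.input_transcription and content.input_transcription.text:
            print('you said:', content.input_transcription.text)
        if content.output_transcription and content.output_transcription.text:
            print('model said:', content.output_transcription.text)
        if content.generation_complete:
            print('generation complete')  # may precede turn_complete during playback
        if content.turn_complete:
            break
```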
@@ -8863,6 +8970,165 @@ LiveServerToolCallCancellationOrDict = Union[
 ]
 
 
+class UsageMetadata(_common.BaseModel):
+  """Usage metadata about response(s)."""
+
+  prompt_token_count: Optional[int] = Field(
+      default=None,
+      description="""Number of tokens in the prompt. When `cached_content` is set, this is still the total effective prompt size meaning this includes the number of tokens in the cached content.""",
+  )
+  cached_content_token_count: Optional[int] = Field(
+      default=None,
+      description="""Number of tokens in the cached part of the prompt (the cached content).""",
+  )
+  response_token_count: Optional[int] = Field(
+      default=None,
+      description="""Total number of tokens across all the generated response candidates.""",
+  )
+  tool_use_prompt_token_count: Optional[int] = Field(
+      default=None,
+      description="""Number of tokens present in tool-use prompt(s).""",
+  )
+  thoughts_token_count: Optional[int] = Field(
+      default=None,
+      description="""Number of tokens of thoughts for thinking models.""",
+  )
+  total_token_count: Optional[int] = Field(
+      default=None,
+      description="""Total token count for prompt, response candidates, and tool-use prompts(if present).""",
+  )
+  prompt_tokens_details: Optional[list[ModalityTokenCount]] = Field(
+      default=None,
+      description="""List of modalities that were processed in the request input.""",
+  )
+  cache_tokens_details: Optional[list[ModalityTokenCount]] = Field(
+      default=None,
+      description="""List of modalities that were processed in the cache input.""",
+  )
+  response_tokens_details: Optional[list[ModalityTokenCount]] = Field(
+      default=None,
+      description="""List of modalities that were returned in the response.""",
+  )
+  tool_use_prompt_tokens_details: Optional[list[ModalityTokenCount]] = Field(
+      default=None,
+      description="""List of modalities that were processed in the tool-use prompt.""",
+  )
+  traffic_type: Optional[TrafficType] = Field(
+      default=None,
+      description="""Traffic type. This shows whether a request consumes Pay-As-You-Go
+      or Provisioned Throughput quota.""",
+  )
+
+
+class UsageMetadataDict(TypedDict, total=False):
+  """Usage metadata about response(s)."""
+
+  prompt_token_count: Optional[int]
+  """Number of tokens in the prompt. When `cached_content` is set, this is still the total effective prompt size meaning this includes the number of tokens in the cached content."""
+
+  cached_content_token_count: Optional[int]
+  """Number of tokens in the cached part of the prompt (the cached content)."""
+
+  response_token_count: Optional[int]
+  """Total number of tokens across all the generated response candidates."""
+
+  tool_use_prompt_token_count: Optional[int]
+  """Number of tokens present in tool-use prompt(s)."""
+
+  thoughts_token_count: Optional[int]
+  """Number of tokens of thoughts for thinking models."""
+
+  total_token_count: Optional[int]
+  """Total token count for prompt, response candidates, and tool-use prompts(if present)."""
+
+  prompt_tokens_details: Optional[list[ModalityTokenCountDict]]
+  """List of modalities that were processed in the request input."""
+
+  cache_tokens_details: Optional[list[ModalityTokenCountDict]]
+  """List of modalities that were processed in the cache input."""
+
+  response_tokens_details: Optional[list[ModalityTokenCountDict]]
+  """List of modalities that were returned in the response."""
+
+  tool_use_prompt_tokens_details: Optional[list[ModalityTokenCountDict]]
+  """List of modalities that were processed in the tool-use prompt."""
+
+  traffic_type: Optional[TrafficType]
+  """Traffic type. This shows whether a request consumes Pay-As-You-Go
+  or Provisioned Throughput quota."""
+
+
+UsageMetadataOrDict = Union[UsageMetadata, UsageMetadataDict]
+
+
+class LiveServerGoAway(_common.BaseModel):
+  """Server will not be able to service client soon."""
+
+  time_left: Optional[str] = Field(
+      default=None,
+      description="""The remaining time before the connection will be terminated as ABORTED. The minimal time returned here is specified differently together with the rate limits for a given model.""",
+  )
+
+
+class LiveServerGoAwayDict(TypedDict, total=False):
+  """Server will not be able to service client soon."""
+
+  time_left: Optional[str]
+  """The remaining time before the connection will be terminated as ABORTED. The minimal time returned here is specified differently together with the rate limits for a given model."""
+
+
+LiveServerGoAwayOrDict = Union[LiveServerGoAway, LiveServerGoAwayDict]
+
+
+class LiveServerSessionResumptionUpdate(_common.BaseModel):
+  """Update of the session resumption state.
+
+  Only sent if `session_resumption` was set in the connection config.
+  """
+
+  new_handle: Optional[str] = Field(
+      default=None,
+      description="""New handle that represents state that can be resumed. Empty if `resumable`=false.""",
+  )
+  resumable: Optional[bool] = Field(
+      default=None,
+      description="""True if session can be resumed at this point. It might be not possible to resume session at some points. In that case we send update empty new_handle and resumable=false. Example of such case could be model executing function calls or just generating. Resuming session (using previous session token) in such state will result in some data loss.""",
+  )
+  last_consumed_client_message_index: Optional[int] = Field(
+      default=None,
+      description="""Index of last message sent by client that is included in state represented by this SessionResumptionToken. Only sent when `SessionResumptionConfig.transparent` is set.
+
+      Presence of this index allows users to transparently reconnect and avoid issue of losing some part of realtime audio input/video. If client wishes to temporarily disconnect (for example as result of receiving GoAway) they can do it without losing state by buffering messages sent since last `SessionResmumptionTokenUpdate`. This field will enable them to limit buffering (avoid keeping all requests in RAM).
+
+      Note: This should not be used for when resuming a session at some time later -- in those cases partial audio and video frames arelikely not needed.""",
+  )
+
+
+class LiveServerSessionResumptionUpdateDict(TypedDict, total=False):
+  """Update of the session resumption state.
+
+  Only sent if `session_resumption` was set in the connection config.
+  """
+
+  new_handle: Optional[str]
+  """New handle that represents state that can be resumed. Empty if `resumable`=false."""
+
+  resumable: Optional[bool]
+  """True if session can be resumed at this point. It might be not possible to resume session at some points. In that case we send update empty new_handle and resumable=false. Example of such case could be model executing function calls or just generating. Resuming session (using previous session token) in such state will result in some data loss."""
+
+  last_consumed_client_message_index: Optional[int]
+  """Index of last message sent by client that is included in state represented by this SessionResumptionToken. Only sent when `SessionResumptionConfig.transparent` is set.
+
+  Presence of this index allows users to transparently reconnect and avoid issue of losing some part of realtime audio input/video. If client wishes to temporarily disconnect (for example as result of receiving GoAway) they can do it without losing state by buffering messages sent since last `SessionResmumptionTokenUpdate`. This field will enable them to limit buffering (avoid keeping all requests in RAM).
+
+  Note: This should not be used for when resuming a session at some time later -- in those cases partial audio and video frames arelikely not needed."""
+
+
+LiveServerSessionResumptionUpdateOrDict = Union[
+    LiveServerSessionResumptionUpdate, LiveServerSessionResumptionUpdateDict
+]
+
+
 class LiveServerMessage(_common.BaseModel):
   """Response message for API call."""
 
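These two message types pair up for graceful reconnects: `go_away.time_left` warns that the server will drop the connection, and the latest resumable `new_handle` can seed the next connection's `SessionResumptionConfig`. A minimal sketch of tracking the handle in a receive loop:

```python
async def track_resumption(session):
    resume_handle = None
    async for message in session.receive():
        update = message.session_resumption_update
        if update is not None and update.resumable and update.new_handle:
            resume_handle = update.new_handle  # newest resumable checkpoint
        if message.go_away is not None:
            print('server disconnecting in', message.go_away.time_left)
            break
    # Reconnect with SessionResumptionConfig(handle=resume_handle) to restore state.
    return resume_handle
```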
@@ -8882,6 +9148,18 @@ class LiveServerMessage(_common.BaseModel):
       default=None,
       description="""Notification for the client that a previously issued `ToolCallMessage` with the specified `id`s should have been not executed and should be cancelled.""",
   )
+  usage_metadata: Optional[UsageMetadata] = Field(
+      default=None, description="""Usage metadata about model response(s)."""
+  )
+  go_away: Optional[LiveServerGoAway] = Field(
+      default=None, description="""Server will disconnect soon."""
+  )
+  session_resumption_update: Optional[LiveServerSessionResumptionUpdate] = (
+      Field(
+          default=None,
+          description="""Update of the session resumption state.""",
+      )
+  )
 
   @property
   def text(self) -> Optional[str]:
@@ -8933,10 +9211,206 @@ class LiveServerMessageDict(TypedDict, total=False):
   tool_call_cancellation: Optional[LiveServerToolCallCancellationDict]
   """Notification for the client that a previously issued `ToolCallMessage` with the specified `id`s should have been not executed and should be cancelled."""
 
+  usage_metadata: Optional[UsageMetadataDict]
+  """Usage metadata about model response(s)."""
+
+  go_away: Optional[LiveServerGoAwayDict]
+  """Server will disconnect soon."""
+
+  session_resumption_update: Optional[LiveServerSessionResumptionUpdateDict]
+  """Update of the session resumption state."""
+
 
 LiveServerMessageOrDict = Union[LiveServerMessage, LiveServerMessageDict]
 
 
+class AutomaticActivityDetection(_common.BaseModel):
+  """Configures automatic detection of activity."""
+
+  disabled: Optional[bool] = Field(
+      default=None,
+      description="""If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals.""",
+  )
+  start_of_speech_sensitivity: Optional[StartSensitivity] = Field(
+      default=None,
+      description="""Determines how likely speech is to be detected.""",
+  )
+  end_of_speech_sensitivity: Optional[EndSensitivity] = Field(
+      default=None,
+      description="""Determines how likely detected speech is ended.""",
+  )
+  prefix_padding_ms: Optional[int] = Field(
+      default=None,
+      description="""The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives.""",
+  )
+  silence_duration_ms: Optional[int] = Field(
+      default=None,
+      description="""The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency.""",
+  )
+
+
+class AutomaticActivityDetectionDict(TypedDict, total=False):
+  """Configures automatic detection of activity."""
+
+  disabled: Optional[bool]
+  """If enabled, detected voice and text input count as activity. If disabled, the client must send activity signals."""
+
+  start_of_speech_sensitivity: Optional[StartSensitivity]
+  """Determines how likely speech is to be detected."""
+
+  end_of_speech_sensitivity: Optional[EndSensitivity]
+  """Determines how likely detected speech is ended."""
+
+  prefix_padding_ms: Optional[int]
+  """The required duration of detected speech before start-of-speech is committed. The lower this value the more sensitive the start-of-speech detection is and the shorter speech can be recognized. However, this also increases the probability of false positives."""
+
+  silence_duration_ms: Optional[int]
+  """The required duration of detected non-speech (e.g. silence) before end-of-speech is committed. The larger this value, the longer speech gaps can be without interrupting the user's activity but this will increase the model's latency."""
+
+
+AutomaticActivityDetectionOrDict = Union[
+    AutomaticActivityDetection, AutomaticActivityDetectionDict
+]
+
+
+class RealtimeInputConfig(_common.BaseModel):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  automatic_activity_detection: Optional[AutomaticActivityDetection] = Field(
+      default=None,
+      description="""If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals.""",
+  )
+  activity_handling: Optional[ActivityHandling] = Field(
+      default=None, description="""Defines what effect activity has."""
+  )
+  turn_coverage: Optional[TurnCoverage] = Field(
+      default=None,
+      description="""Defines which input is included in the user's turn.""",
+  )
+
+
+class RealtimeInputConfigDict(TypedDict, total=False):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  automatic_activity_detection: Optional[AutomaticActivityDetectionDict]
+  """If not set, automatic activity detection is enabled by default. If automatic voice detection is disabled, the client must send activity signals."""
+
+  activity_handling: Optional[ActivityHandling]
+  """Defines what effect activity has."""
+
+  turn_coverage: Optional[TurnCoverage]
+  """Defines which input is included in the user's turn."""
+
+
+RealtimeInputConfigOrDict = Union[RealtimeInputConfig, RealtimeInputConfigDict]
+
+
+class SessionResumptionConfig(_common.BaseModel):
+  """Configuration of session resumption mechanism.
+
+  Included in `LiveConnectConfig.session_resumption`. If included server
+  will send `LiveServerSessionResumptionUpdate` messages.
+  """
+
+  handle: Optional[str] = Field(
+      default=None,
+      description="""Session resumption handle of previous session (session to restore).
+
+      If not present new session will be started.""",
+  )
+  transparent: Optional[bool] = Field(
+      default=None,
+      description="""If set the server will send `last_consumed_client_message_index` in the `session_resumption_update` messages to allow for transparent reconnections.""",
+  )
+
+
+class SessionResumptionConfigDict(TypedDict, total=False):
+  """Configuration of session resumption mechanism.
+
+  Included in `LiveConnectConfig.session_resumption`. If included server
+  will send `LiveServerSessionResumptionUpdate` messages.
+  """
+
+  handle: Optional[str]
+  """Session resumption handle of previous session (session to restore).
+
+  If not present new session will be started."""
+
+  transparent: Optional[bool]
+  """If set the server will send `last_consumed_client_message_index` in the `session_resumption_update` messages to allow for transparent reconnections."""
+
+
+SessionResumptionConfigOrDict = Union[
+    SessionResumptionConfig, SessionResumptionConfigDict
+]
+
+
+class SlidingWindow(_common.BaseModel):
+  """Context window will be truncated by keeping only suffix of it.
+
+  Context window will always be cut at start of USER role turn. System
+  instructions and `BidiGenerateContentSetup.prefix_turns` will not be
+  subject to the sliding window mechanism, they will always stay at the
+  beginning of context window.
+  """
+
+  target_tokens: Optional[int] = Field(
+      default=None,
+      description="""Session reduction target -- how many tokens we should keep. Window shortening operation has some latency costs, so we should avoid running it on every turn. Should be < trigger_tokens. If not set, trigger_tokens/2 is assumed.""",
+  )
+
+
+class SlidingWindowDict(TypedDict, total=False):
+  """Context window will be truncated by keeping only suffix of it.
+
+  Context window will always be cut at start of USER role turn. System
+  instructions and `BidiGenerateContentSetup.prefix_turns` will not be
+  subject to the sliding window mechanism, they will always stay at the
+  beginning of context window.
+  """
+
+  target_tokens: Optional[int]
+  """Session reduction target -- how many tokens we should keep. Window shortening operation has some latency costs, so we should avoid running it on every turn. Should be < trigger_tokens. If not set, trigger_tokens/2 is assumed."""
+
+
+SlidingWindowOrDict = Union[SlidingWindow, SlidingWindowDict]
+
+
+class ContextWindowCompressionConfig(_common.BaseModel):
+  """Enables context window compression -- mechanism managing model context window so it does not exceed given length."""
+
+  trigger_tokens: Optional[int] = Field(
+      default=None,
+      description="""Number of tokens (before running turn) that triggers context window compression mechanism.""",
+  )
+  sliding_window: Optional[SlidingWindow] = Field(
+      default=None, description="""Sliding window compression mechanism."""
+  )
+
+
+class ContextWindowCompressionConfigDict(TypedDict, total=False):
+  """Enables context window compression -- mechanism managing model context window so it does not exceed given length."""
+
+  trigger_tokens: Optional[int]
+  """Number of tokens (before running turn) that triggers context window compression mechanism."""
+
+  sliding_window: Optional[SlidingWindowDict]
+  """Sliding window compression mechanism."""
+
+
+ContextWindowCompressionConfigOrDict = Union[
+    ContextWindowCompressionConfig, ContextWindowCompressionConfigDict
+]
+
+
 class LiveClientSetup(_common.BaseModel):
   """Message contains configuration that will apply for the duration of the streaming session."""
 
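A minimal sketch constructing the new voice-activity-detection types; this diff does not show where `RealtimeInputConfig` attaches in a live connect call, so only the objects themselves are built here:

```python
from google.genai import types

vad = types.AutomaticActivityDetection(
    disabled=False,
    start_of_speech_sensitivity=types.StartSensitivity.START_SENSITIVITY_HIGH,
    end_of_speech_sensitivity=types.EndSensitivity.END_SENSITIVITY_LOW,
    prefix_padding_ms=100,    # commit start-of-speech after 100 ms of speech
    silence_duration_ms=500,  # commit end-of-speech after 500 ms of silence
)

realtime_input = types.RealtimeInputConfig(
    automatic_activity_detection=vad,
    activity_handling=types.ActivityHandling.START_OF_ACTIVITY_INTERRUPTS,
    turn_coverage=types.TurnCoverage.TURN_INCLUDES_ONLY_ACTIVITY,
)
```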
@@ -8950,15 +9424,7 @@ class LiveClientSetup(_common.BaseModel):
   generation_config: Optional[GenerationConfig] = Field(
       default=None,
       description="""The generation configuration for the session.
-
-The following fields are supported:
-- `response_logprobs`
-- `response_mime_type`
-- `logprobs`
-- `response_schema`
-- `stop_sequence`
-- `routing_config`
-- `audio_timestamp`
+Note: only a subset of fields are supported.
       """,
   )
   system_instruction: Optional[Content] = Field(
@@ -8975,6 +9441,18 @@ class LiveClientSetup(_common.BaseModel):
   external systems to perform an action, or set of actions, outside of
   knowledge and scope of the model.""",
   )
+  session_resumption: Optional[SessionResumptionConfig] = Field(
+      default=None,
+      description="""Configures session resumption mechanism.
+
+      If included server will send SessionResumptionUpdate messages.""",
+  )
+  context_window_compression: Optional[ContextWindowCompressionConfig] = Field(
+      default=None,
+      description="""Configures context window compression mechanism.
+
+      If included, server will compress context window to fit into given length.""",
+  )
 
 
 class LiveClientSetupDict(TypedDict, total=False):
@@ -8988,15 +9466,7 @@ class LiveClientSetupDict(TypedDict, total=False):
 
   generation_config: Optional[GenerationConfigDict]
   """The generation configuration for the session.
-
-The following fields are supported:
-- `response_logprobs`
-- `response_mime_type`
-- `logprobs`
-- `response_schema`
-- `stop_sequence`
-- `routing_config`
-- `audio_timestamp`
+Note: only a subset of fields are supported.
   """
 
   system_instruction: Optional[ContentDict]
@@ -9011,6 +9481,16 @@ class LiveClientSetupDict(TypedDict, total=False):
   external systems to perform an action, or set of actions, outside of
   knowledge and scope of the model."""
 
+  session_resumption: Optional[SessionResumptionConfigDict]
+  """Configures session resumption mechanism.
+
+  If included server will send SessionResumptionUpdate messages."""
+
+  context_window_compression: Optional[ContextWindowCompressionConfigDict]
+  """Configures context window compression mechanism.
+
+  If included, server will compress context window to fit into given length."""
+
 
 LiveClientSetupOrDict = Union[LiveClientSetup, LiveClientSetupDict]
 
@@ -9067,14 +9547,60 @@ class LiveClientContentDict(TypedDict, total=False):
 LiveClientContentOrDict = Union[LiveClientContent, LiveClientContentDict]
 
 
+class ActivityStart(_common.BaseModel):
+  """Marks the start of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  pass
+
+
+class ActivityStartDict(TypedDict, total=False):
+  """Marks the start of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  pass
+
+
+ActivityStartOrDict = Union[ActivityStart, ActivityStartDict]
+
+
+class ActivityEnd(_common.BaseModel):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  pass
+
+
+class ActivityEndDict(TypedDict, total=False):
+  """Marks the end of user activity.
+
+  This can only be sent if automatic (i.e. server-side) activity detection is
+  disabled.
+  """
+
+  pass
+
+
+ActivityEndOrDict = Union[ActivityEnd, ActivityEndDict]
+
+
 class LiveClientRealtimeInput(_common.BaseModel):
   """User input that is sent in real time.
 
-  This is different from `ClientContentUpdate` in a few ways:
+  This is different from `LiveClientContent` in a few ways:
 
   - Can be sent continuously without interruption to model generation.
   - If there is a need to mix data interleaved across the
-    `ClientContentUpdate` and the `RealtimeUpdate`, server attempts to
+    `LiveClientContent` and the `LiveClientRealtimeInput`, server attempts to
     optimize for best response, but there are no guarantees.
   - End of turn is not explicitly specified, but is rather derived from user
     activity (for example, end of speech).
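When automatic activity detection is disabled, the client brackets speech with these markers itself. Their wiring into the realtime-input message is not part of this diff, so this sketch only constructs the signal types:

```python
start_signal = types.ActivityStart()  # the user started talking
end_signal = types.ActivityEnd()      # the user stopped talking
```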
@@ -9092,11 +9618,11 @@ class LiveClientRealtimeInput(_common.BaseModel):
 class LiveClientRealtimeInputDict(TypedDict, total=False):
   """User input that is sent in real time.
 
-  This is different from `ClientContentUpdate` in a few ways:
+  This is different from `LiveClientContent` in a few ways:
 
   - Can be sent continuously without interruption to model generation.
   - If there is a need to mix data interleaved across the
-    `ClientContentUpdate` and the `RealtimeUpdate`, server attempts to
+    `LiveClientContent` and the `LiveClientRealtimeInput`, server attempts to
     optimize for best response, but there are no guarantees.
   - End of turn is not explicitly specified, but is rather derived from user
     activity (for example, end of speech).
@@ -9192,6 +9718,23 @@ class LiveClientMessageDict(TypedDict, total=False):
 LiveClientMessageOrDict = Union[LiveClientMessage, LiveClientMessageDict]
 
 
+class AudioTranscriptionConfig(_common.BaseModel):
+  """The audio transcription configuration in Setup."""
+
+  pass
+
+
+class AudioTranscriptionConfigDict(TypedDict, total=False):
+  """The audio transcription configuration in Setup."""
+
+  pass
+
+
+AudioTranscriptionConfigOrDict = Union[
+    AudioTranscriptionConfig, AudioTranscriptionConfigDict
+]
+
+
 class LiveConnectConfig(_common.BaseModel):
   """Session config for the API connection."""
 
@@ -9205,6 +9748,47 @@ class LiveConnectConfig(_common.BaseModel):
   modalities that the model can return. Defaults to AUDIO if not specified.
   """,
   )
+  temperature: Optional[float] = Field(
+      default=None,
+      description="""Value that controls the degree of randomness in token selection.
+      Lower temperatures are good for prompts that require a less open-ended or
+      creative response, while higher temperatures can lead to more diverse or
+      creative results.
+      """,
+  )
+  top_p: Optional[float] = Field(
+      default=None,
+      description="""Tokens are selected from the most to least probable until the sum
+      of their probabilities equals this value. Use a lower value for less
+      random responses and a higher value for more random responses.
+      """,
+  )
+  top_k: Optional[float] = Field(
+      default=None,
+      description="""For each token selection step, the ``top_k`` tokens with the
+      highest probabilities are sampled. Then tokens are further filtered based
+      on ``top_p`` with the final token selected using temperature sampling. Use
+      a lower number for less random responses and a higher number for more
+      random responses.
+      """,
+  )
+  max_output_tokens: Optional[int] = Field(
+      default=None,
+      description="""Maximum number of tokens that can be generated in the response.
+      """,
+  )
+  media_resolution: Optional[MediaResolution] = Field(
+      default=None,
+      description="""If specified, the media resolution specified will be used.
+      """,
+  )
+  seed: Optional[int] = Field(
+      default=None,
+      description="""When ``seed`` is fixed to a specific number, the model makes a best
+      effort to provide the same response for repeated requests. By default, a
+      random number is used.
+      """,
+  )
   speech_config: Optional[SpeechConfig] = Field(
       default=None,
       description="""The speech generation configuration.
@@ -9224,6 +9808,29 @@ class LiveConnectConfig(_common.BaseModel):
   external systems to perform an action, or set of actions, outside of
   knowledge and scope of the model.""",
   )
+  session_resumption: Optional[SessionResumptionConfig] = Field(
+      default=None,
+      description="""Configures session resumption mechanism.
+
+      If included the server will send SessionResumptionUpdate messages.""",
+  )
+  input_audio_transcription: Optional[AudioTranscriptionConfig] = Field(
+      default=None,
+      description="""The transcription of the input aligns with the input audio language.
+      """,
+  )
+  output_audio_transcription: Optional[AudioTranscriptionConfig] = Field(
+      default=None,
+      description="""The transcription of the output aligns with the language code
+      specified for the output audio.
+      """,
+  )
+  context_window_compression: Optional[ContextWindowCompressionConfig] = Field(
+      default=None,
+      description="""Configures context window compression mechanism.
+
+      If included, server will compress context window to fit into given length.""",
+  )
 
 
 class LiveConnectConfigDict(TypedDict, total=False):
@@ -9237,6 +9844,41 @@ class LiveConnectConfigDict(TypedDict, total=False):
   modalities that the model can return. Defaults to AUDIO if not specified.
   """
 
+  temperature: Optional[float]
+  """Value that controls the degree of randomness in token selection.
+  Lower temperatures are good for prompts that require a less open-ended or
+  creative response, while higher temperatures can lead to more diverse or
+  creative results.
+  """
+
+  top_p: Optional[float]
+  """Tokens are selected from the most to least probable until the sum
+  of their probabilities equals this value. Use a lower value for less
+  random responses and a higher value for more random responses.
+  """
+
+  top_k: Optional[float]
+  """For each token selection step, the ``top_k`` tokens with the
+  highest probabilities are sampled. Then tokens are further filtered based
+  on ``top_p`` with the final token selected using temperature sampling. Use
+  a lower number for less random responses and a higher number for more
+  random responses.
+  """
+
+  max_output_tokens: Optional[int]
+  """Maximum number of tokens that can be generated in the response.
+  """
+
+  media_resolution: Optional[MediaResolution]
+  """If specified, the media resolution specified will be used.
+  """
+
+  seed: Optional[int]
+  """When ``seed`` is fixed to a specific number, the model makes a best
+  effort to provide the same response for repeated requests. By default, a
+  random number is used.
+  """
+
   speech_config: Optional[SpeechConfigDict]
   """The speech generation configuration.
   """
@@ -9253,5 +9895,24 @@ class LiveConnectConfigDict(TypedDict, total=False):
   external systems to perform an action, or set of actions, outside of
   knowledge and scope of the model."""
 
+  session_resumption: Optional[SessionResumptionConfigDict]
+  """Configures session resumption mechanism.
+
+  If included the server will send SessionResumptionUpdate messages."""
+
+  input_audio_transcription: Optional[AudioTranscriptionConfigDict]
+  """The transcription of the input aligns with the input audio language.
+  """
+
+  output_audio_transcription: Optional[AudioTranscriptionConfigDict]
+  """The transcription of the output aligns with the language code
+  specified for the output audio.
+  """
+
+  context_window_compression: Optional[ContextWindowCompressionConfigDict]
+  """Configures context window compression mechanism.
+
+  If included, server will compress context window to fit into given length."""
+
 
 LiveConnectConfigOrDict = Union[LiveConnectConfig, LiveConnectConfigDict]