databricks-sdk 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of databricks-sdk might be problematic; see the package's registry page for more details.
- databricks/sdk/__init__.py +273 -239
- databricks/sdk/_base_client.py +36 -24
- databricks/sdk/config.py +5 -0
- databricks/sdk/credentials_provider.py +35 -20
- databricks/sdk/data_plane.py +1 -1
- databricks/sdk/mixins/files.py +184 -1
- databricks/sdk/mixins/open_ai_client.py +55 -1
- databricks/sdk/retries.py +5 -1
- databricks/sdk/service/apps.py +12 -4
- databricks/sdk/service/billing.py +348 -0
- databricks/sdk/service/catalog.py +16 -62
- databricks/sdk/service/cleanrooms.py +73 -2
- databricks/sdk/service/compute.py +40 -0
- databricks/sdk/service/dashboards.py +12 -4
- databricks/sdk/service/files.py +6 -3
- databricks/sdk/service/iam.py +158 -0
- databricks/sdk/service/jobs.py +253 -17
- databricks/sdk/service/oauth2.py +94 -50
- databricks/sdk/service/pipelines.py +89 -12
- databricks/sdk/service/serving.py +424 -222
- databricks/sdk/service/settings.py +206 -0
- databricks/sdk/service/sharing.py +51 -54
- databricks/sdk/useragent.py +54 -0
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/METADATA +26 -26
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/RECORD +30 -30
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/WHEEL +1 -1
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/LICENSE +0 -0
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/NOTICE +0 -0
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/top_level.txt +0 -0
databricks/sdk/service/serving.py:

@@ -12,14 +12,11 @@ from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional
 
 import requests
 
-from ..data_plane import DataPlaneService
 from ..errors import OperationFailed
 from ._internal import Wait, _enum, _from_dict, _repeated_dict
 
 _LOG = logging.getLogger('databricks.sdk')
 
-from databricks.sdk.service import oauth2
-
 # all definitions in this file are in alphabetical order
 
 

@@ -148,11 +145,8 @@ class AiGatewayGuardrailParameters:
 
 @dataclass
 class AiGatewayGuardrailPiiBehavior:
-    behavior: AiGatewayGuardrailPiiBehaviorBehavior
-    """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
-    guardrail and the request contains PII, the request is not sent to the model server and 400
-    status code is returned; if 'BLOCK' is set for the output guardrail and the model response
-    contains PII, the PII info in the response is redacted and 400 status code is returned."""
+    behavior: Optional[AiGatewayGuardrailPiiBehaviorBehavior] = None
+    """Configuration for input guardrail filters."""
 
     def as_dict(self) -> dict:
         """Serializes the AiGatewayGuardrailPiiBehavior into a dictionary suitable for use as a JSON request body."""

@@ -173,10 +167,6 @@ class AiGatewayGuardrailPiiBehavior:
 
 
 class AiGatewayGuardrailPiiBehaviorBehavior(Enum):
-    """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
-    guardrail and the request contains PII, the request is not sent to the model server and 400
-    status code is returned; if 'BLOCK' is set for the output guardrail and the model response
-    contains PII, the PII info in the response is redacted and 400 status code is returned."""
 
     BLOCK = 'BLOCK'
     NONE = 'NONE'

@@ -292,15 +282,12 @@ class AiGatewayRateLimit:
 
 
 class AiGatewayRateLimitKey(Enum):
-    """Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint'
-    being the default if not specified."""
 
     ENDPOINT = 'endpoint'
     USER = 'user'
 
 
 class AiGatewayRateLimitRenewalPeriod(Enum):
-    """Renewal period field for a rate limit. Currently, only 'minute' is supported."""
 
     MINUTE = 'minute'
 

@@ -339,9 +326,9 @@ class AmazonBedrockConfig:
 
     aws_access_key_id: Optional[str] = None
     """The Databricks secret key reference for an AWS access key ID with permissions to interact with
-    Bedrock services. If you prefer to paste your API key directly, see
-    must provide an API key using one of the following fields:
-    `aws_access_key_id_plaintext`."""
+    Bedrock services. If you prefer to paste your API key directly, see
+    `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields:
+    `aws_access_key_id` or `aws_access_key_id_plaintext`."""
 
     aws_access_key_id_plaintext: Optional[str] = None
     """An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext

@@ -399,8 +386,6 @@ class AmazonBedrockConfig:
 
 
 class AmazonBedrockConfigBedrockProvider(Enum):
-    """The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
-    Anthropic, Cohere, AI21Labs, Amazon."""
 
     AI21LABS = 'ai21labs'
     AMAZON = 'amazon'

@@ -490,18 +475,21 @@ class AutoCaptureConfigInput:
 @dataclass
 class AutoCaptureConfigOutput:
     catalog_name: Optional[str] = None
-    """The name of the catalog in Unity Catalog."""
+    """The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if
+    the inference table is already enabled."""
 
     enabled: Optional[bool] = None
     """Indicates whether the inference table is enabled."""
 
     schema_name: Optional[str] = None
-    """The name of the schema in Unity Catalog."""
+    """The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if
+    the inference table is already enabled."""
 
     state: Optional[AutoCaptureState] = None
 
     table_name_prefix: Optional[str] = None
-    """The prefix of the table in Unity Catalog."""
+    """The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if
+    the inference table is already enabled."""
 
     def as_dict(self) -> dict:
         """Serializes the AutoCaptureConfigOutput into a dictionary suitable for use as a JSON request body."""

@@ -662,12 +650,12 @@ class CreateServingEndpoint:
     """The name of the serving endpoint. This field is required and must be unique across a Databricks
     workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores."""
 
-    config: EndpointCoreConfigInput
-    """The core config of the serving endpoint."""
-
     ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE:
-
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""
+
+    config: Optional[EndpointCoreConfigInput] = None
+    """The core config of the serving endpoint."""
 
     rate_limits: Optional[List[RateLimit]] = None
     """Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
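Note: the hunk above relaxes CreateServingEndpoint so that `config` is an optional keyword field rather than a required one. A minimal sketch of what that allows, assuming databricks-sdk 0.42.0 and using only names shown in this diff (the endpoint name 'my-endpoint' is illustrative):

from databricks.sdk.service.serving import CreateServingEndpoint

# In 0.40.0 the constructor required a config; in 0.42.0 the request can be
# built from the name alone, with config supplied (or omitted) as a keyword.
req = CreateServingEndpoint(name='my-endpoint')
# as_dict() only emits fields that were set, so the body here is just {'name': 'my-endpoint'}.
body = req.as_dict()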
@@ -712,6 +700,37 @@ class CreateServingEndpoint:
                    tags=_repeated_dict(d, 'tags', EndpointTag))
 
 
+@dataclass
+class DataPlaneInfo:
+    """Details necessary to query this object's API through the DataPlane APIs."""
+
+    authorization_details: Optional[str] = None
+    """Authorization details as a string."""
+
+    endpoint_url: Optional[str] = None
+    """The URL of the endpoint for this operation in the dataplane."""
+
+    def as_dict(self) -> dict:
+        """Serializes the DataPlaneInfo into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
+        if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the DataPlaneInfo into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
+        if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> DataPlaneInfo:
+        """Deserializes the DataPlaneInfo from a dictionary."""
+        return cls(authorization_details=d.get('authorization_details', None),
+                   endpoint_url=d.get('endpoint_url', None))
+
+
 @dataclass
 class DatabricksModelServingConfig:
     databricks_workspace_url: str
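Note: the new DataPlaneInfo dataclass follows the same as_dict/from_dict conventions as the rest of the generated service models. A minimal round-trip sketch, assuming 0.42.0; the URL and details values are illustrative:

from databricks.sdk.service.serving import DataPlaneInfo

info = DataPlaneInfo.from_dict({
    'authorization_details': 'example-authorization-details',
    'endpoint_url': 'https://example.cloud.databricks.com/serving-endpoints/foo/invocations',
})
# Unset fields are skipped during serialization, so the round trip reproduces the input.
body = info.as_dict()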
@@ -853,21 +872,22 @@ class EmbeddingsV1ResponseEmbeddingElementObject(Enum):
 class EndpointCoreConfigInput:
     auto_capture_config: Optional[AutoCaptureConfigInput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog.
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     name: Optional[str] = None
     """The name of the serving endpoint to update. This field is required."""
 
     served_entities: Optional[List[ServedEntityInput]] = None
-    """
-    entities."""
+    """The list of served entities under the serving endpoint config."""
 
     served_models: Optional[List[ServedModelInput]] = None
-    """(Deprecated, use served_entities instead)
-
+    """(Deprecated, use served_entities instead) The list of served models under the serving endpoint
+    config."""
 
     traffic_config: Optional[TrafficConfig] = None
-    """The traffic
+    """The traffic configuration associated with the serving endpoint config."""
 
     def as_dict(self) -> dict:
         """Serializes the EndpointCoreConfigInput into a dictionary suitable for use as a JSON request body."""

@@ -903,7 +923,9 @@ class EndpointCoreConfigInput:
 class EndpointCoreConfigOutput:
     auto_capture_config: Optional[AutoCaptureConfigOutput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog.
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     config_version: Optional[int] = None
     """The config version that the serving endpoint is currently serving."""

@@ -982,7 +1004,9 @@ class EndpointCoreConfigSummary:
 class EndpointPendingConfig:
     auto_capture_config: Optional[AutoCaptureConfigOutput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog.
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     config_version: Optional[int] = None
     """The config version that the serving endpoint is currently serving."""

@@ -1068,10 +1092,6 @@ class EndpointState:
 
 
 class EndpointStateConfigUpdate(Enum):
-    """The state of an endpoint's config update. This informs the user if the pending_config is in
-    progress, if the update failed, or if there is no update in progress. Note that if the
-    endpoint's config_update state value is IN_PROGRESS, another update can not be made until the
-    update completes or fails."""
 
     IN_PROGRESS = 'IN_PROGRESS'
     NOT_UPDATING = 'NOT_UPDATING'

@@ -1080,9 +1100,6 @@ class EndpointStateConfigUpdate(Enum):
 
 
 class EndpointStateReady(Enum):
-    """The state of an endpoint, indicating whether or not the endpoint is queryable. An endpoint is
-    READY if all of the served entities in its active configuration are ready. If any of the
-    actively served entities are in a non-ready state, the endpoint state will be NOT_READY."""
 
     NOT_READY = 'NOT_READY'
     READY = 'READY'

@@ -1116,6 +1133,28 @@ class EndpointTag:
         return cls(key=d.get('key', None), value=d.get('value', None))
 
 
+@dataclass
+class EndpointTags:
+    tags: Optional[List[EndpointTag]] = None
+
+    def as_dict(self) -> dict:
+        """Serializes the EndpointTags into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.tags: body['tags'] = [v.as_dict() for v in self.tags]
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the EndpointTags into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.tags: body['tags'] = self.tags
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> EndpointTags:
+        """Deserializes the EndpointTags from a dictionary."""
+        return cls(tags=_repeated_dict(d, 'tags', EndpointTag))
+
+
 @dataclass
 class ExportMetricsResponse:
     contents: Optional[BinaryIO] = None

@@ -1138,12 +1177,77 @@ class ExportMetricsResponse:
         return cls(contents=d.get('contents', None))
 
 
+@dataclass
+class ExternalFunctionRequest:
+    """Simple Proto message for testing"""
+
+    connection_name: str
+    """The connection name to use. This is required to identify the external connection."""
+
+    method: ExternalFunctionRequestHttpMethod
+    """The HTTP method to use (e.g., 'GET', 'POST')."""
+
+    path: str
+    """The relative path for the API endpoint. This is required."""
+
+    headers: Optional[str] = None
+    """Additional headers for the request. If not provided, only auth headers from connections would be
+    passed."""
+
+    json: Optional[str] = None
+    """The JSON payload to send in the request body."""
+
+    params: Optional[str] = None
+    """Query parameters for the request."""
+
+    def as_dict(self) -> dict:
+        """Serializes the ExternalFunctionRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.connection_name is not None: body['connection_name'] = self.connection_name
+        if self.headers is not None: body['headers'] = self.headers
+        if self.json is not None: body['json'] = self.json
+        if self.method is not None: body['method'] = self.method.value
+        if self.params is not None: body['params'] = self.params
+        if self.path is not None: body['path'] = self.path
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the ExternalFunctionRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.connection_name is not None: body['connection_name'] = self.connection_name
+        if self.headers is not None: body['headers'] = self.headers
+        if self.json is not None: body['json'] = self.json
+        if self.method is not None: body['method'] = self.method
+        if self.params is not None: body['params'] = self.params
+        if self.path is not None: body['path'] = self.path
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionRequest:
+        """Deserializes the ExternalFunctionRequest from a dictionary."""
+        return cls(connection_name=d.get('connection_name', None),
+                   headers=d.get('headers', None),
+                   json=d.get('json', None),
+                   method=_enum(d, 'method', ExternalFunctionRequestHttpMethod),
+                   params=d.get('params', None),
+                   path=d.get('path', None))
+
+
+class ExternalFunctionRequestHttpMethod(Enum):
+
+    DELETE = 'DELETE'
+    GET = 'GET'
+    PATCH = 'PATCH'
+    POST = 'POST'
+    PUT = 'PUT'
+
+
 @dataclass
 class ExternalModel:
     provider: ExternalModelProvider
     """The name of the provider for the external model. Currently, the supported providers are
     'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
-    'google-cloud-vertex-ai', 'openai', and 'palm'."
+    'google-cloud-vertex-ai', 'openai', and 'palm'."""
 
     name: str
     """The name of the external model."""
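Note: per the hunk above, ExternalFunctionRequest serializes its method enum via .value, so the JSON body carries a plain string. A minimal sketch, assuming 0.42.0; the connection name and path are illustrative:

from databricks.sdk.service.serving import (ExternalFunctionRequest,
                                            ExternalFunctionRequestHttpMethod)

req = ExternalFunctionRequest(connection_name='my_connection',
                              method=ExternalFunctionRequestHttpMethod.GET,
                              path='/v1/status')
body = req.as_dict()
# as_dict() stores self.method.value, so body['method'] == 'GET'.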
@@ -1230,9 +1334,6 @@ class ExternalModel:
 
 
 class ExternalModelProvider(Enum):
-    """The name of the provider for the external model. Currently, the supported providers are
-    'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
-    'google-cloud-vertex-ai', 'openai', and 'palm'.","""
 
     AI21LABS = 'ai21labs'
     AMAZON_BEDROCK = 'amazon-bedrock'

@@ -1281,17 +1382,16 @@ class ExternalModelUsageElement:
 
 @dataclass
 class FoundationModel:
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
+
     description: Optional[str] = None
-    """The description of the foundation model."""
 
     display_name: Optional[str] = None
-    """The display name of the foundation model."""
 
     docs: Optional[str] = None
-    """The URL to the documentation of the foundation model."""
 
     name: Optional[str] = None
-    """The name of the foundation model."""
 
     def as_dict(self) -> dict:
         """Serializes the FoundationModel into a dictionary suitable for use as a JSON request body."""

@@ -1322,23 +1422,24 @@ class FoundationModel:
 
 @dataclass
 class GetOpenApiResponse:
-
-    info, servers and paths, etc."""
+    contents: Optional[BinaryIO] = None
 
     def as_dict(self) -> dict:
         """Serializes the GetOpenApiResponse into a dictionary suitable for use as a JSON request body."""
         body = {}
+        if self.contents: body['contents'] = self.contents
         return body
 
     def as_shallow_dict(self) -> dict:
         """Serializes the GetOpenApiResponse into a shallow dictionary of its immediate attributes."""
         body = {}
+        if self.contents: body['contents'] = self.contents
         return body
 
     @classmethod
     def from_dict(cls, d: Dict[str, any]) -> GetOpenApiResponse:
         """Deserializes the GetOpenApiResponse from a dictionary."""
-        return cls()
+        return cls(contents=d.get('contents', None))
 
 
 @dataclass

@@ -1367,13 +1468,23 @@ class GetServingEndpointPermissionLevelsResponse:
 
 @dataclass
 class GoogleCloudVertexAiConfig:
+    project_id: str
+    """This is the Google Cloud project id that the service account is associated with."""
+
+    region: str
+    """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
+    details. Some models are only available in specific regions.
+
+    [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
+
     private_key: Optional[str] = None
     """The Databricks secret key reference for a private key for the service account which has access
     to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys].
     If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an
     API key using one of the following fields: `private_key` or `private_key_plaintext`
 
-    [Best practices for managing service account keys]:
+    [Best practices for managing service account keys]:
+    https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
 
     private_key_plaintext: Optional[str] = None
     """The private key for the service account which has access to the Google Cloud Vertex AI Service

@@ -1381,16 +1492,8 @@ class GoogleCloudVertexAiConfig:
     prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an
     API key using one of the following fields: `private_key` or `private_key_plaintext`.
 
-    [Best practices for managing service account keys]:
-
-    project_id: Optional[str] = None
-    """This is the Google Cloud project id that the service account is associated with."""
-
-    region: Optional[str] = None
-    """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
-    details. Some models are only available in specific regions.
-
-    [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
+    [Best practices for managing service account keys]:
+    https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
 
     def as_dict(self) -> dict:
         """Serializes the GoogleCloudVertexAiConfig into a dictionary suitable for use as a JSON request body."""
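Note: with the hunks above, project_id and region become required fields on GoogleCloudVertexAiConfig, so 0.40.0 code that omitted them will now raise a TypeError at construction time. A minimal sketch with illustrative values (the project id, region, and secret reference are placeholders):

from databricks.sdk.service.serving import GoogleCloudVertexAiConfig

# project_id and region moved from Optional[str] to required in 0.42.0; the
# private_key below is an illustrative Databricks secret reference.
config = GoogleCloudVertexAiConfig(project_id='my-gcp-project',
                                   region='us-central1',
                                   private_key='{{secrets/my_scope/vertex_key}}')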
@@ -1419,6 +1522,28 @@ class GoogleCloudVertexAiConfig:
                    region=d.get('region', None))
 
 
+@dataclass
+class HttpRequestResponse:
+    contents: Optional[BinaryIO] = None
+
+    def as_dict(self) -> dict:
+        """Serializes the HttpRequestResponse into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.contents: body['contents'] = self.contents
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the HttpRequestResponse into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.contents: body['contents'] = self.contents
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> HttpRequestResponse:
+        """Deserializes the HttpRequestResponse from a dictionary."""
+        return cls(contents=d.get('contents', None))
+
+
 @dataclass
 class ListEndpointsResponse:
     endpoints: Optional[List[ServingEndpoint]] = None

@@ -1444,7 +1569,10 @@ class ListEndpointsResponse:
 
 @dataclass
 class ModelDataPlaneInfo:
-    query_info: Optional[oauth2.DataPlaneInfo] = None
+    """A representation of all DataPlaneInfo for operations that can be done on a model through Data
+    Plane APIs."""
+
+    query_info: Optional[DataPlaneInfo] = None
     """Information required to query DataPlane API 'query' endpoint."""
 
     def as_dict(self) -> dict:

@@ -1462,11 +1590,13 @@ class ModelDataPlaneInfo:
     @classmethod
     def from_dict(cls, d: Dict[str, any]) -> ModelDataPlaneInfo:
         """Deserializes the ModelDataPlaneInfo from a dictionary."""
-        return cls(query_info=_from_dict(d, 'query_info', oauth2.DataPlaneInfo))
+        return cls(query_info=_from_dict(d, 'query_info', DataPlaneInfo))
 
 
 @dataclass
 class OpenAiConfig:
+    """Configs needed to create an OpenAI model route."""
+
     microsoft_entra_client_id: Optional[str] = None
     """This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID."""
 

@@ -1652,13 +1782,10 @@ class PatchServingEndpointTags:
 @dataclass
 class PayloadTable:
     name: Optional[str] = None
-    """The name of the payload table."""
 
     status: Optional[str] = None
-    """The status of the payload table."""
 
     status_message: Optional[str] = None
-    """The status message of the payload table."""
 
     def as_dict(self) -> dict:
         """Serializes the PayloadTable into a dictionary suitable for use as a JSON request body."""

@@ -1684,6 +1811,57 @@ class PayloadTable:
                    status_message=d.get('status_message', None))
 
 
+@dataclass
+class PutAiGatewayRequest:
+    guardrails: Optional[AiGatewayGuardrails] = None
+    """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and
+    responses."""
+
+    inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
+    """Configuration for payload logging using inference tables. Use these tables to monitor and audit
+    data being sent to and received from model APIs and to improve model quality."""
+
+    name: Optional[str] = None
+    """The name of the serving endpoint whose AI Gateway is being updated. This field is required."""
+
+    rate_limits: Optional[List[AiGatewayRateLimit]] = None
+    """Configuration for rate limits which can be set to limit endpoint traffic."""
+
+    usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None
+    """Configuration to enable usage tracking using system tables. These tables allow you to monitor
+    operational usage on endpoints and their associated costs."""
+
+    def as_dict(self) -> dict:
+        """Serializes the PutAiGatewayRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.guardrails: body['guardrails'] = self.guardrails.as_dict()
+        if self.inference_table_config: body['inference_table_config'] = self.inference_table_config.as_dict()
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
+        if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config.as_dict()
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the PutAiGatewayRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.guardrails: body['guardrails'] = self.guardrails
+        if self.inference_table_config: body['inference_table_config'] = self.inference_table_config
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = self.rate_limits
+        if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> PutAiGatewayRequest:
+        """Deserializes the PutAiGatewayRequest from a dictionary."""
+        return cls(guardrails=_from_dict(d, 'guardrails', AiGatewayGuardrails),
+                   inference_table_config=_from_dict(d, 'inference_table_config',
+                                                     AiGatewayInferenceTableConfig),
+                   name=d.get('name', None),
+                   rate_limits=_repeated_dict(d, 'rate_limits', AiGatewayRateLimit),
+                   usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
+
+
 @dataclass
 class PutAiGatewayResponse:
     guardrails: Optional[AiGatewayGuardrails] = None

@@ -1692,7 +1870,7 @@ class PutAiGatewayResponse:
 
     inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
     """Configuration for payload logging using inference tables. Use these tables to monitor and audit
-    data being sent to and received from model APIs and to improve model quality
+    data being sent to and received from model APIs and to improve model quality."""
 
     rate_limits: Optional[List[AiGatewayRateLimit]] = None
     """Configuration for rate limits which can be set to limit endpoint traffic."""

@@ -1729,6 +1907,34 @@ class PutAiGatewayResponse:
                    usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
 
 
+@dataclass
+class PutRequest:
+    name: Optional[str] = None
+    """The name of the serving endpoint whose rate limits are being updated. This field is required."""
+
+    rate_limits: Optional[List[RateLimit]] = None
+    """The list of endpoint rate limits."""
+
+    def as_dict(self) -> dict:
+        """Serializes the PutRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the PutRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = self.rate_limits
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> PutRequest:
+        """Deserializes the PutRequest from a dictionary."""
+        return cls(name=d.get('name', None), rate_limits=_repeated_dict(d, 'rate_limits', RateLimit))
+
+
 @dataclass
 class PutResponse:
     rate_limits: Optional[List[RateLimit]] = None

@@ -1994,15 +2200,12 @@ class RateLimit:
 
 
 class RateLimitKey(Enum):
-    """Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are
-    supported, with 'endpoint' being the default if not specified."""
 
     ENDPOINT = 'endpoint'
     USER = 'user'
 
 
 class RateLimitRenewalPeriod(Enum):
-    """Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported."""
 
     MINUTE = 'minute'
 

@@ -2043,11 +2246,9 @@ class ServedEntityInput:
     """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
     a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
     object, the full name of the object should be given in the form of
-    __catalog_name__.__schema_name__.__model_name__."""
+    **catalog_name.schema_name.model_name**."""
 
     entity_version: Optional[str] = None
-    """The version of the model in Databricks Model Registry to be served or empty if the entity is a
-    FEATURE_SPEC."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used

@@ -2076,7 +2277,7 @@ class ServedEntityInput:
     """The name of a served entity. It must be unique across an endpoint. A served entity name can
     consist of alphanumeric characters, dashes, and underscores. If not specified for an external
     model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
-    not specified for other entities, it defaults to
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
     """Whether the compute resources for the served entity should scale down to zero."""

@@ -2089,13 +2290,13 @@ class ServedEntityInput:
     scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
     is 0."""
 
-    workload_type: Optional[
+    workload_type: Optional[ServingModelWorkloadType] = None
     """The workload type of the served entity. The workload type selects which type of compute to use
     in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntityInput into a dictionary suitable for use as a JSON request body."""

@@ -2112,7 +2313,7 @@ class ServedEntityInput:
         if self.name is not None: body['name'] = self.name
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:

@@ -2146,26 +2347,22 @@ class ServedEntityInput:
                    name=d.get('name', None),
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d.get('workload_type', None))
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedEntityOutput:
     creation_timestamp: Optional[int] = None
-    """The creation timestamp of the served entity in Unix time."""
 
     creator: Optional[str] = None
-    """The email of the user who created the served entity."""
 
     entity_name: Optional[str] = None
-    """The name of the entity served. The entity may be a model in the Databricks Model Registry,
-    model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
-    object, the full name of the object
-    __catalog_name__.__schema_name__.__model_name__."""
+    """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
+    a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
+    object, the full name of the object should be given in the form of
+    **catalog_name.schema_name.model_name**."""
 
     entity_version: Optional[str] = None
-    """The version of the served entity in Databricks Model Registry or empty if the entity is a
-    FEATURE_SPEC."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
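Note: the hunks above retype workload_type to the new ServingModelWorkloadType enum, with as_dict() emitting .value and from_dict() parsing via _enum. A minimal sketch, assuming 0.42.0; the GPU_SMALL member name is inferred from the docstring's example and may differ in the actual enum, and the entity name is an illustrative Unity Catalog model:

from databricks.sdk.service.serving import (ServedEntityInput,
                                            ServingModelWorkloadType)

entity = ServedEntityInput(entity_name='main.default.my_model',  # illustrative UC model
                           entity_version='1',
                           scale_to_zero_enabled=True,
                           workload_size='Small',
                           workload_type=ServingModelWorkloadType.GPU_SMALL)  # member name assumed
# as_dict() serializes the enum via .value; from_dict() restores it with
# _enum(d, 'workload_type', ServingModelWorkloadType).
body = entity.as_dict()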
@@ -2174,14 +2371,16 @@ class ServedEntityOutput:
|
|
|
2174
2371
|
"{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
|
|
2175
2372
|
|
|
2176
2373
|
external_model: Optional[ExternalModel] = None
|
|
2177
|
-
"""The external model
|
|
2178
|
-
|
|
2179
|
-
|
|
2374
|
+
"""The external model to be served. NOTE: Only one of external_model and (entity_name,
|
|
2375
|
+
entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with
|
|
2376
|
+
the latter set being used for custom model serving for a Databricks registered model. For an
|
|
2377
|
+
existing endpoint with external_model, it cannot be updated to an endpoint without
|
|
2378
|
+
external_model. If the endpoint is created without external_model, users cannot update it to add
|
|
2379
|
+
external_model later. The task type of all external models within an endpoint must be the same."""
|
|
2180
2380
|
|
|
2181
2381
|
foundation_model: Optional[FoundationModel] = None
|
|
2182
|
-
"""
|
|
2183
|
-
|
|
2184
|
-
returned based on the endpoint type."""
|
|
2382
|
+
"""All fields are not sensitive as they are hard-coded in the system and made available to
|
|
2383
|
+
customers."""
|
|
2185
2384
|
|
|
2186
2385
|
instance_profile_arn: Optional[str] = None
|
|
2187
2386
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
@@ -2193,13 +2392,15 @@ class ServedEntityOutput:
|
|
|
2193
2392
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
2194
2393
|
|
|
2195
2394
|
name: Optional[str] = None
|
|
2196
|
-
"""The name of
|
|
2395
|
+
"""The name of a served entity. It must be unique across an endpoint. A served entity name can
|
|
2396
|
+
consist of alphanumeric characters, dashes, and underscores. If not specified for an external
|
|
2397
|
+
model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
|
|
2398
|
+
not specified for other entities, it defaults to entity_name-entity_version."""
|
|
2197
2399
|
|
|
2198
2400
|
scale_to_zero_enabled: Optional[bool] = None
|
|
2199
2401
|
"""Whether the compute resources for the served entity should scale down to zero."""
|
|
2200
2402
|
|
|
2201
2403
|
state: Optional[ServedModelState] = None
|
|
2202
|
-
"""Information corresponding to the state of the served entity."""
|
|
2203
2404
|
|
|
2204
2405
|
workload_size: Optional[str] = None
|
|
2205
2406
|
"""The workload size of the served entity. The workload size corresponds to a range of provisioned
|
|
@@ -2207,15 +2408,15 @@ class ServedEntityOutput:
|
|
|
2207
2408
|
process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
|
|
2208
2409
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
|
|
2209
2410
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
2210
|
-
|
|
2411
|
+
is 0."""
|
|
2211
2412
|
|
|
2212
|
-
workload_type: Optional[
|
|
2413
|
+
workload_type: Optional[ServingModelWorkloadType] = None
|
|
2213
2414
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
2214
2415
|
in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
|
|
2215
2416
|
acceleration is available by selecting workload types like GPU_SMALL and others. See the
|
|
2216
2417
|
available [GPU types].
|
|
2217
2418
|
|
|
2218
|
-
[GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2419
|
+
[GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2219
2420
|
|
|
2220
2421
|
def as_dict(self) -> dict:
|
|
2221
2422
|
"""Serializes the ServedEntityOutput into a dictionary suitable for use as a JSON request body."""
|
|
@@ -2236,7 +2437,7 @@ class ServedEntityOutput:
|
|
|
2236
2437
|
if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
|
|
2237
2438
|
if self.state: body['state'] = self.state.as_dict()
|
|
2238
2439
|
if self.workload_size is not None: body['workload_size'] = self.workload_size
|
|
2239
|
-
if self.workload_type is not None: body['workload_type'] = self.workload_type
|
|
2440
|
+
if self.workload_type is not None: body['workload_type'] = self.workload_type.value
|
|
2240
2441
|
return body
|
|
2241
2442
|
|
|
2242
2443
|
def as_shallow_dict(self) -> dict:
|
|
@@ -2278,31 +2479,22 @@ class ServedEntityOutput:
|
|
|
2278
2479
|
scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
|
|
2279
2480
|
state=_from_dict(d, 'state', ServedModelState),
|
|
2280
2481
|
workload_size=d.get('workload_size', None),
|
|
2281
|
-
workload_type=d
|
|
2482
|
+
workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
|
|
2282
2483
|
|
|
2283
2484
|
|
|
2284
2485
|
@dataclass
|
|
2285
2486
|
class ServedEntitySpec:
|
|
2286
2487
|
entity_name: Optional[str] = None
|
|
2287
|
-
"""The name of the entity served. The entity may be a model in the Databricks Model Registry, a
|
|
2288
|
-
model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
|
|
2289
|
-
object, the full name of the object is given in the form of
|
|
2290
|
-
__catalog_name__.__schema_name__.__model_name__."""
|
|
2291
2488
|
|
|
2292
2489
|
entity_version: Optional[str] = None
|
|
2293
|
-
"""The version of the served entity in Databricks Model Registry or empty if the entity is a
|
|
2294
|
-
FEATURE_SPEC."""
|
|
2295
2490
|
|
|
2296
2491
|
external_model: Optional[ExternalModel] = None
|
|
2297
|
-
"""The external model that is served. NOTE: Only one of external_model, foundation_model, and
|
|
2298
|
-
(entity_name, entity_version) is returned based on the endpoint type."""
|
|
2299
2492
|
|
|
2300
2493
|
foundation_model: Optional[FoundationModel] = None
|
|
2301
|
-
"""
|
|
2302
|
-
|
|
2494
|
+
"""All fields are not sensitive as they are hard-coded in the system and made available to
|
|
2495
|
+
customers."""
|
|
2303
2496
|
|
|
2304
2497
|
name: Optional[str] = None
|
|
2305
|
-
"""The name of the served entity."""
|
|
2306
2498
|
|
|
2307
2499
|
def as_dict(self) -> dict:
|
|
2308
2500
|
"""Serializes the ServedEntitySpec into a dictionary suitable for use as a JSON request body."""
|
|
@@ -2336,24 +2528,21 @@ class ServedEntitySpec:
|
|
|
2336
2528
|
|
|
2337
2529
|
@dataclass
|
|
2338
2530
|
class ServedModelInput:
|
|
2531
|
+
scale_to_zero_enabled: bool
|
|
2532
|
+
"""Whether the compute resources for the served entity should scale down to zero."""
|
|
2533
|
+
|
|
2339
2534
|
model_name: str
|
|
2340
|
-
"""The name of the model in Databricks Model Registry to be served or if the model resides in Unity
|
|
2341
|
-
Catalog, the full name of model, in the form of __catalog_name__.__schema_name__.__model_name__."""
|
|
2342
2535
|
|
|
2343
2536
|
model_version: str
|
|
2344
|
-
"""The version of the model in Databricks Model Registry or Unity Catalog to be served."""
|
|
2345
|
-
|
|
2346
|
-
scale_to_zero_enabled: bool
|
|
2347
|
-
"""Whether the compute resources for the served model should scale down to zero."""
|
|
2348
2537
|
|
|
2349
2538
|
environment_vars: Optional[Dict[str, str]] = None
|
|
2350
2539
|
"""An object containing a set of optional, user-specified environment variable key-value pairs used
|
|
2351
|
-
for serving this
|
|
2352
|
-
|
|
2540
|
+
for serving this entity. Note: this is an experimental feature and subject to change. Example
|
|
2541
|
+
entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
|
|
2353
2542
|
"{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
|
|
2354
2543
|
|
|
2355
2544
|
instance_profile_arn: Optional[str] = None
|
|
2356
|
-
"""ARN of the instance profile that the served
|
|
2545
|
+
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
2357
2546
|
|
|
2358
2547
|
max_provisioned_throughput: Optional[int] = None
|
|
2359
2548
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
@@ -2362,25 +2551,26 @@ class ServedModelInput:
|
|
|
2362
2551
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
2363
2552
|
|
|
2364
2553
|
name: Optional[str] = None
|
|
2365
|
-
"""The name of a served
|
|
2366
|
-
|
|
2367
|
-
|
|
2554
|
+
"""The name of a served entity. It must be unique across an endpoint. A served entity name can
|
|
2555
|
+
consist of alphanumeric characters, dashes, and underscores. If not specified for an external
|
|
2556
|
+
model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
|
|
2557
|
+
not specified for other entities, it defaults to entity_name-entity_version."""
|
|
2368
2558
|
|
|
2369
2559
|
workload_size: Optional[ServedModelInputWorkloadSize] = None
|
|
2370
|
-
"""The workload size of the served
|
|
2371
|
-
concurrency that the compute
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
2560
|
+
"""The workload size of the served entity. The workload size corresponds to a range of provisioned
|
|
2561
|
+
concurrency that the compute autoscales between. A single unit of provisioned concurrency can
|
|
2562
|
+
process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
|
|
2563
|
+
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
|
|
2564
|
+
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
2565
|
+
is 0."""
|
|
2376
2566
|
|
|
2377
2567
|
workload_type: Optional[ServedModelInputWorkloadType] = None
|
|
2378
|
-
"""The workload type of the served
|
|
2379
|
-
the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
|
|
2568
|
+
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
2569
|
+
in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
|
|
2380
2570
|
acceleration is available by selecting workload types like GPU_SMALL and others. See the
|
|
2381
2571
|
available [GPU types].
|
|
2382
2572
|
|
|
2383
|
-
[GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2573
|
+
[GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2384
2574
|
|
|
2385
2575
|
def as_dict(self) -> dict:
|
|
2386
2576
|
"""Serializes the ServedModelInput into a dictionary suitable for use as a JSON request body."""
|
|
@@ -2432,12 +2622,6 @@ class ServedModelInput:
|
|
|
2432
2622
|
|
|
2433
2623
|
|
|
2434
2624
|
class ServedModelInputWorkloadSize(Enum):
|
|
2435
|
-
"""The workload size of the served model. The workload size corresponds to a range of provisioned
|
|
2436
|
-
concurrency that the compute will autoscale between. A single unit of provisioned concurrency
|
|
2437
|
-
can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
|
|
2438
|
-
concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
|
|
2439
|
-
concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
|
|
2440
|
-
each workload size will be 0."""
|
|
2441
2625
|
|
|
2442
2626
|
LARGE = 'Large'
|
|
2443
2627
|
MEDIUM = 'Medium'
|
|
@@ -2445,12 +2629,6 @@ class ServedModelInputWorkloadSize(Enum):
|
|
|
2445
2629
|
|
|
2446
2630
|
|
|
2447
2631
|
class ServedModelInputWorkloadType(Enum):
|
|
2448
|
-
"""The workload type of the served model. The workload type selects which type of compute to use in
|
|
2449
|
-
the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
|
|
2450
|
-
acceleration is available by selecting workload types like GPU_SMALL and others. See the
|
|
2451
|
-
available [GPU types].
|
|
2452
|
-
|
|
2453
|
-
[GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2454
2632
|
|
|
2455
2633
|
CPU = 'CPU'
|
|
2456
2634
|
GPU_LARGE = 'GPU_LARGE'
|
|
@@ -2462,51 +2640,48 @@ class ServedModelInputWorkloadType(Enum):
|
|
|
2462
2640
|
@dataclass
|
|
2463
2641
|
class ServedModelOutput:
|
|
2464
2642
|
creation_timestamp: Optional[int] = None
|
|
2465
|
-
"""The creation timestamp of the served model in Unix time."""
|
|
2466
2643
|
|
|
2467
2644
|
creator: Optional[str] = None
|
|
2468
|
-
"""The email of the user who created the served model."""
|
|
2469
2645
|
|
|
2470
2646
|
environment_vars: Optional[Dict[str, str]] = None
|
|
2471
2647
|
"""An object containing a set of optional, user-specified environment variable key-value pairs used
|
|
2472
|
-
for serving this
|
|
2473
|
-
|
|
2648
|
+
for serving this entity. Note: this is an experimental feature and subject to change. Example
|
|
2649
|
+
entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
|
|
2474
2650
|
"{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
|
|
2475
2651
|
|
|
2476
2652
|
instance_profile_arn: Optional[str] = None
|
|
2477
|
-
"""ARN of the instance profile that the served
|
|
2653
|
+
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
2478
2654
|
|
|
2479
2655
|
model_name: Optional[str] = None
|
|
2480
|
-
"""The name of the model in Databricks Model Registry or the full name of the model in Unity
|
|
2481
|
-
Catalog."""
|
|
2482
2656
|
|
|
2483
2657
|
model_version: Optional[str] = None
|
|
2484
|
-
"""The version of the model in Databricks Model Registry or Unity Catalog to be served."""
|
|
2485
2658
|
|
|
2486
2659
|
name: Optional[str] = None
|
|
2487
|
-
"""The name of
|
|
2660
|
+
"""The name of a served entity. It must be unique across an endpoint. A served entity name can
|
|
2661
|
+
consist of alphanumeric characters, dashes, and underscores. If not specified for an external
|
|
2662
|
+
model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
|
|
2663
|
+
not specified for other entities, it defaults to entity_name-entity_version."""
|
|
2488
2664
|
|
|
2489
2665
|
scale_to_zero_enabled: Optional[bool] = None
|
|
2490
|
-
"""Whether the compute resources for the
|
|
2666
|
+
"""Whether the compute resources for the served entity should scale down to zero."""
|
|
2491
2667
|
|
|
2492
2668
|
state: Optional[ServedModelState] = None
|
|
2493
|
-
"""Information corresponding to the state of the Served Model."""
|
|
2494
2669
|
|
|
2495
2670
|
workload_size: Optional[str] = None
|
|
2496
|
-
"""The workload size of the served
|
|
2497
|
-
concurrency that the compute
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
workload_type: Optional[
|
|
2504
|
-
"""The workload type of the served
|
|
2505
|
-
the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
|
|
2671
|
+
"""The workload size of the served entity. The workload size corresponds to a range of provisioned
|
|
2672
|
+
concurrency that the compute autoscales between. A single unit of provisioned concurrency can
|
|
2673
|
+
process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
|
|
2674
|
+
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
|
|
2675
|
+
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
2676
|
+
is 0."""
|
|
2677
|
+
|
|
2678
|
+
workload_type: Optional[ServingModelWorkloadType] = None
|
|
2679
|
+
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
2680
|
+
in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
|
|
2506
2681
|
acceleration is available by selecting workload types like GPU_SMALL and others. See the
|
|
2507
2682
|
available [GPU types].
|
|
2508
2683
|
|
|
2509
|
-
[GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2684
|
+
[GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
|
|
2510
2685
|
|
|
2511
2686
|
def as_dict(self) -> dict:
|
|
2512
2687
|
"""Serializes the ServedModelOutput into a dictionary suitable for use as a JSON request body."""
|
|
@@ -2521,7 +2696,7 @@ class ServedModelOutput:
|
|
|
2521
2696
|
if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
|
|
2522
2697
|
if self.state: body['state'] = self.state.as_dict()
|
|
2523
2698
|
if self.workload_size is not None: body['workload_size'] = self.workload_size
|
|
2524
|
-
if self.workload_type is not None: body['workload_type'] = self.workload_type
|
|
2699
|
+
if self.workload_type is not None: body['workload_type'] = self.workload_type.value
|
|
2525
2700
|
return body
|
|
2526
2701
|
|
|
2527
2702
|
def as_shallow_dict(self) -> dict:
|
|
@@ -2553,20 +2728,18 @@ class ServedModelOutput:
                   scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                   state=_from_dict(d, 'state', ServedModelState),
                   workload_size=d.get('workload_size', None),
-                   workload_type=d
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))


@dataclass
class ServedModelSpec:
    model_name: Optional[str] = None
-    """
-    Catalog."""
+    """Only one of model_name and entity_name should be populated"""

    model_version: Optional[str] = None
-    """
+    """Only one of model_version and entity_version should be populated"""

    name: Optional[str] = None
-    """The name of the served model."""

    def as_dict(self) -> dict:
        """Serializes the ServedModelSpec into a dictionary suitable for use as a JSON request body."""
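Taken together with the as_dict change in the previous hunk, workload_type now round-trips between the new enum and its wire string. A minimal sketch, assuming databricks-sdk 0.42.0 is installed:

from databricks.sdk.service import serving

# from_dict decodes the wire string into the new ServingModelWorkloadType enum...
out = serving.ServedModelOutput.from_dict({'workload_size': 'Small', 'workload_type': 'GPU_SMALL'})
assert out.workload_type is serving.ServingModelWorkloadType.GPU_SMALL
# ...and as_dict re-emits the enum's value, not the enum object.
assert out.as_dict()['workload_type'] == 'GPU_SMALL'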
@@ -2595,18 +2768,8 @@ class ServedModelSpec:
@dataclass
class ServedModelState:
    deployment: Optional[ServedModelStateDeployment] = None
-    """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
-    is not ready yet because the deployment is still being created (i.e container image is building,
-    model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
-    served entity was previously in a ready state but no longer is and is attempting to recover.
-    DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
-    indicates that there was an error trying to bring up the served entity (e.g container image
-    build failed, the model server failed to start due to a model loading error, etc.)
-    DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
-    bringing up another served entity under the same endpoint and config version."""

    deployment_state_message: Optional[str] = None
-    """More information about the state of the served entity, if available."""

    def as_dict(self) -> dict:
        """Serializes the ServedModelState into a dictionary suitable for use as a JSON request body."""
@@ -2632,15 +2795,6 @@ class ServedModelState:


class ServedModelStateDeployment(Enum):
-    """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
-    is not ready yet because the deployment is still being created (i.e container image is building,
-    model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
-    served entity was previously in a ready state but no longer is and is attempting to recover.
-    DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
-    indicates that there was an error trying to bring up the served entity (e.g container image
-    build failed, the model server failed to start due to a model loading error, etc.)
-    DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
-    bringing up another served entity under the same endpoint and config version."""

    ABORTED = 'DEPLOYMENT_ABORTED'
    CREATING = 'DEPLOYMENT_CREATING'
@@ -2675,8 +2829,8 @@ class ServerLogsResponse:
@dataclass
class ServingEndpoint:
    ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model
-    currently supported."""
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""

    config: Optional[EndpointCoreConfigSummary] = None
    """The config that is currently being served by the endpoint."""
@@ -2688,8 +2842,7 @@ class ServingEndpoint:
    """The email of the user who created the serving endpoint."""

    id: Optional[str] = None
-    """System-generated ID of the endpoint
-    API"""
+    """System-generated ID of the endpoint, included to be used by the Permissions API."""

    last_updated_timestamp: Optional[int] = None
    """The timestamp when the endpoint was last updated by a user in Unix time."""
@@ -2848,8 +3001,8 @@ class ServingEndpointAccessControlResponse:
@dataclass
class ServingEndpointDetailed:
    ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model
-    currently supported."""
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""

    config: Optional[EndpointCoreConfigOutput] = None
    """The config that is currently being served by the endpoint."""
@@ -2957,7 +3110,6 @@ class ServingEndpointDetailed:


class ServingEndpointDetailedPermissionLevel(Enum):
-    """The permission level of the principal making the request."""

    CAN_MANAGE = 'CAN_MANAGE'
    CAN_QUERY = 'CAN_QUERY'
@@ -3097,6 +3249,15 @@ class ServingEndpointPermissionsRequest:
                   serving_endpoint_id=d.get('serving_endpoint_id', None))


+class ServingModelWorkloadType(Enum):
+
+    CPU = 'CPU'
+    GPU_LARGE = 'GPU_LARGE'
+    GPU_MEDIUM = 'GPU_MEDIUM'
+    GPU_SMALL = 'GPU_SMALL'
+    MULTIGPU_MEDIUM = 'MULTIGPU_MEDIUM'
+
+
@dataclass
class TrafficConfig:
    routes: Optional[List[Route]] = None
@@ -3236,9 +3397,9 @@ class ServingEndpointsAPI:

    def create(self,
               name: str,
-               config: EndpointCoreConfigInput,
               *,
               ai_gateway: Optional[AiGatewayConfig] = None,
+               config: Optional[EndpointCoreConfigInput] = None,
               rate_limits: Optional[List[RateLimit]] = None,
               route_optimized: Optional[bool] = None,
               tags: Optional[List[EndpointTag]] = None) -> Wait[ServingEndpointDetailed]:
@@ -3247,11 +3408,11 @@ class ServingEndpointsAPI:
        :param name: str
          The name of the serving endpoint. This field is required and must be unique across a Databricks
          workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores.
-        :param config: :class:`EndpointCoreConfigInput`
-          The core config of the serving endpoint.
        :param ai_gateway: :class:`AiGatewayConfig` (optional)
-          The AI Gateway configuration for the serving endpoint. NOTE:
+          The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+          throughput endpoints are currently supported.
+        :param config: :class:`EndpointCoreConfigInput` (optional)
+          The core config of the serving endpoint.
        :param rate_limits: List[:class:`RateLimit`] (optional)
          Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
          Gateway to manage rate limits.
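Because config moved behind the * separator, callers that previously passed it positionally must now pass it by keyword (or omit it entirely). A minimal sketch, assuming a configured WorkspaceClient and an existing UC model main.default.my_model, both illustrative:

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import serving

w = WorkspaceClient()
# config is now optional and keyword-only.
endpoint = w.serving_endpoints.create_and_wait(
    name='my-endpoint',
    config=serving.EndpointCoreConfigInput(served_entities=[
        serving.ServedEntityInput(entity_name='main.default.my_model',
                                  entity_version='1',
                                  workload_size='Small',
                                  scale_to_zero_enabled=True)
    ]))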
@@ -3281,9 +3442,9 @@ class ServingEndpointsAPI:
    def create_and_wait(
            self,
            name: str,
-            config: EndpointCoreConfigInput,
            *,
            ai_gateway: Optional[AiGatewayConfig] = None,
+            config: Optional[EndpointCoreConfigInput] = None,
            rate_limits: Optional[List[RateLimit]] = None,
            route_optimized: Optional[bool] = None,
            tags: Optional[List[EndpointTag]] = None,
@@ -3299,7 +3460,6 @@ class ServingEndpointsAPI:
        """Delete a serving endpoint.

        :param name: str
-          The name of the serving endpoint. This field is required.


        """
@@ -3341,7 +3501,7 @@ class ServingEndpointsAPI:
        res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}', headers=headers)
        return ServingEndpointDetailed.from_dict(res)

-    def get_open_api(self, name: str):
+    def get_open_api(self, name: str) -> GetOpenApiResponse:
        """Get the schema for a serving endpoint.

        Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for
@@ -3350,12 +3510,13 @@ class ServingEndpointsAPI:
        :param name: str
          The name of the serving endpoint that the served model belongs to. This field is required.

-
+        :returns: :class:`GetOpenApiResponse`
        """

-        headers = {'Accept': '
+        headers = {'Accept': 'text/plain', }

-        self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers)
+        res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers, raw=True)
+        return GetOpenApiResponse.from_dict(res)

    def get_permission_levels(self, serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse:
        """Get serving endpoint permission levels.
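get_open_api previously fired the request and discarded the body; it now reads the text/plain response and wraps it in a GetOpenApiResponse. A minimal usage sketch ('my-endpoint' is illustrative):

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
# Returns a GetOpenApiResponse wrapping the endpoint's OpenAPI schema.
schema = w.serving_endpoints.get_open_api(name='my-endpoint')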
@@ -3394,6 +3555,44 @@ class ServingEndpointsAPI:
                           headers=headers)
        return ServingEndpointPermissions.from_dict(res)

+    def http_request(self,
+                     connection_name: str,
+                     method: ExternalFunctionRequestHttpMethod,
+                     path: str,
+                     *,
+                     headers: Optional[str] = None,
+                     json: Optional[str] = None,
+                     params: Optional[str] = None) -> HttpRequestResponse:
+        """Make external services call using the credentials stored in UC Connection.
+
+        :param connection_name: str
+          The connection name to use. This is required to identify the external connection.
+        :param method: :class:`ExternalFunctionRequestHttpMethod`
+          The HTTP method to use (e.g., 'GET', 'POST').
+        :param path: str
+          The relative path for the API endpoint. This is required.
+        :param headers: str (optional)
+          Additional headers for the request. If not provided, only auth headers from connections would be
+          passed.
+        :param json: str (optional)
+          The JSON payload to send in the request body.
+        :param params: str (optional)
+          Query parameters for the request.
+
+        :returns: :class:`HttpRequestResponse`
+        """
+        body = {}
+        if connection_name is not None: body['connection_name'] = connection_name
+        if headers is not None: body['headers'] = headers
+        if json is not None: body['json'] = json
+        if method is not None: body['method'] = method.value
+        if params is not None: body['params'] = params
+        if path is not None: body['path'] = path
+        headers = {'Accept': 'text/plain', 'Content-Type': 'application/json', }
+
+        res = self._api.do('POST', '/api/2.0/external-function', body=body, headers=headers, raw=True)
+        return HttpRequestResponse.from_dict(res)
+
    def list(self) -> Iterator[ServingEndpoint]:
        """Get all serving endpoints.

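The new http_request method calls an external service through a Unity Catalog connection; note that headers, json, and params are JSON-encoded strings rather than dicts. A minimal sketch, assuming a UC connection named 'my_connection' exists (name and path are illustrative):

import json as jsonlib

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import ExternalFunctionRequestHttpMethod

w = WorkspaceClient()
resp = w.serving_endpoints.http_request(
    connection_name='my_connection',
    method=ExternalFunctionRequestHttpMethod.GET,
    path='/api/v1/status',
    # Per the signature above, query parameters travel as a JSON-encoded string.
    params=jsonlib.dumps({'verbose': 'true'}))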
@@ -3430,7 +3629,7 @@ class ServingEndpointsAPI:
              name: str,
              *,
              add_tags: Optional[List[EndpointTag]] = None,
-              delete_tags: Optional[List[str]] = None) ->
+              delete_tags: Optional[List[str]] = None) -> EndpointTags:
        """Update tags of a serving endpoint.

        Used to batch add and delete tags from a serving endpoint with a single API call.
@@ -3442,7 +3641,7 @@ class ServingEndpointsAPI:
        :param delete_tags: List[str] (optional)
          List of tag keys to delete

-        :returns:
+        :returns: :class:`EndpointTags`
        """
        body = {}
        if add_tags is not None: body['add_tags'] = [v.as_dict() for v in add_tags]
@@ -3450,7 +3649,7 @@ class ServingEndpointsAPI:
        headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }

        res = self._api.do('PATCH', f'/api/2.0/serving-endpoints/{name}/tags', body=body, headers=headers)
-        return
+        return EndpointTags.from_dict(res)

    def put(self, name: str, *, rate_limits: Optional[List[RateLimit]] = None) -> PutResponse:
        """Update rate limits of a serving endpoint.
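The tag-update method (patch in this class) now parses the PATCH response into EndpointTags instead of returning nothing. A minimal sketch (tag names illustrative):

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointTag

w = WorkspaceClient()
tags = w.serving_endpoints.patch(name='my-endpoint',
                                 add_tags=[EndpointTag(key='team', value='mlops')],
                                 delete_tags=['stale-tag'])
# The returned EndpointTags reflects the endpoint's tag set after the update.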
@@ -3485,8 +3684,8 @@ class ServingEndpointsAPI:
                       usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None) -> PutAiGatewayResponse:
        """Update AI Gateway of a serving endpoint.

-        Used to update the AI Gateway of a serving endpoint. NOTE: Only external model
-        supported.
+        Used to update the AI Gateway of a serving endpoint. NOTE: Only external model and provisioned
+        throughput endpoints are currently supported.

        :param name: str
          The name of the serving endpoint whose AI Gateway is being updated. This field is required.
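The same supported-endpoint note now applies when updating the gateway. A minimal sketch enabling usage tracking, using only the parameter visible in the signature fragment above (other gateway settings omitted):

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import AiGatewayUsageTrackingConfig

w = WorkspaceClient()
w.serving_endpoints.put_ai_gateway(
    name='my-endpoint',
    usage_tracking_config=AiGatewayUsageTrackingConfig(enabled=True))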
@@ -3646,14 +3845,16 @@ class ServingEndpointsAPI:
          The name of the serving endpoint to update. This field is required.
        :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional)
          Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
+          Note: this field is deprecated for creating new provisioned throughput endpoints, or updating
+          existing provisioned throughput endpoints that never have inference table configured; in these cases
+          please use AI Gateway to manage inference tables.
        :param served_entities: List[:class:`ServedEntityInput`] (optional)
-          entities.
+          The list of served entities under the serving endpoint config.
        :param served_models: List[:class:`ServedModelInput`] (optional)
-          (Deprecated, use served_entities instead)
+          (Deprecated, use served_entities instead) The list of served models under the serving endpoint
+          config.
        :param traffic_config: :class:`TrafficConfig` (optional)
-          The traffic
+          The traffic configuration associated with the serving endpoint config.

        :returns:
          Long-running operation waiter for :class:`ServingEndpointDetailed`.
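Putting the clarified update_config parameters together: a minimal sketch routing all traffic to a new entity version (names illustrative; auto_capture_config is left out per the deprecation note above):

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import serving

w = WorkspaceClient()
endpoint = w.serving_endpoints.update_config_and_wait(
    name='my-endpoint',
    served_entities=[
        serving.ServedEntityInput(entity_name='main.default.my_model',
                                  entity_version='2',
                                  name='my-model-2',
                                  workload_size='Small',
                                  scale_to_zero_enabled=True)
    ],
    traffic_config=serving.TrafficConfig(
        routes=[serving.Route(served_model_name='my-model-2', traffic_percentage=100)]))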
@@ -3725,6 +3926,7 @@ class ServingEndpointsDataPlaneAPI:
    def __init__(self, api_client, control_plane):
        self._api = api_client
        self._control_plane = control_plane
+        from ..data_plane import DataPlaneService
        self._data_plane_service = DataPlaneService()

    def query(self,