databricks-sdk 0.40.0__py3-none-any.whl → 0.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/sdk/__init__.py +23 -3
- databricks/sdk/_base_client.py +16 -3
- databricks/sdk/config.py +5 -0
- databricks/sdk/credentials_provider.py +23 -14
- databricks/sdk/data_plane.py +1 -1
- databricks/sdk/mixins/files.py +184 -1
- databricks/sdk/mixins/open_ai_client.py +40 -1
- databricks/sdk/service/apps.py +12 -4
- databricks/sdk/service/catalog.py +1 -0
- databricks/sdk/service/cleanrooms.py +2 -1
- databricks/sdk/service/compute.py +4 -0
- databricks/sdk/service/dashboards.py +7 -4
- databricks/sdk/service/files.py +6 -3
- databricks/sdk/service/iam.py +158 -0
- databricks/sdk/service/jobs.py +168 -16
- databricks/sdk/service/oauth2.py +53 -45
- databricks/sdk/service/pipelines.py +89 -12
- databricks/sdk/service/serving.py +423 -215
- databricks/sdk/service/sharing.py +51 -54
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.41.0.dist-info}/METADATA +26 -26
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.41.0.dist-info}/RECORD +26 -26
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.41.0.dist-info}/WHEEL +1 -1
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.41.0.dist-info}/LICENSE +0 -0
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.41.0.dist-info}/NOTICE +0 -0
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.41.0.dist-info}/top_level.txt +0 -0
databricks/sdk/service/serving.py:

@@ -12,14 +12,11 @@ from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional
 
 import requests
 
-from ..data_plane import DataPlaneService
 from ..errors import OperationFailed
 from ._internal import Wait, _enum, _from_dict, _repeated_dict
 
 _LOG = logging.getLogger('databricks.sdk')
 
-from databricks.sdk.service import oauth2
-
 # all definitions in this file are in alphabetical order
 
 
@@ -148,11 +145,8 @@ class AiGatewayGuardrailParameters:
 
 @dataclass
 class AiGatewayGuardrailPiiBehavior:
-    behavior: AiGatewayGuardrailPiiBehaviorBehavior
-    """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
-    guardrail and the request contains PII, the request is not sent to the model server and 400
-    status code is returned; if 'BLOCK' is set for the output guardrail and the model response
-    contains PII, the PII info in the response is redacted and 400 status code is returned."""
+    behavior: Optional[AiGatewayGuardrailPiiBehaviorBehavior] = None
+    """Configuration for input guardrail filters."""
 
     def as_dict(self) -> dict:
         """Serializes the AiGatewayGuardrailPiiBehavior into a dictionary suitable for use as a JSON request body."""
@@ -173,10 +167,6 @@ class AiGatewayGuardrailPiiBehavior:
 
 
 class AiGatewayGuardrailPiiBehaviorBehavior(Enum):
-    """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
-    guardrail and the request contains PII, the request is not sent to the model server and 400
-    status code is returned; if 'BLOCK' is set for the output guardrail and the model response
-    contains PII, the PII info in the response is redacted and 400 status code is returned."""
 
     BLOCK = 'BLOCK'
     NONE = 'NONE'
@@ -292,15 +282,12 @@ class AiGatewayRateLimit:
 
 
 class AiGatewayRateLimitKey(Enum):
-    """Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint'
-    being the default if not specified."""
 
     ENDPOINT = 'endpoint'
     USER = 'user'
 
 
 class AiGatewayRateLimitRenewalPeriod(Enum):
-    """Renewal period field for a rate limit. Currently, only 'minute' is supported."""
 
     MINUTE = 'minute'
 
@@ -339,9 +326,9 @@ class AmazonBedrockConfig:
 
     aws_access_key_id: Optional[str] = None
     """The Databricks secret key reference for an AWS access key ID with permissions to interact with
-    Bedrock services. If you prefer to paste your API key directly, see
-    must provide an API key using one of the following fields:
-    `aws_access_key_id_plaintext`."""
+    Bedrock services. If you prefer to paste your API key directly, see
+    `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields:
+    `aws_access_key_id` or `aws_access_key_id_plaintext`."""
 
     aws_access_key_id_plaintext: Optional[str] = None
     """An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext
@@ -399,8 +386,6 @@ class AmazonBedrockConfig:
 
 
 class AmazonBedrockConfigBedrockProvider(Enum):
-    """The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
-    Anthropic, Cohere, AI21Labs, Amazon."""
 
     AI21LABS = 'ai21labs'
     AMAZON = 'amazon'
@@ -490,18 +475,21 @@ class AutoCaptureConfigInput:
 @dataclass
 class AutoCaptureConfigOutput:
     catalog_name: Optional[str] = None
-    """The name of the catalog in Unity Catalog."""
+    """The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if
+    the inference table is already enabled."""
 
     enabled: Optional[bool] = None
     """Indicates whether the inference table is enabled."""
 
     schema_name: Optional[str] = None
-    """The name of the schema in Unity Catalog."""
+    """The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if
+    the inference table is already enabled."""
 
     state: Optional[AutoCaptureState] = None
 
     table_name_prefix: Optional[str] = None
-    """The prefix of the table in Unity Catalog."""
+    """The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if
+    the inference table is already enabled."""
 
     def as_dict(self) -> dict:
         """Serializes the AutoCaptureConfigOutput into a dictionary suitable for use as a JSON request body."""
@@ -666,8 +654,8 @@ class CreateServingEndpoint:
     """The core config of the serving endpoint."""
 
     ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE:
-
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""
 
     rate_limits: Optional[List[RateLimit]] = None
     """Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
@@ -712,6 +700,37 @@ class CreateServingEndpoint:
                    tags=_repeated_dict(d, 'tags', EndpointTag))
 
 
+@dataclass
+class DataPlaneInfo:
+    """Details necessary to query this object's API through the DataPlane APIs."""
+
+    authorization_details: Optional[str] = None
+    """Authorization details as a string."""
+
+    endpoint_url: Optional[str] = None
+    """The URL of the endpoint for this operation in the dataplane."""
+
+    def as_dict(self) -> dict:
+        """Serializes the DataPlaneInfo into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
+        if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the DataPlaneInfo into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
+        if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> DataPlaneInfo:
+        """Deserializes the DataPlaneInfo from a dictionary."""
+        return cls(authorization_details=d.get('authorization_details', None),
+                   endpoint_url=d.get('endpoint_url', None))
+
+
 @dataclass
 class DatabricksModelServingConfig:
     databricks_workspace_url: str
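The new DataPlaneInfo dataclass above round-trips cleanly through its as_dict/from_dict helpers. A minimal sketch (the field values are hypothetical):

from databricks.sdk.service.serving import DataPlaneInfo

info = DataPlaneInfo(authorization_details='<authorization-details>',
                     endpoint_url='https://example.cloud.databricks.com/serving-endpoints/query')
body = info.as_dict()  # {'authorization_details': ..., 'endpoint_url': ...}
assert DataPlaneInfo.from_dict(body) == info  # dataclass equality survives the round trip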
@@ -853,21 +872,22 @@ class EmbeddingsV1ResponseEmbeddingElementObject(Enum):
 class EndpointCoreConfigInput:
     auto_capture_config: Optional[AutoCaptureConfigInput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog."""
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     name: Optional[str] = None
     """The name of the serving endpoint to update. This field is required."""
 
     served_entities: Optional[List[ServedEntityInput]] = None
-    """
-    entities."""
+    """The list of served entities under the serving endpoint config."""
 
     served_models: Optional[List[ServedModelInput]] = None
-    """(Deprecated, use served_entities instead)
-
+    """(Deprecated, use served_entities instead) The list of served models under the serving endpoint
+    config."""
 
     traffic_config: Optional[TrafficConfig] = None
-    """The traffic
+    """The traffic configuration associated with the serving endpoint config."""
 
     def as_dict(self) -> dict:
         """Serializes the EndpointCoreConfigInput into a dictionary suitable for use as a JSON request body."""
@@ -903,7 +923,9 @@ class EndpointCoreConfigInput:
 class EndpointCoreConfigOutput:
     auto_capture_config: Optional[AutoCaptureConfigOutput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog."""
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     config_version: Optional[int] = None
     """The config version that the serving endpoint is currently serving."""
@@ -982,7 +1004,9 @@ class EndpointCoreConfigSummary:
 class EndpointPendingConfig:
     auto_capture_config: Optional[AutoCaptureConfigOutput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog."""
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     config_version: Optional[int] = None
     """The config version that the serving endpoint is currently serving."""
@@ -1068,10 +1092,6 @@ class EndpointState:
 
 
 class EndpointStateConfigUpdate(Enum):
-    """The state of an endpoint's config update. This informs the user if the pending_config is in
-    progress, if the update failed, or if there is no update in progress. Note that if the
-    endpoint's config_update state value is IN_PROGRESS, another update can not be made until the
-    update completes or fails."""
 
     IN_PROGRESS = 'IN_PROGRESS'
     NOT_UPDATING = 'NOT_UPDATING'
@@ -1080,9 +1100,6 @@ class EndpointStateConfigUpdate(Enum):
 
 
 class EndpointStateReady(Enum):
-    """The state of an endpoint, indicating whether or not the endpoint is queryable. An endpoint is
-    READY if all of the served entities in its active configuration are ready. If any of the
-    actively served entities are in a non-ready state, the endpoint state will be NOT_READY."""
 
     NOT_READY = 'NOT_READY'
     READY = 'READY'
@@ -1116,6 +1133,28 @@ class EndpointTag:
         return cls(key=d.get('key', None), value=d.get('value', None))
 
 
+@dataclass
+class EndpointTags:
+    tags: Optional[List[EndpointTag]] = None
+
+    def as_dict(self) -> dict:
+        """Serializes the EndpointTags into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.tags: body['tags'] = [v.as_dict() for v in self.tags]
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the EndpointTags into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.tags: body['tags'] = self.tags
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> EndpointTags:
+        """Deserializes the EndpointTags from a dictionary."""
+        return cls(tags=_repeated_dict(d, 'tags', EndpointTag))
+
+
 @dataclass
 class ExportMetricsResponse:
     contents: Optional[BinaryIO] = None
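EndpointTags is a thin wrapper around a list of EndpointTag values; serialization delegates to each tag's as_dict. A small usage sketch (the tag key and value are made up):

from databricks.sdk.service.serving import EndpointTag, EndpointTags

tags = EndpointTags(tags=[EndpointTag(key='team', value='ml-platform')])
tags.as_dict()  # {'tags': [{'key': 'team', 'value': 'ml-platform'}]}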
@@ -1138,12 +1177,105 @@ class ExportMetricsResponse:
         return cls(contents=d.get('contents', None))
 
 
+@dataclass
+class ExternalFunctionRequest:
+    """Simple Proto message for testing"""
+
+    connection_name: str
+    """The connection name to use. This is required to identify the external connection."""
+
+    method: ExternalFunctionRequestHttpMethod
+    """The HTTP method to use (e.g., 'GET', 'POST')."""
+
+    path: str
+    """The relative path for the API endpoint. This is required."""
+
+    headers: Optional[str] = None
+    """Additional headers for the request. If not provided, only auth headers from connections would be
+    passed."""
+
+    json: Optional[str] = None
+    """The JSON payload to send in the request body."""
+
+    params: Optional[str] = None
+    """Query parameters for the request."""
+
+    def as_dict(self) -> dict:
+        """Serializes the ExternalFunctionRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.connection_name is not None: body['connection_name'] = self.connection_name
+        if self.headers is not None: body['headers'] = self.headers
+        if self.json is not None: body['json'] = self.json
+        if self.method is not None: body['method'] = self.method.value
+        if self.params is not None: body['params'] = self.params
+        if self.path is not None: body['path'] = self.path
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the ExternalFunctionRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.connection_name is not None: body['connection_name'] = self.connection_name
+        if self.headers is not None: body['headers'] = self.headers
+        if self.json is not None: body['json'] = self.json
+        if self.method is not None: body['method'] = self.method
+        if self.params is not None: body['params'] = self.params
+        if self.path is not None: body['path'] = self.path
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionRequest:
+        """Deserializes the ExternalFunctionRequest from a dictionary."""
+        return cls(connection_name=d.get('connection_name', None),
+                   headers=d.get('headers', None),
+                   json=d.get('json', None),
+                   method=_enum(d, 'method', ExternalFunctionRequestHttpMethod),
+                   params=d.get('params', None),
+                   path=d.get('path', None))
+
+
+class ExternalFunctionRequestHttpMethod(Enum):
+
+    DELETE = 'DELETE'
+    GET = 'GET'
+    PATCH = 'PATCH'
+    POST = 'POST'
+    PUT = 'PUT'
+
+
+@dataclass
+class ExternalFunctionResponse:
+    status_code: Optional[int] = None
+    """The HTTP status code of the response"""
+
+    text: Optional[str] = None
+    """The content of the response"""
+
+    def as_dict(self) -> dict:
+        """Serializes the ExternalFunctionResponse into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.status_code is not None: body['status_code'] = self.status_code
+        if self.text is not None: body['text'] = self.text
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the ExternalFunctionResponse into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.status_code is not None: body['status_code'] = self.status_code
+        if self.text is not None: body['text'] = self.text
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionResponse:
+        """Deserializes the ExternalFunctionResponse from a dictionary."""
+        return cls(status_code=d.get('status_code', None), text=d.get('text', None))
+
+
 @dataclass
 class ExternalModel:
     provider: ExternalModelProvider
     """The name of the provider for the external model. Currently, the supported providers are
     'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
-    'google-cloud-vertex-ai', 'openai', and 'palm'."
+    'google-cloud-vertex-ai', 'openai', and 'palm'."""
 
     name: str
     """The name of the external model."""
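The new ExternalFunctionRequest/ExternalFunctionResponse pair models a generic HTTP call made through a Unity Catalog connection; note that as_dict emits the method enum's string value. A sketch with a hypothetical connection name and path:

from databricks.sdk.service.serving import (ExternalFunctionRequest,
                                            ExternalFunctionRequestHttpMethod)

req = ExternalFunctionRequest(connection_name='my_connection',  # hypothetical connection
                              method=ExternalFunctionRequestHttpMethod.POST,
                              path='/v1/predict',
                              json='{"inputs": [1, 2, 3]}')
req.as_dict()
# {'connection_name': 'my_connection', 'json': '{"inputs": [1, 2, 3]}',
#  'method': 'POST', 'path': '/v1/predict'}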
@@ -1230,9 +1362,6 @@ class ExternalModel:
 
 
 class ExternalModelProvider(Enum):
-    """The name of the provider for the external model. Currently, the supported providers are
-    'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
-    'google-cloud-vertex-ai', 'openai', and 'palm'.","""
 
     AI21LABS = 'ai21labs'
     AMAZON_BEDROCK = 'amazon-bedrock'
@@ -1281,17 +1410,16 @@ class ExternalModelUsageElement:
 
 @dataclass
 class FoundationModel:
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
+
     description: Optional[str] = None
-    """The description of the foundation model."""
 
     display_name: Optional[str] = None
-    """The display name of the foundation model."""
 
     docs: Optional[str] = None
-    """The URL to the documentation of the foundation model."""
 
     name: Optional[str] = None
-    """The name of the foundation model."""
 
     def as_dict(self) -> dict:
         """Serializes the FoundationModel into a dictionary suitable for use as a JSON request body."""
@@ -1322,23 +1450,24 @@ class FoundationModel:
 
 @dataclass
 class GetOpenApiResponse:
-
-    info, servers and paths, etc."""
+    contents: Optional[BinaryIO] = None
 
     def as_dict(self) -> dict:
         """Serializes the GetOpenApiResponse into a dictionary suitable for use as a JSON request body."""
         body = {}
+        if self.contents: body['contents'] = self.contents
         return body
 
     def as_shallow_dict(self) -> dict:
         """Serializes the GetOpenApiResponse into a shallow dictionary of its immediate attributes."""
         body = {}
+        if self.contents: body['contents'] = self.contents
         return body
 
     @classmethod
     def from_dict(cls, d: Dict[str, any]) -> GetOpenApiResponse:
         """Deserializes the GetOpenApiResponse from a dictionary."""
-        return cls()
+        return cls(contents=d.get('contents', None))
 
 
 @dataclass
@@ -1367,13 +1496,23 @@ class GetServingEndpointPermissionLevelsResponse:
 
 @dataclass
 class GoogleCloudVertexAiConfig:
+    project_id: str
+    """This is the Google Cloud project id that the service account is associated with."""
+
+    region: str
+    """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
+    details. Some models are only available in specific regions.
+
+    [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
+
     private_key: Optional[str] = None
     """The Databricks secret key reference for a private key for the service account which has access
     to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys].
     If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an
     API key using one of the following fields: `private_key` or `private_key_plaintext`
 
-    [Best practices for managing service account keys]:
+    [Best practices for managing service account keys]:
+    https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
 
     private_key_plaintext: Optional[str] = None
     """The private key for the service account which has access to the Google Cloud Vertex AI Service
@@ -1381,16 +1520,8 @@ class GoogleCloudVertexAiConfig:
     prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an
     API key using one of the following fields: `private_key` or `private_key_plaintext`.
 
-    [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
-
-    project_id: Optional[str] = None
-    """This is the Google Cloud project id that the service account is associated with."""
-
-    region: Optional[str] = None
-    """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
-    details. Some models are only available in specific regions.
-
-    [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
+    [Best practices for managing service account keys]:
+    https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
 
     def as_dict(self) -> dict:
         """Serializes the GoogleCloudVertexAiConfig into a dictionary suitable for use as a JSON request body."""
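Note the breaking change in GoogleCloudVertexAiConfig: project_id and region moved from optional fields to required ones, so code that constructed the config without them no longer works. A sketch with placeholder values:

from databricks.sdk.service.serving import GoogleCloudVertexAiConfig

config = GoogleCloudVertexAiConfig(project_id='my-gcp-project',  # now required
                                   region='us-central1',         # now required
                                   private_key='{{secrets/my_scope/vertex_key}}')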
@@ -1444,7 +1575,10 @@ class ListEndpointsResponse:
 
 @dataclass
 class ModelDataPlaneInfo:
-
+    """A representation of all DataPlaneInfo for operations that can be done on a model through Data
+    Plane APIs."""
+
+    query_info: Optional[DataPlaneInfo] = None
     """Information required to query DataPlane API 'query' endpoint."""
 
     def as_dict(self) -> dict:
@@ -1462,11 +1596,13 @@ class ModelDataPlaneInfo:
     @classmethod
     def from_dict(cls, d: Dict[str, any]) -> ModelDataPlaneInfo:
         """Deserializes the ModelDataPlaneInfo from a dictionary."""
-        return cls(query_info=_from_dict(d, 'query_info',
+        return cls(query_info=_from_dict(d, 'query_info', DataPlaneInfo))
 
 
 @dataclass
 class OpenAiConfig:
+    """Configs needed to create an OpenAI model route."""
+
     microsoft_entra_client_id: Optional[str] = None
     """This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID."""
 
@@ -1652,13 +1788,10 @@ class PatchServingEndpointTags:
 @dataclass
 class PayloadTable:
     name: Optional[str] = None
-    """The name of the payload table."""
 
     status: Optional[str] = None
-    """The status of the payload table."""
 
     status_message: Optional[str] = None
-    """The status message of the payload table."""
 
     def as_dict(self) -> dict:
         """Serializes the PayloadTable into a dictionary suitable for use as a JSON request body."""
@@ -1684,6 +1817,57 @@ class PayloadTable:
                    status_message=d.get('status_message', None))
 
 
+@dataclass
+class PutAiGatewayRequest:
+    guardrails: Optional[AiGatewayGuardrails] = None
+    """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and
+    responses."""
+
+    inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
+    """Configuration for payload logging using inference tables. Use these tables to monitor and audit
+    data being sent to and received from model APIs and to improve model quality."""
+
+    name: Optional[str] = None
+    """The name of the serving endpoint whose AI Gateway is being updated. This field is required."""
+
+    rate_limits: Optional[List[AiGatewayRateLimit]] = None
+    """Configuration for rate limits which can be set to limit endpoint traffic."""
+
+    usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None
+    """Configuration to enable usage tracking using system tables. These tables allow you to monitor
+    operational usage on endpoints and their associated costs."""
+
+    def as_dict(self) -> dict:
+        """Serializes the PutAiGatewayRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.guardrails: body['guardrails'] = self.guardrails.as_dict()
+        if self.inference_table_config: body['inference_table_config'] = self.inference_table_config.as_dict()
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
+        if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config.as_dict()
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the PutAiGatewayRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.guardrails: body['guardrails'] = self.guardrails
+        if self.inference_table_config: body['inference_table_config'] = self.inference_table_config
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = self.rate_limits
+        if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> PutAiGatewayRequest:
+        """Deserializes the PutAiGatewayRequest from a dictionary."""
+        return cls(guardrails=_from_dict(d, 'guardrails', AiGatewayGuardrails),
+                   inference_table_config=_from_dict(d, 'inference_table_config',
+                                                     AiGatewayInferenceTableConfig),
+                   name=d.get('name', None),
+                   rate_limits=_repeated_dict(d, 'rate_limits', AiGatewayRateLimit),
+                   usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
+
+
 @dataclass
 class PutAiGatewayResponse:
     guardrails: Optional[AiGatewayGuardrails] = None
@@ -1692,7 +1876,7 @@ class PutAiGatewayResponse:
 
     inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
     """Configuration for payload logging using inference tables. Use these tables to monitor and audit
-    data being sent to and received from model APIs and to improve model quality
+    data being sent to and received from model APIs and to improve model quality."""
 
     rate_limits: Optional[List[AiGatewayRateLimit]] = None
     """Configuration for rate limits which can be set to limit endpoint traffic."""
@@ -1729,6 +1913,34 @@ class PutAiGatewayResponse:
                    usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
 
 
+@dataclass
+class PutRequest:
+    name: Optional[str] = None
+    """The name of the serving endpoint whose rate limits are being updated. This field is required."""
+
+    rate_limits: Optional[List[RateLimit]] = None
+    """The list of endpoint rate limits."""
+
+    def as_dict(self) -> dict:
+        """Serializes the PutRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the PutRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = self.rate_limits
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> PutRequest:
+        """Deserializes the PutRequest from a dictionary."""
+        return cls(name=d.get('name', None), rate_limits=_repeated_dict(d, 'rate_limits', RateLimit))
+
+
 @dataclass
 class PutResponse:
     rate_limits: Optional[List[RateLimit]] = None
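PutRequest mirrors the body of the rate-limits PUT endpoint. A sketch; the `calls` field on RateLimit is assumed from the existing dataclass (the diff shows only its key and renewal-period enums), and the endpoint name is hypothetical:

from databricks.sdk.service.serving import (PutRequest, RateLimit, RateLimitKey,
                                            RateLimitRenewalPeriod)

limit = RateLimit(calls=100,  # assumed field, not shown in this diff
                  key=RateLimitKey.USER,
                  renewal_period=RateLimitRenewalPeriod.MINUTE)
PutRequest(name='my-endpoint', rate_limits=[limit]).as_dict()
# {'name': 'my-endpoint',
#  'rate_limits': [{'calls': 100, 'key': 'user', 'renewal_period': 'minute'}]}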
@@ -1994,15 +2206,12 @@ class RateLimit:
 
 
 class RateLimitKey(Enum):
-    """Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are
-    supported, with 'endpoint' being the default if not specified."""
 
     ENDPOINT = 'endpoint'
     USER = 'user'
 
 
 class RateLimitRenewalPeriod(Enum):
-    """Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported."""
 
     MINUTE = 'minute'
 
@@ -2043,11 +2252,9 @@ class ServedEntityInput:
     """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
     a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
     object, the full name of the object should be given in the form of
-
+    **catalog_name.schema_name.model_name**."""
 
     entity_version: Optional[str] = None
-    """The version of the model in Databricks Model Registry to be served or empty if the entity is a
-    FEATURE_SPEC."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
@@ -2076,7 +2283,7 @@ class ServedEntityInput:
     """The name of a served entity. It must be unique across an endpoint. A served entity name can
     consist of alphanumeric characters, dashes, and underscores. If not specified for an external
     model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
-    not specified for other entities, it defaults to
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
     """Whether the compute resources for the served entity should scale down to zero."""
@@ -2089,13 +2296,13 @@ class ServedEntityInput:
     scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
     is 0."""
 
-    workload_type: Optional[
+    workload_type: Optional[ServingModelWorkloadType] = None
     """The workload type of the served entity. The workload type selects which type of compute to use
     in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntityInput into a dictionary suitable for use as a JSON request body."""
@@ -2112,7 +2319,7 @@ class ServedEntityInput:
         if self.name is not None: body['name'] = self.name
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:
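With workload_type now typed as ServingModelWorkloadType, as_dict serializes the enum's .value rather than the enum member, and from_dict parses the string back through _enum (see the next hunk). A sketch, assuming ServingModelWorkloadType carries the GPU_SMALL member named in the docstring, with a hypothetical UC model:

from databricks.sdk.service.serving import ServedEntityInput, ServingModelWorkloadType

entity = ServedEntityInput(entity_name='main.default.my_model',  # hypothetical UC model
                           entity_version='1',
                           workload_size='Small',
                           scale_to_zero_enabled=True,
                           workload_type=ServingModelWorkloadType.GPU_SMALL)  # assumed member
entity.as_dict()['workload_type']  # 'GPU_SMALL' (a plain string, not the enum)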
@@ -2146,26 +2353,22 @@ class ServedEntityInput:
                    name=d.get('name', None),
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedEntityOutput:
     creation_timestamp: Optional[int] = None
-    """The creation timestamp of the served entity in Unix time."""
 
     creator: Optional[str] = None
-    """The email of the user who created the served entity."""
 
     entity_name: Optional[str] = None
-    """The name of the entity served. The entity may be a model in the Databricks Model Registry,
-    model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
-    object, the full name of the object
-
+    """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
+    a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
+    object, the full name of the object should be given in the form of
+    **catalog_name.schema_name.model_name**."""
 
     entity_version: Optional[str] = None
-    """The version of the served entity in Databricks Model Registry or empty if the entity is a
-    FEATURE_SPEC."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
@@ -2174,14 +2377,16 @@ class ServedEntityOutput:
     "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
 
     external_model: Optional[ExternalModel] = None
-    """The external model
-
-
+    """The external model to be served. NOTE: Only one of external_model and (entity_name,
+    entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with
+    the latter set being used for custom model serving for a Databricks registered model. For an
+    existing endpoint with external_model, it cannot be updated to an endpoint without
+    external_model. If the endpoint is created without external_model, users cannot update it to add
+    external_model later. The task type of all external models within an endpoint must be the same."""
 
     foundation_model: Optional[FoundationModel] = None
-    """
-
-    returned based on the endpoint type."""
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
 
     instance_profile_arn: Optional[str] = None
     """ARN of the instance profile that the served entity uses to access AWS resources."""
@@ -2193,13 +2398,15 @@ class ServedEntityOutput:
     """The minimum tokens per second that the endpoint can scale down to."""
 
     name: Optional[str] = None
-    """The name of
+    """The name of a served entity. It must be unique across an endpoint. A served entity name can
+    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
+    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
     """Whether the compute resources for the served entity should scale down to zero."""
 
     state: Optional[ServedModelState] = None
-    """Information corresponding to the state of the served entity."""
 
     workload_size: Optional[str] = None
     """The workload size of the served entity. The workload size corresponds to a range of provisioned
@@ -2207,15 +2414,15 @@ class ServedEntityOutput:
     process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
     "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
     scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
-
+    is 0."""
 
-    workload_type: Optional[
+    workload_type: Optional[ServingModelWorkloadType] = None
     """The workload type of the served entity. The workload type selects which type of compute to use
     in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntityOutput into a dictionary suitable for use as a JSON request body."""
@@ -2236,7 +2443,7 @@ class ServedEntityOutput:
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.state: body['state'] = self.state.as_dict()
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:
@@ -2278,31 +2485,22 @@ class ServedEntityOutput:
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    state=_from_dict(d, 'state', ServedModelState),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedEntitySpec:
     entity_name: Optional[str] = None
-    """The name of the entity served. The entity may be a model in the Databricks Model Registry, a
-    model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
-    object, the full name of the object is given in the form of
-    __catalog_name__.__schema_name__.__model_name__."""
 
     entity_version: Optional[str] = None
-    """The version of the served entity in Databricks Model Registry or empty if the entity is a
-    FEATURE_SPEC."""
 
     external_model: Optional[ExternalModel] = None
-    """The external model that is served. NOTE: Only one of external_model, foundation_model, and
-    (entity_name, entity_version) is returned based on the endpoint type."""
 
     foundation_model: Optional[FoundationModel] = None
-    """
-
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
 
     name: Optional[str] = None
-    """The name of the served entity."""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntitySpec into a dictionary suitable for use as a JSON request body."""
@@ -2336,24 +2534,21 @@ class ServedEntitySpec:
 
 @dataclass
 class ServedModelInput:
+    scale_to_zero_enabled: bool
+    """Whether the compute resources for the served entity should scale down to zero."""
+
     model_name: str
-    """The name of the model in Databricks Model Registry to be served or if the model resides in Unity
-    Catalog, the full name of model, in the form of __catalog_name__.__schema_name__.__model_name__."""
 
     model_version: str
-    """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
-
-    scale_to_zero_enabled: bool
-    """Whether the compute resources for the served model should scale down to zero."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
-    for serving this
-
+    for serving this entity. Note: this is an experimental feature and subject to change. Example
+    entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
     "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
 
     instance_profile_arn: Optional[str] = None
-    """ARN of the instance profile that the served
+    """ARN of the instance profile that the served entity uses to access AWS resources."""
 
     max_provisioned_throughput: Optional[int] = None
     """The maximum tokens per second that the endpoint can scale up to."""
@@ -2362,25 +2557,26 @@ class ServedModelInput:
     """The minimum tokens per second that the endpoint can scale down to."""
 
     name: Optional[str] = None
-    """The name of a served
-
-
+    """The name of a served entity. It must be unique across an endpoint. A served entity name can
+    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
+    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     workload_size: Optional[ServedModelInputWorkloadSize] = None
-    """The workload size of the served
-    concurrency that the compute
-
-
-
-
+    """The workload size of the served entity. The workload size corresponds to a range of provisioned
+    concurrency that the compute autoscales between. A single unit of provisioned concurrency can
+    process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
+    "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
+    scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
+    is 0."""
 
     workload_type: Optional[ServedModelInputWorkloadType] = None
-    """The workload type of the served
-    the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
+    """The workload type of the served entity. The workload type selects which type of compute to use
+    in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelInput into a dictionary suitable for use as a JSON request body."""
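Because scale_to_zero_enabled moved ahead of model_name and model_version in ServedModelInput, positional construction written against 0.40.0 now mis-assigns arguments; keyword arguments sidestep the reordering. A sketch with placeholder names:

from databricks.sdk.service.serving import ServedModelInput

served = ServedModelInput(model_name='my_registry_model',  # hypothetical model
                          model_version='2',
                          scale_to_zero_enabled=True)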
@@ -2432,12 +2628,6 @@ class ServedModelInput:
 
 
 class ServedModelInputWorkloadSize(Enum):
-    """The workload size of the served model. The workload size corresponds to a range of provisioned
-    concurrency that the compute will autoscale between. A single unit of provisioned concurrency
-    can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
-    concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
-    concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
-    each workload size will be 0."""
 
     LARGE = 'Large'
     MEDIUM = 'Medium'
@@ -2445,12 +2635,6 @@ class ServedModelInputWorkloadSize(Enum):
 
 
 class ServedModelInputWorkloadType(Enum):
-    """The workload type of the served model. The workload type selects which type of compute to use in
-    the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
-    acceleration is available by selecting workload types like GPU_SMALL and others. See the
-    available [GPU types].
-
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     CPU = 'CPU'
     GPU_LARGE = 'GPU_LARGE'
@@ -2462,51 +2646,48 @@ class ServedModelInputWorkloadType(Enum):
 @dataclass
 class ServedModelOutput:
     creation_timestamp: Optional[int] = None
-    """The creation timestamp of the served model in Unix time."""
 
     creator: Optional[str] = None
-    """The email of the user who created the served model."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
-    for serving this
-
+    for serving this entity. Note: this is an experimental feature and subject to change. Example
+    entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
     "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
 
     instance_profile_arn: Optional[str] = None
-    """ARN of the instance profile that the served
+    """ARN of the instance profile that the served entity uses to access AWS resources."""
 
     model_name: Optional[str] = None
-    """The name of the model in Databricks Model Registry or the full name of the model in Unity
-    Catalog."""
 
     model_version: Optional[str] = None
-    """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
 
     name: Optional[str] = None
-    """The name of
+    """The name of a served entity. It must be unique across an endpoint. A served entity name can
+    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
+    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
-    """Whether the compute resources for the
+    """Whether the compute resources for the served entity should scale down to zero."""
 
     state: Optional[ServedModelState] = None
-    """Information corresponding to the state of the Served Model."""
 
     workload_size: Optional[str] = None
-    """The workload size of the served
-    concurrency that the compute
-
-
-
-
-
-    workload_type: Optional[
-    """The workload type of the served
-    the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
+    """The workload size of the served entity. The workload size corresponds to a range of provisioned
+    concurrency that the compute autoscales between. A single unit of provisioned concurrency can
+    process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
+    "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
+    scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
+    is 0."""
+
+    workload_type: Optional[ServingModelWorkloadType] = None
+    """The workload type of the served entity. The workload type selects which type of compute to use
+    in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelOutput into a dictionary suitable for use as a JSON request body."""
@@ -2521,7 +2702,7 @@ class ServedModelOutput:
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.state: body['state'] = self.state.as_dict()
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:
@@ -2553,20 +2734,18 @@ class ServedModelOutput:
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    state=_from_dict(d, 'state', ServedModelState),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedModelSpec:
     model_name: Optional[str] = None
-    """
-    Catalog."""
+    """Only one of model_name and entity_name should be populated"""
 
     model_version: Optional[str] = None
-    """
+    """Only one of model_version and entity_version should be populated"""
 
     name: Optional[str] = None
-    """The name of the served model."""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelSpec into a dictionary suitable for use as a JSON request body."""
@@ -2595,18 +2774,8 @@ class ServedModelSpec:
 @dataclass
 class ServedModelState:
     deployment: Optional[ServedModelStateDeployment] = None
-    """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
-    is not ready yet because the deployment is still being created (i.e container image is building,
-    model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
-    served entity was previously in a ready state but no longer is and is attempting to recover.
-    DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
-    indicates that there was an error trying to bring up the served entity (e.g container image
-    build failed, the model server failed to start due to a model loading error, etc.)
-    DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
-    bringing up another served entity under the same endpoint and config version."""
 
     deployment_state_message: Optional[str] = None
-    """More information about the state of the served entity, if available."""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelState into a dictionary suitable for use as a JSON request body."""
@@ -2632,15 +2801,6 @@ class ServedModelState:
 
 
 class ServedModelStateDeployment(Enum):
-    """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
-    is not ready yet because the deployment is still being created (i.e container image is building,
-    model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
-    served entity was previously in a ready state but no longer is and is attempting to recover.
-    DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
-    indicates that there was an error trying to bring up the served entity (e.g container image
-    build failed, the model server failed to start due to a model loading error, etc.)
-    DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
-    bringing up another served entity under the same endpoint and config version."""
 
     ABORTED = 'DEPLOYMENT_ABORTED'
     CREATING = 'DEPLOYMENT_CREATING'
@@ -2675,8 +2835,8 @@ class ServerLogsResponse:
 @dataclass
 class ServingEndpoint:
     ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are
-    currently supported."""
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""
 
     config: Optional[EndpointCoreConfigSummary] = None
     """The config that is currently being served by the endpoint."""
@@ -2688,8 +2848,7 @@ class ServingEndpoint:
     """The email of the user who created the serving endpoint."""
 
     id: Optional[str] = None
-    """System-generated ID of the endpoint. This is used to refer to the endpoint in the Permissions
-    API"""
+    """System-generated ID of the endpoint, included to be used by the Permissions API."""
 
     last_updated_timestamp: Optional[int] = None
     """The timestamp when the endpoint was last updated by a user in Unix time."""
@@ -2848,8 +3007,8 @@ class ServingEndpointAccessControlResponse:
 @dataclass
 class ServingEndpointDetailed:
     ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are
-    currently supported."""
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""
 
     config: Optional[EndpointCoreConfigOutput] = None
     """The config that is currently being served by the endpoint."""
@@ -2957,7 +3116,6 @@ class ServingEndpointDetailed:
 
 
 class ServingEndpointDetailedPermissionLevel(Enum):
-    """The permission level of the principal making the request."""
 
     CAN_MANAGE = 'CAN_MANAGE'
     CAN_QUERY = 'CAN_QUERY'
@@ -3097,6 +3255,15 @@ class ServingEndpointPermissionsRequest:
                    serving_endpoint_id=d.get('serving_endpoint_id', None))
 
 
+class ServingModelWorkloadType(Enum):
+
+    CPU = 'CPU'
+    GPU_LARGE = 'GPU_LARGE'
+    GPU_MEDIUM = 'GPU_MEDIUM'
+    GPU_SMALL = 'GPU_SMALL'
+    MULTIGPU_MEDIUM = 'MULTIGPU_MEDIUM'
+
+
 @dataclass
 class TrafficConfig:
     routes: Optional[List[Route]] = None
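The new enum gives the previously free-form workload-type strings a closed set of values. A quick sketch of the wire-value mapping, using only members defined above:

    from databricks.sdk.service.serving import ServingModelWorkloadType

    # Look up an enum member from its wire value and go back again.
    wt = ServingModelWorkloadType('MULTIGPU_MEDIUM')
    assert wt is ServingModelWorkloadType.MULTIGPU_MEDIUM
    assert wt.value == 'MULTIGPU_MEDIUM'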
@@ -3250,8 +3417,8 @@ class ServingEndpointsAPI:
         :param config: :class:`EndpointCoreConfigInput`
           The core config of the serving endpoint.
         :param ai_gateway: :class:`AiGatewayConfig` (optional)
-          The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are
-          currently supported.
+          The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+          throughput endpoints are currently supported.
         :param rate_limits: List[:class:`RateLimit`] (optional)
           Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
           Gateway to manage rate limits.
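With the NOTE above relaxed, create() can now attach an AI Gateway config to provisioned throughput endpoints as well as external model endpoints. A hedged sketch of a create call; the endpoint and model names are placeholders, and only fields referenced in this diff are used:

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.serving import (AiGatewayConfig,
                                                AiGatewayUsageTrackingConfig,
                                                EndpointCoreConfigInput,
                                                ServedEntityInput)

    w = WorkspaceClient()
    endpoint = w.serving_endpoints.create(
        name='my-endpoint',
        config=EndpointCoreConfigInput(served_entities=[
            ServedEntityInput(entity_name='main.default.my_model',
                              entity_version='1',
                              workload_size='Small',
                              scale_to_zero_enabled=True)
        ]),
        ai_gateway=AiGatewayConfig(
            usage_tracking_config=AiGatewayUsageTrackingConfig(enabled=True))
    ).result()  # create() returns a waiter; result() blocks until the endpoint is ready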
@@ -3299,7 +3466,6 @@ class ServingEndpointsAPI:
         """Delete a serving endpoint.
 
         :param name: str
-          The name of the serving endpoint. This field is required.
 
 
         """
@@ -3341,7 +3507,7 @@ class ServingEndpointsAPI:
         res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}', headers=headers)
         return ServingEndpointDetailed.from_dict(res)
 
-    def get_open_api(self, name: str):
+    def get_open_api(self, name: str) -> GetOpenApiResponse:
         """Get the schema for a serving endpoint.
 
         Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for
@@ -3350,12 +3516,13 @@ class ServingEndpointsAPI:
         :param name: str
           The name of the serving endpoint that the served model belongs to. This field is required.
 
-
+        :returns: :class:`GetOpenApiResponse`
         """
 
-        headers = {'Accept': 'application/json', }
+        headers = {'Accept': 'text/plain', }
 
-        self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers)
+        res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers, raw=True)
+        return GetOpenApiResponse.from_dict(res)
 
     def get_permission_levels(self, serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse:
         """Get serving endpoint permission levels.
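get_open_api() previously discarded the server response; per the hunk above it now fetches the raw text/plain payload and wraps it in a typed GetOpenApiResponse. A usage sketch with a placeholder endpoint name:

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()
    # In 0.40.0 this call returned None; in 0.41.0 it returns a GetOpenApiResponse.
    schema = w.serving_endpoints.get_open_api('my-endpoint')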
@@ -3394,6 +3561,44 @@ class ServingEndpointsAPI:
                                headers=headers)
         return ServingEndpointPermissions.from_dict(res)
 
+    def http_request(self,
+                     connection_name: str,
+                     method: ExternalFunctionRequestHttpMethod,
+                     path: str,
+                     *,
+                     headers: Optional[str] = None,
+                     json: Optional[str] = None,
+                     params: Optional[str] = None) -> ExternalFunctionResponse:
+        """Make external services call using the credentials stored in UC Connection.
+
+        :param connection_name: str
+          The connection name to use. This is required to identify the external connection.
+        :param method: :class:`ExternalFunctionRequestHttpMethod`
+          The HTTP method to use (e.g., 'GET', 'POST').
+        :param path: str
+          The relative path for the API endpoint. This is required.
+        :param headers: str (optional)
+          Additional headers for the request. If not provided, only auth headers from connections would be
+          passed.
+        :param json: str (optional)
+          The JSON payload to send in the request body.
+        :param params: str (optional)
+          Query parameters for the request.
+
+        :returns: :class:`ExternalFunctionResponse`
+        """
+        body = {}
+        if connection_name is not None: body['connection_name'] = connection_name
+        if headers is not None: body['headers'] = headers
+        if json is not None: body['json'] = json
+        if method is not None: body['method'] = method.value
+        if params is not None: body['params'] = params
+        if path is not None: body['path'] = path
+        headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
+
+        res = self._api.do('POST', '/api/2.0/external-function', body=body, headers=headers)
+        return ExternalFunctionResponse.from_dict(res)
+
     def list(self) -> Iterator[ServingEndpoint]:
         """Get all serving endpoints.
 
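http_request() is new in 0.41.0 and proxies a call to an external service using the credentials stored in a Unity Catalog connection. A sketch with a placeholder connection name and path; the optional headers/json/params arguments are string-typed in this signature and omitted here:

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.serving import ExternalFunctionRequestHttpMethod

    w = WorkspaceClient()
    # 'my_connection' is a placeholder UC connection; '/v1/status' a placeholder path.
    resp = w.serving_endpoints.http_request(
        connection_name='my_connection',
        method=ExternalFunctionRequestHttpMethod.GET,
        path='/v1/status')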
@@ -3430,7 +3635,7 @@ class ServingEndpointsAPI:
               name: str,
               *,
               add_tags: Optional[List[EndpointTag]] = None,
-              delete_tags: Optional[List[str]] = None) -> Iterator[EndpointTag]:
+              delete_tags: Optional[List[str]] = None) -> EndpointTags:
         """Update tags of a serving endpoint.
 
         Used to batch add and delete tags from a serving endpoint with a single API call.
@@ -3442,7 +3647,7 @@ class ServingEndpointsAPI:
         :param delete_tags: List[str] (optional)
           List of tag keys to delete
 
-        :returns: Iterator over :class:`EndpointTag`
+        :returns: :class:`EndpointTags`
         """
         body = {}
         if add_tags is not None: body['add_tags'] = [v.as_dict() for v in add_tags]
@@ -3450,7 +3655,7 @@ class ServingEndpointsAPI:
         headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
 
         res = self._api.do('PATCH', f'/api/2.0/serving-endpoints/{name}/tags', body=body, headers=headers)
-        return [EndpointTag.from_dict(v) for v in res]
+        return EndpointTags.from_dict(res)
 
     def put(self, name: str, *, rate_limits: Optional[List[RateLimit]] = None) -> PutResponse:
         """Update rate limits of a serving endpoint.
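Per the three hunks above, patch() now returns a single structured EndpointTags object instead of a bare list of EndpointTag. A sketch with placeholder endpoint and tag names:

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.serving import EndpointTag

    w = WorkspaceClient()
    tags = w.serving_endpoints.patch('my-endpoint',
                                     add_tags=[EndpointTag(key='team', value='ml')],
                                     delete_tags=['stale-tag'])
    # In 0.41.0, `tags` is an EndpointTags object; in 0.40.0 it was a list.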
@@ -3485,8 +3690,8 @@ class ServingEndpointsAPI:
                        usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None) -> PutAiGatewayResponse:
         """Update AI Gateway of a serving endpoint.
 
-        Used to update the AI Gateway of a serving endpoint. NOTE: Only external model endpoints are currently
-        supported.
+        Used to update the AI Gateway of a serving endpoint. NOTE: Only external model and provisioned
+        throughput endpoints are currently supported.
 
         :param name: str
           The name of the serving endpoint whose AI Gateway is being updated. This field is required.
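With the NOTE relaxed to cover provisioned throughput endpoints, the same put_ai_gateway() call now applies to them. A sketch, assuming 'my-pt-endpoint' stands in for a provisioned throughput endpoint name:

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.serving import AiGatewayUsageTrackingConfig

    w = WorkspaceClient()
    # Enable usage tracking through the AI Gateway on the endpoint.
    w.serving_endpoints.put_ai_gateway(
        'my-pt-endpoint',
        usage_tracking_config=AiGatewayUsageTrackingConfig(enabled=True))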
@@ -3646,14 +3851,16 @@ class ServingEndpointsAPI:
           The name of the serving endpoint to update. This field is required.
         :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional)
           Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
+          Note: this field is deprecated for creating new provisioned throughput endpoints, or updating
+          existing provisioned throughput endpoints that never have inference table configured; in these cases
+          please use AI Gateway to manage inference tables.
         :param served_entities: List[:class:`ServedEntityInput`] (optional)
-          A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served
-          entities.
+          The list of served entities under the serving endpoint config.
         :param served_models: List[:class:`ServedModelInput`] (optional)
-          (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A
-          serving endpoint can have up to 15 served models.
+          (Deprecated, use served_entities instead) The list of served models under the serving endpoint
+          config.
         :param traffic_config: :class:`TrafficConfig` (optional)
-          The traffic config defining how invocations to the serving endpoint should be routed.
+          The traffic configuration associated with the serving endpoint config.
 
         :returns:
           Long-running operation waiter for :class:`ServingEndpointDetailed`.
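update_config() still returns a long-running-operation waiter. A sketch using the served_entities parameter documented above; the endpoint name, model name, and version are placeholders:

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.serving import ServedEntityInput

    w = WorkspaceClient()
    endpoint = w.serving_endpoints.update_config(
        name='my-endpoint',
        served_entities=[ServedEntityInput(entity_name='main.default.my_model',
                                           entity_version='2',
                                           workload_size='Small',
                                           scale_to_zero_enabled=True)]
    ).result()  # blocks until the new config version finishes rolling out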
@@ -3725,6 +3932,7 @@ class ServingEndpointsDataPlaneAPI:
     def __init__(self, api_client, control_plane):
         self._api = api_client
         self._control_plane = control_plane
+        from ..data_plane import DataPlaneService
         self._data_plane_service = DataPlaneService()
 
     def query(self,
|