databricks-sdk 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

@@ -12,14 +12,11 @@ from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional
 
 import requests
 
-from ..data_plane import DataPlaneService
 from ..errors import OperationFailed
 from ._internal import Wait, _enum, _from_dict, _repeated_dict
 
 _LOG = logging.getLogger('databricks.sdk')
 
-from databricks.sdk.service import oauth2
-
 # all definitions in this file are in alphabetical order
 
 
@@ -148,11 +145,8 @@ class AiGatewayGuardrailParameters:
 
 @dataclass
 class AiGatewayGuardrailPiiBehavior:
-    behavior: AiGatewayGuardrailPiiBehaviorBehavior
-    """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
-    guardrail and the request contains PII, the request is not sent to the model server and 400
-    status code is returned; if 'BLOCK' is set for the output guardrail and the model response
-    contains PII, the PII info in the response is redacted and 400 status code is returned."""
+    behavior: Optional[AiGatewayGuardrailPiiBehaviorBehavior] = None
+    """Configuration for input guardrail filters."""
 
     def as_dict(self) -> dict:
         """Serializes the AiGatewayGuardrailPiiBehavior into a dictionary suitable for use as a JSON request body."""
@@ -173,10 +167,6 @@ class AiGatewayGuardrailPiiBehavior:
 
 
 class AiGatewayGuardrailPiiBehaviorBehavior(Enum):
-    """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
-    guardrail and the request contains PII, the request is not sent to the model server and 400
-    status code is returned; if 'BLOCK' is set for the output guardrail and the model response
-    contains PII, the PII info in the response is redacted and 400 status code is returned."""
 
     BLOCK = 'BLOCK'
     NONE = 'NONE'
@@ -292,15 +282,12 @@ class AiGatewayRateLimit:
 
 
 class AiGatewayRateLimitKey(Enum):
-    """Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint'
-    being the default if not specified."""
 
     ENDPOINT = 'endpoint'
     USER = 'user'
 
 
 class AiGatewayRateLimitRenewalPeriod(Enum):
-    """Renewal period field for a rate limit. Currently, only 'minute' is supported."""
 
     MINUTE = 'minute'
 
@@ -339,9 +326,9 @@ class AmazonBedrockConfig:
 
     aws_access_key_id: Optional[str] = None
     """The Databricks secret key reference for an AWS access key ID with permissions to interact with
-    Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You
-    must provide an API key using one of the following fields: `aws_access_key_id` or
-    `aws_access_key_id_plaintext`."""
+    Bedrock services. If you prefer to paste your API key directly, see
+    `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields:
+    `aws_access_key_id` or `aws_access_key_id_plaintext`."""
 
     aws_access_key_id_plaintext: Optional[str] = None
     """An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext
@@ -399,8 +386,6 @@ class AmazonBedrockConfig:
 
 
 class AmazonBedrockConfigBedrockProvider(Enum):
-    """The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
-    Anthropic, Cohere, AI21Labs, Amazon."""
 
     AI21LABS = 'ai21labs'
     AMAZON = 'amazon'
@@ -490,18 +475,21 @@ class AutoCaptureConfigInput:
 @dataclass
 class AutoCaptureConfigOutput:
     catalog_name: Optional[str] = None
-    """The name of the catalog in Unity Catalog."""
+    """The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if
+    the inference table is already enabled."""
 
     enabled: Optional[bool] = None
     """Indicates whether the inference table is enabled."""
 
     schema_name: Optional[str] = None
-    """The name of the schema in Unity Catalog."""
+    """The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if
+    the inference table is already enabled."""
 
     state: Optional[AutoCaptureState] = None
 
     table_name_prefix: Optional[str] = None
-    """The prefix of the table in Unity Catalog."""
+    """The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if
+    the inference table is already enabled."""
 
     def as_dict(self) -> dict:
         """Serializes the AutoCaptureConfigOutput into a dictionary suitable for use as a JSON request body."""
@@ -662,12 +650,12 @@ class CreateServingEndpoint:
     """The name of the serving endpoint. This field is required and must be unique across a Databricks
     workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores."""
 
-    config: EndpointCoreConfigInput
-    """The core config of the serving endpoint."""
-
     ai_gateway: Optional[AiGatewayConfig] = None
-    """The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are
-    supported as of now."""
+    """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+    throughput endpoints are currently supported."""
+
+    config: Optional[EndpointCoreConfigInput] = None
+    """The core config of the serving endpoint."""
 
     rate_limits: Optional[List[RateLimit]] = None
     """Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
@@ -712,6 +700,37 @@ class CreateServingEndpoint:
                    tags=_repeated_dict(d, 'tags', EndpointTag))
 
 
+@dataclass
+class DataPlaneInfo:
+    """Details necessary to query this object's API through the DataPlane APIs."""
+
+    authorization_details: Optional[str] = None
+    """Authorization details as a string."""
+
+    endpoint_url: Optional[str] = None
+    """The URL of the endpoint for this operation in the dataplane."""
+
+    def as_dict(self) -> dict:
+        """Serializes the DataPlaneInfo into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
+        if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the DataPlaneInfo into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
+        if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> DataPlaneInfo:
+        """Deserializes the DataPlaneInfo from a dictionary."""
+        return cls(authorization_details=d.get('authorization_details', None),
+                   endpoint_url=d.get('endpoint_url', None))
+
+
 @dataclass
 class DatabricksModelServingConfig:
     databricks_workspace_url: str
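
The new module-local DataPlaneInfo is what lets the file drop the oauth2 and data_plane imports removed in the first hunk; ModelDataPlaneInfo further down switches its query_info field over to it. A quick round-trip sketch under the same import-path assumption, with placeholder values:

    from databricks.sdk.service.serving import DataPlaneInfo

    info = DataPlaneInfo(authorization_details='<authorization-details>',  # placeholder
                         endpoint_url='https://example.com/serving/query')  # placeholder
    assert DataPlaneInfo.from_dict(info.as_dict()) == info  # dataclasses compare by value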
@@ -853,21 +872,22 @@ class EmbeddingsV1ResponseEmbeddingElementObject(Enum):
 class EndpointCoreConfigInput:
     auto_capture_config: Optional[AutoCaptureConfigInput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog."""
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     name: Optional[str] = None
     """The name of the serving endpoint to update. This field is required."""
 
     served_entities: Optional[List[ServedEntityInput]] = None
-    """A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served
-    entities."""
+    """The list of served entities under the serving endpoint config."""
 
     served_models: Optional[List[ServedModelInput]] = None
-    """(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A
-    serving endpoint can have up to 15 served models."""
+    """(Deprecated, use served_entities instead) The list of served models under the serving endpoint
+    config."""
 
     traffic_config: Optional[TrafficConfig] = None
-    """The traffic config defining how invocations to the serving endpoint should be routed."""
+    """The traffic configuration associated with the serving endpoint config."""
 
     def as_dict(self) -> dict:
         """Serializes the EndpointCoreConfigInput into a dictionary suitable for use as a JSON request body."""
@@ -903,7 +923,9 @@ class EndpointCoreConfigInput:
 class EndpointCoreConfigOutput:
     auto_capture_config: Optional[AutoCaptureConfigOutput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog."""
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     config_version: Optional[int] = None
     """The config version that the serving endpoint is currently serving."""
@@ -982,7 +1004,9 @@ class EndpointCoreConfigSummary:
 class EndpointPendingConfig:
     auto_capture_config: Optional[AutoCaptureConfigOutput] = None
     """Configuration for Inference Tables which automatically logs requests and responses to Unity
-    Catalog."""
+    Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
+    updating existing provisioned throughput endpoints that never have inference table configured;
+    in these cases please use AI Gateway to manage inference tables."""
 
     config_version: Optional[int] = None
     """The config version that the serving endpoint is currently serving."""
@@ -1068,10 +1092,6 @@ class EndpointState:
 
 
 class EndpointStateConfigUpdate(Enum):
-    """The state of an endpoint's config update. This informs the user if the pending_config is in
-    progress, if the update failed, or if there is no update in progress. Note that if the
-    endpoint's config_update state value is IN_PROGRESS, another update can not be made until the
-    update completes or fails."""
 
     IN_PROGRESS = 'IN_PROGRESS'
     NOT_UPDATING = 'NOT_UPDATING'
@@ -1080,9 +1100,6 @@ class EndpointStateConfigUpdate(Enum):
 
 
 class EndpointStateReady(Enum):
-    """The state of an endpoint, indicating whether or not the endpoint is queryable. An endpoint is
-    READY if all of the served entities in its active configuration are ready. If any of the
-    actively served entities are in a non-ready state, the endpoint state will be NOT_READY."""
 
     NOT_READY = 'NOT_READY'
     READY = 'READY'
@@ -1116,6 +1133,28 @@ class EndpointTag:
         return cls(key=d.get('key', None), value=d.get('value', None))
 
 
+@dataclass
+class EndpointTags:
+    tags: Optional[List[EndpointTag]] = None
+
+    def as_dict(self) -> dict:
+        """Serializes the EndpointTags into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.tags: body['tags'] = [v.as_dict() for v in self.tags]
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the EndpointTags into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.tags: body['tags'] = self.tags
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> EndpointTags:
+        """Deserializes the EndpointTags from a dictionary."""
+        return cls(tags=_repeated_dict(d, 'tags', EndpointTag))
+
+
 @dataclass
 class ExportMetricsResponse:
     contents: Optional[BinaryIO] = None
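
EndpointTags is a thin wrapper around a list of EndpointTag; as_dict recursively serializes the nested tags, while as_shallow_dict keeps the dataclass instances. A sketch of the round trip, under the same import assumption (tag key/value are placeholders):

    from databricks.sdk.service.serving import EndpointTag, EndpointTags

    tags = EndpointTags(tags=[EndpointTag(key='team', value='ml')])  # placeholder tag
    assert tags.as_dict() == {'tags': [{'key': 'team', 'value': 'ml'}]}
    assert EndpointTags.from_dict(tags.as_dict()) == tags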
@@ -1138,12 +1177,77 @@ class ExportMetricsResponse:
         return cls(contents=d.get('contents', None))
 
 
+@dataclass
+class ExternalFunctionRequest:
+    """Simple Proto message for testing"""
+
+    connection_name: str
+    """The connection name to use. This is required to identify the external connection."""
+
+    method: ExternalFunctionRequestHttpMethod
+    """The HTTP method to use (e.g., 'GET', 'POST')."""
+
+    path: str
+    """The relative path for the API endpoint. This is required."""
+
+    headers: Optional[str] = None
+    """Additional headers for the request. If not provided, only auth headers from connections would be
+    passed."""
+
+    json: Optional[str] = None
+    """The JSON payload to send in the request body."""
+
+    params: Optional[str] = None
+    """Query parameters for the request."""
+
+    def as_dict(self) -> dict:
+        """Serializes the ExternalFunctionRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.connection_name is not None: body['connection_name'] = self.connection_name
+        if self.headers is not None: body['headers'] = self.headers
+        if self.json is not None: body['json'] = self.json
+        if self.method is not None: body['method'] = self.method.value
+        if self.params is not None: body['params'] = self.params
+        if self.path is not None: body['path'] = self.path
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the ExternalFunctionRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.connection_name is not None: body['connection_name'] = self.connection_name
+        if self.headers is not None: body['headers'] = self.headers
+        if self.json is not None: body['json'] = self.json
+        if self.method is not None: body['method'] = self.method
+        if self.params is not None: body['params'] = self.params
+        if self.path is not None: body['path'] = self.path
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionRequest:
+        """Deserializes the ExternalFunctionRequest from a dictionary."""
+        return cls(connection_name=d.get('connection_name', None),
+                   headers=d.get('headers', None),
+                   json=d.get('json', None),
+                   method=_enum(d, 'method', ExternalFunctionRequestHttpMethod),
+                   params=d.get('params', None),
+                   path=d.get('path', None))
+
+
+class ExternalFunctionRequestHttpMethod(Enum):
+
+    DELETE = 'DELETE'
+    GET = 'GET'
+    PATCH = 'PATCH'
+    POST = 'POST'
+    PUT = 'PUT'
+
+
 @dataclass
 class ExternalModel:
     provider: ExternalModelProvider
     """The name of the provider for the external model. Currently, the supported providers are
     'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
-    'google-cloud-vertex-ai', 'openai', and 'palm'.","""
+    'google-cloud-vertex-ai', 'openai', and 'palm'."""
 
     name: str
     """The name of the external model."""
@@ -1230,9 +1334,6 @@ class ExternalModel:
 
 
 class ExternalModelProvider(Enum):
-    """The name of the provider for the external model. Currently, the supported providers are
-    'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
-    'google-cloud-vertex-ai', 'openai', and 'palm'.","""
 
     AI21LABS = 'ai21labs'
     AMAZON_BEDROCK = 'amazon-bedrock'
@@ -1281,17 +1382,16 @@ class ExternalModelUsageElement:
 
 @dataclass
 class FoundationModel:
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
+
     description: Optional[str] = None
-    """The description of the foundation model."""
 
     display_name: Optional[str] = None
-    """The display name of the foundation model."""
 
     docs: Optional[str] = None
-    """The URL to the documentation of the foundation model."""
 
     name: Optional[str] = None
-    """The name of the foundation model."""
 
     def as_dict(self) -> dict:
         """Serializes the FoundationModel into a dictionary suitable for use as a JSON request body."""
@@ -1322,23 +1422,24 @@ class FoundationModel:
 
 @dataclass
 class GetOpenApiResponse:
-    """The response is an OpenAPI spec in JSON format that typically includes fields like openapi,
-    info, servers and paths, etc."""
+    contents: Optional[BinaryIO] = None
 
     def as_dict(self) -> dict:
         """Serializes the GetOpenApiResponse into a dictionary suitable for use as a JSON request body."""
         body = {}
+        if self.contents: body['contents'] = self.contents
         return body
 
     def as_shallow_dict(self) -> dict:
         """Serializes the GetOpenApiResponse into a shallow dictionary of its immediate attributes."""
         body = {}
+        if self.contents: body['contents'] = self.contents
         return body
 
     @classmethod
     def from_dict(cls, d: Dict[str, any]) -> GetOpenApiResponse:
         """Deserializes the GetOpenApiResponse from a dictionary."""
-        return cls()
+        return cls(contents=d.get('contents', None))
 
 
 @dataclass
@@ -1367,13 +1468,23 @@ class GetServingEndpointPermissionLevelsResponse:
 
 @dataclass
 class GoogleCloudVertexAiConfig:
+    project_id: str
+    """This is the Google Cloud project id that the service account is associated with."""
+
+    region: str
+    """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
+    details. Some models are only available in specific regions.
+
+    [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
+
     private_key: Optional[str] = None
     """The Databricks secret key reference for a private key for the service account which has access
     to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys].
     If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an
     API key using one of the following fields: `private_key` or `private_key_plaintext`
 
-    [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
+    [Best practices for managing service account keys]:
+    https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
 
     private_key_plaintext: Optional[str] = None
     """The private key for the service account which has access to the Google Cloud Vertex AI Service
@@ -1381,16 +1492,8 @@ class GoogleCloudVertexAiConfig:
     prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an
     API key using one of the following fields: `private_key` or `private_key_plaintext`.
 
-    [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
-
-    project_id: Optional[str] = None
-    """This is the Google Cloud project id that the service account is associated with."""
-
-    region: Optional[str] = None
-    """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
-    details. Some models are only available in specific regions.
-
-    [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
+    [Best practices for managing service account keys]:
+    https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
 
     def as_dict(self) -> dict:
         """Serializes the GoogleCloudVertexAiConfig into a dictionary suitable for use as a JSON request body."""
@@ -1419,6 +1522,28 @@ class GoogleCloudVertexAiConfig:
                    region=d.get('region', None))
 
 
+@dataclass
+class HttpRequestResponse:
+    contents: Optional[BinaryIO] = None
+
+    def as_dict(self) -> dict:
+        """Serializes the HttpRequestResponse into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.contents: body['contents'] = self.contents
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the HttpRequestResponse into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.contents: body['contents'] = self.contents
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> HttpRequestResponse:
+        """Deserializes the HttpRequestResponse from a dictionary."""
+        return cls(contents=d.get('contents', None))
+
+
 @dataclass
 class ListEndpointsResponse:
     endpoints: Optional[List[ServingEndpoint]] = None
@@ -1444,7 +1569,10 @@ class ListEndpointsResponse:
 
 @dataclass
 class ModelDataPlaneInfo:
-    query_info: Optional[oauth2.DataPlaneInfo] = None
+    """A representation of all DataPlaneInfo for operations that can be done on a model through Data
+    Plane APIs."""
+
+    query_info: Optional[DataPlaneInfo] = None
     """Information required to query DataPlane API 'query' endpoint."""
 
     def as_dict(self) -> dict:
@@ -1462,11 +1590,13 @@ class ModelDataPlaneInfo:
     @classmethod
     def from_dict(cls, d: Dict[str, any]) -> ModelDataPlaneInfo:
         """Deserializes the ModelDataPlaneInfo from a dictionary."""
-        return cls(query_info=_from_dict(d, 'query_info', oauth2.DataPlaneInfo))
+        return cls(query_info=_from_dict(d, 'query_info', DataPlaneInfo))
 
 
 @dataclass
 class OpenAiConfig:
+    """Configs needed to create an OpenAI model route."""
+
     microsoft_entra_client_id: Optional[str] = None
     """This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID."""
 
@@ -1652,13 +1782,10 @@ class PatchServingEndpointTags:
 @dataclass
 class PayloadTable:
     name: Optional[str] = None
-    """The name of the payload table."""
 
     status: Optional[str] = None
-    """The status of the payload table."""
 
     status_message: Optional[str] = None
-    """The status message of the payload table."""
 
     def as_dict(self) -> dict:
         """Serializes the PayloadTable into a dictionary suitable for use as a JSON request body."""
@@ -1684,6 +1811,57 @@ class PayloadTable:
                    status_message=d.get('status_message', None))
 
 
+@dataclass
+class PutAiGatewayRequest:
+    guardrails: Optional[AiGatewayGuardrails] = None
+    """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and
+    responses."""
+
+    inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
+    """Configuration for payload logging using inference tables. Use these tables to monitor and audit
+    data being sent to and received from model APIs and to improve model quality."""
+
+    name: Optional[str] = None
+    """The name of the serving endpoint whose AI Gateway is being updated. This field is required."""
+
+    rate_limits: Optional[List[AiGatewayRateLimit]] = None
+    """Configuration for rate limits which can be set to limit endpoint traffic."""
+
+    usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None
+    """Configuration to enable usage tracking using system tables. These tables allow you to monitor
+    operational usage on endpoints and their associated costs."""
+
+    def as_dict(self) -> dict:
+        """Serializes the PutAiGatewayRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.guardrails: body['guardrails'] = self.guardrails.as_dict()
+        if self.inference_table_config: body['inference_table_config'] = self.inference_table_config.as_dict()
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
+        if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config.as_dict()
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the PutAiGatewayRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.guardrails: body['guardrails'] = self.guardrails
+        if self.inference_table_config: body['inference_table_config'] = self.inference_table_config
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = self.rate_limits
+        if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> PutAiGatewayRequest:
+        """Deserializes the PutAiGatewayRequest from a dictionary."""
+        return cls(guardrails=_from_dict(d, 'guardrails', AiGatewayGuardrails),
+                   inference_table_config=_from_dict(d, 'inference_table_config',
+                                                     AiGatewayInferenceTableConfig),
+                   name=d.get('name', None),
+                   rate_limits=_repeated_dict(d, 'rate_limits', AiGatewayRateLimit),
+                   usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
+
+
 @dataclass
 class PutAiGatewayResponse:
     guardrails: Optional[AiGatewayGuardrails] = None
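
PutAiGatewayRequest appears to mirror the parameters of the existing put_ai_gateway call as a request dataclass. A sketch that enables usage tracking (the endpoint name is a placeholder, and AiGatewayUsageTrackingConfig(enabled=...) follows the field shape from earlier releases):

    from databricks.sdk.service.serving import (AiGatewayUsageTrackingConfig,
                                                PutAiGatewayRequest)

    req = PutAiGatewayRequest(name='my-endpoint',  # placeholder
                              usage_tracking_config=AiGatewayUsageTrackingConfig(enabled=True))
    body = req.as_dict()  # nested configs are serialized via their own as_dict()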
@@ -1692,7 +1870,7 @@ class PutAiGatewayResponse:
 
     inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
     """Configuration for payload logging using inference tables. Use these tables to monitor and audit
-    data being sent to and received from model APIs and to improve model quality ."""
+    data being sent to and received from model APIs and to improve model quality."""
 
     rate_limits: Optional[List[AiGatewayRateLimit]] = None
     """Configuration for rate limits which can be set to limit endpoint traffic."""
@@ -1729,6 +1907,34 @@ class PutAiGatewayResponse:
                    usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
 
 
+@dataclass
+class PutRequest:
+    name: Optional[str] = None
+    """The name of the serving endpoint whose rate limits are being updated. This field is required."""
+
+    rate_limits: Optional[List[RateLimit]] = None
+    """The list of endpoint rate limits."""
+
+    def as_dict(self) -> dict:
+        """Serializes the PutRequest into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
+        return body
+
+    def as_shallow_dict(self) -> dict:
+        """Serializes the PutRequest into a shallow dictionary of its immediate attributes."""
+        body = {}
+        if self.name is not None: body['name'] = self.name
+        if self.rate_limits: body['rate_limits'] = self.rate_limits
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> PutRequest:
+        """Deserializes the PutRequest from a dictionary."""
+        return cls(name=d.get('name', None), rate_limits=_repeated_dict(d, 'rate_limits', RateLimit))
+
+
 @dataclass
 class PutResponse:
     rate_limits: Optional[List[RateLimit]] = None
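
PutRequest is the request-side counterpart of the existing PutResponse for the endpoint rate-limits call. A sketch using the RateLimit types from this module (endpoint name and limit are placeholders):

    from databricks.sdk.service.serving import (PutRequest, RateLimit, RateLimitKey,
                                                RateLimitRenewalPeriod)

    req = PutRequest(name='my-endpoint',  # placeholder
                     rate_limits=[
                         RateLimit(calls=100,  # placeholder limit
                                   key=RateLimitKey.USER,
                                   renewal_period=RateLimitRenewalPeriod.MINUTE)
                     ])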
@@ -1994,15 +2200,12 @@ class RateLimit:
 
 
 class RateLimitKey(Enum):
-    """Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are
-    supported, with 'endpoint' being the default if not specified."""
 
     ENDPOINT = 'endpoint'
     USER = 'user'
 
 
 class RateLimitRenewalPeriod(Enum):
-    """Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported."""
 
     MINUTE = 'minute'
 
@@ -2043,11 +2246,9 @@ class ServedEntityInput:
     """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
     a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
     object, the full name of the object should be given in the form of
-    __catalog_name__.__schema_name__.__model_name__."""
+    **catalog_name.schema_name.model_name**."""
 
     entity_version: Optional[str] = None
-    """The version of the model in Databricks Model Registry to be served or empty if the entity is a
-    FEATURE_SPEC."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
@@ -2076,7 +2277,7 @@ class ServedEntityInput:
     """The name of a served entity. It must be unique across an endpoint. A served entity name can
     consist of alphanumeric characters, dashes, and underscores. If not specified for an external
     model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
-    not specified for other entities, it defaults to <entity-name>-<entity-version>."""
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
     """Whether the compute resources for the served entity should scale down to zero."""
@@ -2089,13 +2290,13 @@ class ServedEntityInput:
     scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
     is 0."""
 
-    workload_type: Optional[str] = None
+    workload_type: Optional[ServingModelWorkloadType] = None
     """The workload type of the served entity. The workload type selects which type of compute to use
     in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntityInput into a dictionary suitable for use as a JSON request body."""
@@ -2112,7 +2313,7 @@ class ServedEntityInput:
         if self.name is not None: body['name'] = self.name
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:
@@ -2146,26 +2347,22 @@ class ServedEntityInput:
                    name=d.get('name', None),
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d.get('workload_type', None))
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedEntityOutput:
     creation_timestamp: Optional[int] = None
-    """The creation timestamp of the served entity in Unix time."""
 
     creator: Optional[str] = None
-    """The email of the user who created the served entity."""
 
     entity_name: Optional[str] = None
-    """The name of the entity served. The entity may be a model in the Databricks Model Registry, a
-    model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
-    object, the full name of the object is given in the form of
-    __catalog_name__.__schema_name__.__model_name__."""
+    """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
+    a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
+    object, the full name of the object should be given in the form of
+    **catalog_name.schema_name.model_name**."""
 
     entity_version: Optional[str] = None
-    """The version of the served entity in Databricks Model Registry or empty if the entity is a
-    FEATURE_SPEC."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
@@ -2174,14 +2371,16 @@ class ServedEntityOutput:
     "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
 
     external_model: Optional[ExternalModel] = None
-    """The external model that is served. NOTE: Only one of external_model, foundation_model, and
-    (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) is
-    returned based on the endpoint type."""
+    """The external model to be served. NOTE: Only one of external_model and (entity_name,
+    entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with
+    the latter set being used for custom model serving for a Databricks registered model. For an
+    existing endpoint with external_model, it cannot be updated to an endpoint without
+    external_model. If the endpoint is created without external_model, users cannot update it to add
+    external_model later. The task type of all external models within an endpoint must be the same."""
 
     foundation_model: Optional[FoundationModel] = None
-    """The foundation model that is served. NOTE: Only one of foundation_model, external_model, and
-    (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) is
-    returned based on the endpoint type."""
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
 
     instance_profile_arn: Optional[str] = None
     """ARN of the instance profile that the served entity uses to access AWS resources."""
@@ -2193,13 +2392,15 @@ class ServedEntityOutput:
     """The minimum tokens per second that the endpoint can scale down to."""
 
     name: Optional[str] = None
-    """The name of the served entity."""
+    """The name of a served entity. It must be unique across an endpoint. A served entity name can
+    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
+    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
     """Whether the compute resources for the served entity should scale down to zero."""
 
     state: Optional[ServedModelState] = None
-    """Information corresponding to the state of the served entity."""
 
     workload_size: Optional[str] = None
     """The workload size of the served entity. The workload size corresponds to a range of provisioned
@@ -2207,15 +2408,15 @@ class ServedEntityOutput:
     process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
     "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
     scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
-    will be 0."""
+    is 0."""
 
-    workload_type: Optional[str] = None
+    workload_type: Optional[ServingModelWorkloadType] = None
     """The workload type of the served entity. The workload type selects which type of compute to use
     in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntityOutput into a dictionary suitable for use as a JSON request body."""
@@ -2236,7 +2437,7 @@ class ServedEntityOutput:
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.state: body['state'] = self.state.as_dict()
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:
@@ -2278,31 +2479,22 @@ class ServedEntityOutput:
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    state=_from_dict(d, 'state', ServedModelState),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d.get('workload_type', None))
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedEntitySpec:
     entity_name: Optional[str] = None
-    """The name of the entity served. The entity may be a model in the Databricks Model Registry, a
-    model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
-    object, the full name of the object is given in the form of
-    __catalog_name__.__schema_name__.__model_name__."""
 
     entity_version: Optional[str] = None
-    """The version of the served entity in Databricks Model Registry or empty if the entity is a
-    FEATURE_SPEC."""
 
     external_model: Optional[ExternalModel] = None
-    """The external model that is served. NOTE: Only one of external_model, foundation_model, and
-    (entity_name, entity_version) is returned based on the endpoint type."""
 
     foundation_model: Optional[FoundationModel] = None
-    """The foundation model that is served. NOTE: Only one of foundation_model, external_model, and
-    (entity_name, entity_version) is returned based on the endpoint type."""
+    """All fields are not sensitive as they are hard-coded in the system and made available to
+    customers."""
 
     name: Optional[str] = None
-    """The name of the served entity."""
 
     def as_dict(self) -> dict:
         """Serializes the ServedEntitySpec into a dictionary suitable for use as a JSON request body."""
@@ -2336,24 +2528,21 @@ class ServedEntitySpec:
 
 @dataclass
 class ServedModelInput:
+    scale_to_zero_enabled: bool
+    """Whether the compute resources for the served entity should scale down to zero."""
+
     model_name: str
-    """The name of the model in Databricks Model Registry to be served or if the model resides in Unity
-    Catalog, the full name of model, in the form of __catalog_name__.__schema_name__.__model_name__."""
 
     model_version: str
-    """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
-
-    scale_to_zero_enabled: bool
-    """Whether the compute resources for the served model should scale down to zero."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
-    for serving this model. Note: this is an experimental feature and subject to change. Example
-    model environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
+    for serving this entity. Note: this is an experimental feature and subject to change. Example
+    entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
     "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
 
     instance_profile_arn: Optional[str] = None
-    """ARN of the instance profile that the served model will use to access AWS resources."""
+    """ARN of the instance profile that the served entity uses to access AWS resources."""
 
     max_provisioned_throughput: Optional[int] = None
     """The maximum tokens per second that the endpoint can scale up to."""
@@ -2362,25 +2551,26 @@ class ServedModelInput:
     """The minimum tokens per second that the endpoint can scale down to."""
 
     name: Optional[str] = None
-    """The name of a served model. It must be unique across an endpoint. If not specified, this field
-    will default to <model-name>-<model-version>. A served model name can consist of alphanumeric
-    characters, dashes, and underscores."""
+    """The name of a served entity. It must be unique across an endpoint. A served entity name can
+    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
+    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     workload_size: Optional[ServedModelInputWorkloadSize] = None
-    """The workload size of the served model. The workload size corresponds to a range of provisioned
-    concurrency that the compute will autoscale between. A single unit of provisioned concurrency
-    can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
-    concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
-    concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
-    each workload size will be 0."""
+    """The workload size of the served entity. The workload size corresponds to a range of provisioned
+    concurrency that the compute autoscales between. A single unit of provisioned concurrency can
+    process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
+    "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
+    scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
+    is 0."""
 
     workload_type: Optional[ServedModelInputWorkloadType] = None
-    """The workload type of the served model. The workload type selects which type of compute to use in
-    the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
+    """The workload type of the served entity. The workload type selects which type of compute to use
+    in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelInput into a dictionary suitable for use as a JSON request body."""
@@ -2432,12 +2622,6 @@ class ServedModelInput:
 
 
 class ServedModelInputWorkloadSize(Enum):
-    """The workload size of the served model. The workload size corresponds to a range of provisioned
-    concurrency that the compute will autoscale between. A single unit of provisioned concurrency
-    can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
-    concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
-    concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
-    each workload size will be 0."""
 
     LARGE = 'Large'
     MEDIUM = 'Medium'
@@ -2445,12 +2629,6 @@ class ServedModelInputWorkloadSize(Enum):
 
 
 class ServedModelInputWorkloadType(Enum):
-    """The workload type of the served model. The workload type selects which type of compute to use in
-    the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
-    acceleration is available by selecting workload types like GPU_SMALL and others. See the
-    available [GPU types].
-
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     CPU = 'CPU'
     GPU_LARGE = 'GPU_LARGE'
@@ -2462,51 +2640,48 @@ class ServedModelInputWorkloadType(Enum):
 @dataclass
 class ServedModelOutput:
     creation_timestamp: Optional[int] = None
-    """The creation timestamp of the served model in Unix time."""
 
     creator: Optional[str] = None
-    """The email of the user who created the served model."""
 
     environment_vars: Optional[Dict[str, str]] = None
     """An object containing a set of optional, user-specified environment variable key-value pairs used
-    for serving this model. Note: this is an experimental feature and subject to change. Example
-    model environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
+    for serving this entity. Note: this is an experimental feature and subject to change. Example
+    entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
     "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
 
     instance_profile_arn: Optional[str] = None
-    """ARN of the instance profile that the served model will use to access AWS resources."""
+    """ARN of the instance profile that the served entity uses to access AWS resources."""
 
     model_name: Optional[str] = None
-    """The name of the model in Databricks Model Registry or the full name of the model in Unity
-    Catalog."""
 
     model_version: Optional[str] = None
-    """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
 
     name: Optional[str] = None
-    """The name of the served model."""
+    """The name of a served entity. It must be unique across an endpoint. A served entity name can
+    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
+    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
+    not specified for other entities, it defaults to entity_name-entity_version."""
 
     scale_to_zero_enabled: Optional[bool] = None
-    """Whether the compute resources for the Served Model should scale down to zero."""
+    """Whether the compute resources for the served entity should scale down to zero."""
 
     state: Optional[ServedModelState] = None
-    """Information corresponding to the state of the Served Model."""
 
     workload_size: Optional[str] = None
-    """The workload size of the served model. The workload size corresponds to a range of provisioned
-    concurrency that the compute will autoscale between. A single unit of provisioned concurrency
-    can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
-    concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
-    concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
-    each workload size will be 0."""
-
-    workload_type: Optional[str] = None
-    """The workload type of the served model. The workload type selects which type of compute to use in
-    the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
+    """The workload size of the served entity. The workload size corresponds to a range of provisioned
+    concurrency that the compute autoscales between. A single unit of provisioned concurrency can
+    process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
+    "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
+    scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
+    is 0."""
+
+    workload_type: Optional[ServingModelWorkloadType] = None
+    """The workload type of the served entity. The workload type selects which type of compute to use
+    in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
     acceleration is available by selecting workload types like GPU_SMALL and others. See the
     available [GPU types].
 
-    [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
+    [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelOutput into a dictionary suitable for use as a JSON request body."""
@@ -2521,7 +2696,7 @@ class ServedModelOutput:
         if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
         if self.state: body['state'] = self.state.as_dict()
         if self.workload_size is not None: body['workload_size'] = self.workload_size
-        if self.workload_type is not None: body['workload_type'] = self.workload_type
+        if self.workload_type is not None: body['workload_type'] = self.workload_type.value
         return body
 
     def as_shallow_dict(self) -> dict:
@@ -2553,20 +2728,18 @@ class ServedModelOutput:
                    scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
                    state=_from_dict(d, 'state', ServedModelState),
                    workload_size=d.get('workload_size', None),
-                   workload_type=d.get('workload_type', None))
+                   workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
 
 
 @dataclass
 class ServedModelSpec:
     model_name: Optional[str] = None
-    """The name of the model in Databricks Model Registry or the full name of the model in Unity
-    Catalog."""
+    """Only one of model_name and entity_name should be populated"""
 
     model_version: Optional[str] = None
-    """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
+    """Only one of model_version and entity_version should be populated"""
 
     name: Optional[str] = None
-    """The name of the served model."""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelSpec into a dictionary suitable for use as a JSON request body."""
@@ -2595,18 +2768,8 @@ class ServedModelSpec:
 @dataclass
 class ServedModelState:
     deployment: Optional[ServedModelStateDeployment] = None
-    """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
-    is not ready yet because the deployment is still being created (i.e container image is building,
-    model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
-    served entity was previously in a ready state but no longer is and is attempting to recover.
-    DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
-    indicates that there was an error trying to bring up the served entity (e.g container image
-    build failed, the model server failed to start due to a model loading error, etc.)
-    DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
-    bringing up another served entity under the same endpoint and config version."""
 
     deployment_state_message: Optional[str] = None
-    """More information about the state of the served entity, if available."""
 
     def as_dict(self) -> dict:
         """Serializes the ServedModelState into a dictionary suitable for use as a JSON request body."""
@@ -2632,15 +2795,6 @@ class ServedModelState:
 
 
 class ServedModelStateDeployment(Enum):
-    """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
-    is not ready yet because the deployment is still being created (i.e container image is building,
-    model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
-    served entity was previously in a ready state but no longer is and is attempting to recover.
-    DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
-    indicates that there was an error trying to bring up the served entity (e.g container image
-    build failed, the model server failed to start due to a model loading error, etc.)
-    DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
-    bringing up another served entity under the same endpoint and config version."""
 
     ABORTED = 'DEPLOYMENT_ABORTED'
     CREATING = 'DEPLOYMENT_CREATING'
@@ -2675,8 +2829,8 @@ class ServerLogsResponse:
  @dataclass
  class ServingEndpoint:
  ai_gateway: Optional[AiGatewayConfig] = None
- """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are
- currently supported."""
+ """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+ throughput endpoints are currently supported."""

  config: Optional[EndpointCoreConfigSummary] = None
  """The config that is currently being served by the endpoint."""
@@ -2688,8 +2842,7 @@ class ServingEndpoint:
  """The email of the user who created the serving endpoint."""

  id: Optional[str] = None
- """System-generated ID of the endpoint. This is used to refer to the endpoint in the Permissions
- API"""
+ """System-generated ID of the endpoint, included to be used by the Permissions API."""

  last_updated_timestamp: Optional[int] = None
  """The timestamp when the endpoint was last updated by a user in Unix time."""
@@ -2848,8 +3001,8 @@ class ServingEndpointAccessControlResponse:
  @dataclass
  class ServingEndpointDetailed:
  ai_gateway: Optional[AiGatewayConfig] = None
- """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are
- currently supported."""
+ """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+ throughput endpoints are currently supported."""

  config: Optional[EndpointCoreConfigOutput] = None
  """The config that is currently being served by the endpoint."""
@@ -2957,7 +3110,6 @@ class ServingEndpointDetailed:


  class ServingEndpointDetailedPermissionLevel(Enum):
- """The permission level of the principal making the request."""

  CAN_MANAGE = 'CAN_MANAGE'
  CAN_QUERY = 'CAN_QUERY'
@@ -3097,6 +3249,15 @@ class ServingEndpointPermissionsRequest:
  serving_endpoint_id=d.get('serving_endpoint_id', None))


+ class ServingModelWorkloadType(Enum):
+
+ CPU = 'CPU'
+ GPU_LARGE = 'GPU_LARGE'
+ GPU_MEDIUM = 'GPU_MEDIUM'
+ GPU_SMALL = 'GPU_SMALL'
+ MULTIGPU_MEDIUM = 'MULTIGPU_MEDIUM'
+
+
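The new ServingModelWorkloadType enum replaces the free-form workload_type string on served models, and as_dict/from_dict now convert it via .value and _enum respectively. A minimal sketch of what this looks like for a caller, assuming the 0.42.0 import path, placeholder model names, and that ServedModelInput accepts the enum as well:

# Sketch only: placeholder model names; assumes ServedModelInput also
# takes the new enum for workload_type in 0.42.0.
from databricks.sdk.service.serving import (ServedModelInput,
                                            ServingModelWorkloadType)

served = ServedModelInput(model_name='my_model',
                          model_version='1',
                          scale_to_zero_enabled=True,
                          workload_size='Small',
                          workload_type=ServingModelWorkloadType.GPU_SMALL)

# as_dict() serializes the enum with .value, so the JSON body still
# carries the plain string 'GPU_SMALL' that the REST API expects.
assert served.as_dict()['workload_type'] == 'GPU_SMALL'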
  @dataclass
  class TrafficConfig:
  routes: Optional[List[Route]] = None
@@ -3236,9 +3397,9 @@ class ServingEndpointsAPI:

  def create(self,
  name: str,
- config: EndpointCoreConfigInput,
  *,
  ai_gateway: Optional[AiGatewayConfig] = None,
+ config: Optional[EndpointCoreConfigInput] = None,
  rate_limits: Optional[List[RateLimit]] = None,
  route_optimized: Optional[bool] = None,
  tags: Optional[List[EndpointTag]] = None) -> Wait[ServingEndpointDetailed]:
@@ -3247,11 +3408,11 @@ class ServingEndpointsAPI:
  :param name: str
  The name of the serving endpoint. This field is required and must be unique across a Databricks
  workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores.
- :param config: :class:`EndpointCoreConfigInput`
- The core config of the serving endpoint.
  :param ai_gateway: :class:`AiGatewayConfig` (optional)
- The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are
- supported as of now.
+ The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
+ throughput endpoints are currently supported.
+ :param config: :class:`EndpointCoreConfigInput` (optional)
+ The core config of the serving endpoint.
  :param rate_limits: List[:class:`RateLimit`] (optional)
  Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
  Gateway to manage rate limits.
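With config now optional and keyword-only, it must be passed by name. A hedged sketch of a create call under the new signature (endpoint and entity names are placeholders):

# Sketch only: endpoint and entity names are placeholders.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import (EndpointCoreConfigInput,
                                            ServedEntityInput)

w = WorkspaceClient()
endpoint = w.serving_endpoints.create_and_wait(
    name='my-endpoint',
    # config moved behind the bare *, so it must be a keyword argument now.
    config=EndpointCoreConfigInput(served_entities=[
        ServedEntityInput(entity_name='main.default.my_model',
                          entity_version='1',
                          workload_size='Small',
                          scale_to_zero_enabled=True)
    ]))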
@@ -3281,9 +3442,9 @@ class ServingEndpointsAPI:
  def create_and_wait(
  self,
  name: str,
- config: EndpointCoreConfigInput,
  *,
  ai_gateway: Optional[AiGatewayConfig] = None,
+ config: Optional[EndpointCoreConfigInput] = None,
  rate_limits: Optional[List[RateLimit]] = None,
  route_optimized: Optional[bool] = None,
  tags: Optional[List[EndpointTag]] = None,
@@ -3299,7 +3460,6 @@ class ServingEndpointsAPI:
  """Delete a serving endpoint.

  :param name: str
- The name of the serving endpoint. This field is required.


  """
@@ -3341,7 +3501,7 @@ class ServingEndpointsAPI:
  res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}', headers=headers)
  return ServingEndpointDetailed.from_dict(res)

- def get_open_api(self, name: str):
+ def get_open_api(self, name: str) -> GetOpenApiResponse:
  """Get the schema for a serving endpoint.

  Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for
@@ -3350,12 +3510,13 @@ class ServingEndpointsAPI:
  :param name: str
  The name of the serving endpoint that the served model belongs to. This field is required.

-
+ :returns: :class:`GetOpenApiResponse`
  """

- headers = {'Accept': 'application/json', }
+ headers = {'Accept': 'text/plain', }

- self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers)
+ res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers, raw=True)
+ return GetOpenApiResponse.from_dict(res)
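get_open_api previously discarded the response body and returned None; it now fetches the raw text/plain payload and wraps it in a GetOpenApiResponse. A sketch of the new call (endpoint name is a placeholder):

# Sketch only: 'my-endpoint' is a placeholder endpoint name.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
schema = w.serving_endpoints.get_open_api(name='my-endpoint')
# 0.40.0 returned None here; 0.42.0 returns a GetOpenApiResponse
# built from the raw response.
print(type(schema))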

  def get_permission_levels(self, serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse:
  """Get serving endpoint permission levels.
@@ -3394,6 +3555,44 @@ class ServingEndpointsAPI:
  headers=headers)
  return ServingEndpointPermissions.from_dict(res)

+ def http_request(self,
+ connection_name: str,
+ method: ExternalFunctionRequestHttpMethod,
+ path: str,
+ *,
+ headers: Optional[str] = None,
+ json: Optional[str] = None,
+ params: Optional[str] = None) -> HttpRequestResponse:
+ """Make external services call using the credentials stored in UC Connection.
+
+ :param connection_name: str
+ The connection name to use. This is required to identify the external connection.
+ :param method: :class:`ExternalFunctionRequestHttpMethod`
+ The HTTP method to use (e.g., 'GET', 'POST').
+ :param path: str
+ The relative path for the API endpoint. This is required.
+ :param headers: str (optional)
+ Additional headers for the request. If not provided, only auth headers from connections would be
+ passed.
+ :param json: str (optional)
+ The JSON payload to send in the request body.
+ :param params: str (optional)
+ Query parameters for the request.
+
+ :returns: :class:`HttpRequestResponse`
+ """
+ body = {}
+ if connection_name is not None: body['connection_name'] = connection_name
+ if headers is not None: body['headers'] = headers
+ if json is not None: body['json'] = json
+ if method is not None: body['method'] = method.value
+ if params is not None: body['params'] = params
+ if path is not None: body['path'] = path
+ headers = {'Accept': 'text/plain', 'Content-Type': 'application/json', }
+
+ res = self._api.do('POST', '/api/2.0/external-function', body=body, headers=headers, raw=True)
+ return HttpRequestResponse.from_dict(res)
+
3596
  def list(self) -> Iterator[ServingEndpoint]:
3398
3597
  """Get all serving endpoints.
3399
3598
 
@@ -3430,7 +3629,7 @@ class ServingEndpointsAPI:
  name: str,
  *,
  add_tags: Optional[List[EndpointTag]] = None,
- delete_tags: Optional[List[str]] = None) -> Iterator[EndpointTag]:
+ delete_tags: Optional[List[str]] = None) -> EndpointTags:
  """Update tags of a serving endpoint.

  Used to batch add and delete tags from a serving endpoint with a single API call.
@@ -3442,7 +3641,7 @@ class ServingEndpointsAPI:
  :param delete_tags: List[str] (optional)
  List of tag keys to delete

- :returns: Iterator over :class:`EndpointTag`
+ :returns: :class:`EndpointTags`
  """
  body = {}
  if add_tags is not None: body['add_tags'] = [v.as_dict() for v in add_tags]
@@ -3450,7 +3649,7 @@ class ServingEndpointsAPI:
  headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }

  res = self._api.do('PATCH', f'/api/2.0/serving-endpoints/{name}/tags', body=body, headers=headers)
- return [EndpointTag.from_dict(v) for v in res]
+ return EndpointTags.from_dict(res)
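The tag-update call now returns a single EndpointTags wrapper instead of a list of EndpointTag. A sketch of the new shape, assuming the SDK method is named patch and that EndpointTags exposes a .tags list:

# Sketch only: endpoint name and tags are placeholders; assumes the
# method is named patch() and that EndpointTags has a .tags attribute.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointTag

w = WorkspaceClient()
result = w.serving_endpoints.patch(
    name='my-endpoint',
    add_tags=[EndpointTag(key='team', value='mlops')],
    delete_tags=['stale-tag'])
# 0.40.0 returned Iterator[EndpointTag]; 0.42.0 returns one EndpointTags.
for tag in result.tags or []:
    print(tag.key, tag.value)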

  def put(self, name: str, *, rate_limits: Optional[List[RateLimit]] = None) -> PutResponse:
  """Update rate limits of a serving endpoint.
@@ -3485,8 +3684,8 @@ class ServingEndpointsAPI:
  usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None) -> PutAiGatewayResponse:
  """Update AI Gateway of a serving endpoint.

- Used to update the AI Gateway of a serving endpoint. NOTE: Only external model endpoints are currently
- supported.
+ Used to update the AI Gateway of a serving endpoint. NOTE: Only external model and provisioned
+ throughput endpoints are currently supported.

  :param name: str
  The name of the serving endpoint whose AI Gateway is being updated. This field is required.
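Since AI Gateway now also covers provisioned throughput endpoints, the same put_ai_gateway call can target either endpoint type. A sketch enabling usage tracking (endpoint name is a placeholder; the enabled field is assumed):

# Sketch only: placeholder endpoint name; assumes
# AiGatewayUsageTrackingConfig has an 'enabled' field.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import AiGatewayUsageTrackingConfig

w = WorkspaceClient()
w.serving_endpoints.put_ai_gateway(
    name='my-endpoint',
    usage_tracking_config=AiGatewayUsageTrackingConfig(enabled=True))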
@@ -3646,14 +3845,16 @@ class ServingEndpointsAPI:
  The name of the serving endpoint to update. This field is required.
  :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional)
  Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
+ Note: this field is deprecated for creating new provisioned throughput endpoints, or updating
+ existing provisioned throughput endpoints that never have inference table configured; in these cases
+ please use AI Gateway to manage inference tables.
  :param served_entities: List[:class:`ServedEntityInput`] (optional)
- A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served
- entities.
+ The list of served entities under the serving endpoint config.
  :param served_models: List[:class:`ServedModelInput`] (optional)
- (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A
- serving endpoint can have up to 15 served models.
+ (Deprecated, use served_entities instead) The list of served models under the serving endpoint
+ config.
  :param traffic_config: :class:`TrafficConfig` (optional)
- The traffic config defining how invocations to the serving endpoint should be routed.
+ The traffic configuration associated with the serving endpoint config.

  :returns:
  Long-running operation waiter for :class:`ServingEndpointDetailed`.
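A sketch of update_config_and_wait using the preferred served_entities field together with a traffic split; entity and endpoint names are placeholders, and the Route field names are assumptions:

# Sketch only: placeholder names; Route field names are assumed.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import (Route, ServedEntityInput,
                                            TrafficConfig)

w = WorkspaceClient()
endpoint = w.serving_endpoints.update_config_and_wait(
    name='my-endpoint',
    served_entities=[
        ServedEntityInput(entity_name='main.default.my_model',
                          entity_version='2',
                          workload_size='Small',
                          scale_to_zero_enabled=True)
    ],
    traffic_config=TrafficConfig(routes=[
        Route(served_model_name='my_model-2', traffic_percentage=100)
    ]))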
@@ -3725,6 +3926,7 @@ class ServingEndpointsDataPlaneAPI:
  def __init__(self, api_client, control_plane):
  self._api = api_client
  self._control_plane = control_plane
+ from ..data_plane import DataPlaneService
  self._data_plane_service = DataPlaneService()

  def query(self,