databricks-sdk 0.39.0__py3-none-any.whl → 0.41.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of databricks-sdk might be problematic. Click here for more details.

@@ -12,14 +12,11 @@ from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional
12
12
 
13
13
  import requests
14
14
 
15
- from ..data_plane import DataPlaneService
16
15
  from ..errors import OperationFailed
17
16
  from ._internal import Wait, _enum, _from_dict, _repeated_dict
18
17
 
19
18
  _LOG = logging.getLogger('databricks.sdk')
20
19
 
21
- from databricks.sdk.service import oauth2
22
-
23
20
  # all definitions in this file are in alphabetical order
24
21
 
25
22
 
@@ -148,11 +145,8 @@ class AiGatewayGuardrailParameters:
148
145
 
149
146
  @dataclass
150
147
  class AiGatewayGuardrailPiiBehavior:
151
- behavior: AiGatewayGuardrailPiiBehaviorBehavior
152
- """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
153
- guardrail and the request contains PII, the request is not sent to the model server and 400
154
- status code is returned; if 'BLOCK' is set for the output guardrail and the model response
155
- contains PII, the PII info in the response is redacted and 400 status code is returned."""
148
+ behavior: Optional[AiGatewayGuardrailPiiBehaviorBehavior] = None
149
+ """Configuration for input guardrail filters."""
156
150
 
157
151
  def as_dict(self) -> dict:
158
152
  """Serializes the AiGatewayGuardrailPiiBehavior into a dictionary suitable for use as a JSON request body."""
@@ -173,10 +167,6 @@ class AiGatewayGuardrailPiiBehavior:
173
167
 
174
168
 
175
169
  class AiGatewayGuardrailPiiBehaviorBehavior(Enum):
176
- """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input
177
- guardrail and the request contains PII, the request is not sent to the model server and 400
178
- status code is returned; if 'BLOCK' is set for the output guardrail and the model response
179
- contains PII, the PII info in the response is redacted and 400 status code is returned."""
180
170
 
181
171
  BLOCK = 'BLOCK'
182
172
  NONE = 'NONE'
@@ -292,15 +282,12 @@ class AiGatewayRateLimit:
292
282
 
293
283
 
294
284
  class AiGatewayRateLimitKey(Enum):
295
- """Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint'
296
- being the default if not specified."""
297
285
 
298
286
  ENDPOINT = 'endpoint'
299
287
  USER = 'user'
300
288
 
301
289
 
302
290
  class AiGatewayRateLimitRenewalPeriod(Enum):
303
- """Renewal period field for a rate limit. Currently, only 'minute' is supported."""
304
291
 
305
292
  MINUTE = 'minute'
306
293
 
@@ -339,9 +326,9 @@ class AmazonBedrockConfig:
339
326
 
340
327
  aws_access_key_id: Optional[str] = None
341
328
  """The Databricks secret key reference for an AWS access key ID with permissions to interact with
342
- Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You
343
- must provide an API key using one of the following fields: `aws_access_key_id` or
344
- `aws_access_key_id_plaintext`."""
329
+ Bedrock services. If you prefer to paste your API key directly, see
330
+ `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields:
331
+ `aws_access_key_id` or `aws_access_key_id_plaintext`."""
345
332
 
346
333
  aws_access_key_id_plaintext: Optional[str] = None
347
334
  """An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext
@@ -399,8 +386,6 @@ class AmazonBedrockConfig:
399
386
 
400
387
 
401
388
  class AmazonBedrockConfigBedrockProvider(Enum):
402
- """The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
403
- Anthropic, Cohere, AI21Labs, Amazon."""
404
389
 
405
390
  AI21LABS = 'ai21labs'
406
391
  AMAZON = 'amazon'
@@ -490,18 +475,21 @@ class AutoCaptureConfigInput:
490
475
  @dataclass
491
476
  class AutoCaptureConfigOutput:
492
477
  catalog_name: Optional[str] = None
493
- """The name of the catalog in Unity Catalog."""
478
+ """The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if
479
+ the inference table is already enabled."""
494
480
 
495
481
  enabled: Optional[bool] = None
496
482
  """Indicates whether the inference table is enabled."""
497
483
 
498
484
  schema_name: Optional[str] = None
499
- """The name of the schema in Unity Catalog."""
485
+ """The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if
486
+ the inference table is already enabled."""
500
487
 
501
488
  state: Optional[AutoCaptureState] = None
502
489
 
503
490
  table_name_prefix: Optional[str] = None
504
- """The prefix of the table in Unity Catalog."""
491
+ """The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if
492
+ the inference table is already enabled."""
505
493
 
506
494
  def as_dict(self) -> dict:
507
495
  """Serializes the AutoCaptureConfigOutput into a dictionary suitable for use as a JSON request body."""
@@ -666,8 +654,8 @@ class CreateServingEndpoint:
666
654
  """The core config of the serving endpoint."""
667
655
 
668
656
  ai_gateway: Optional[AiGatewayConfig] = None
669
- """The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are
670
- supported as of now."""
657
+ """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
658
+ throughput endpoints are currently supported."""
671
659
 
672
660
  rate_limits: Optional[List[RateLimit]] = None
673
661
  """Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
@@ -712,6 +700,37 @@ class CreateServingEndpoint:
712
700
  tags=_repeated_dict(d, 'tags', EndpointTag))
713
701
 
714
702
 
703
+ @dataclass
704
+ class DataPlaneInfo:
705
+ """Details necessary to query this object's API through the DataPlane APIs."""
706
+
707
+ authorization_details: Optional[str] = None
708
+ """Authorization details as a string."""
709
+
710
+ endpoint_url: Optional[str] = None
711
+ """The URL of the endpoint for this operation in the dataplane."""
712
+
713
+ def as_dict(self) -> dict:
714
+ """Serializes the DataPlaneInfo into a dictionary suitable for use as a JSON request body."""
715
+ body = {}
716
+ if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
717
+ if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
718
+ return body
719
+
720
+ def as_shallow_dict(self) -> dict:
721
+ """Serializes the DataPlaneInfo into a shallow dictionary of its immediate attributes."""
722
+ body = {}
723
+ if self.authorization_details is not None: body['authorization_details'] = self.authorization_details
724
+ if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url
725
+ return body
726
+
727
+ @classmethod
728
+ def from_dict(cls, d: Dict[str, any]) -> DataPlaneInfo:
729
+ """Deserializes the DataPlaneInfo from a dictionary."""
730
+ return cls(authorization_details=d.get('authorization_details', None),
731
+ endpoint_url=d.get('endpoint_url', None))
732
+
733
+
715
734
  @dataclass
716
735
  class DatabricksModelServingConfig:
717
736
  databricks_workspace_url: str
@@ -853,21 +872,22 @@ class EmbeddingsV1ResponseEmbeddingElementObject(Enum):
853
872
  class EndpointCoreConfigInput:
854
873
  auto_capture_config: Optional[AutoCaptureConfigInput] = None
855
874
  """Configuration for Inference Tables which automatically logs requests and responses to Unity
856
- Catalog."""
875
+ Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
876
+ updating existing provisioned throughput endpoints that never have inference table configured;
877
+ in these cases please use AI Gateway to manage inference tables."""
857
878
 
858
879
  name: Optional[str] = None
859
880
  """The name of the serving endpoint to update. This field is required."""
860
881
 
861
882
  served_entities: Optional[List[ServedEntityInput]] = None
862
- """A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served
863
- entities."""
883
+ """The list of served entities under the serving endpoint config."""
864
884
 
865
885
  served_models: Optional[List[ServedModelInput]] = None
866
- """(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A
867
- serving endpoint can have up to 15 served models."""
886
+ """(Deprecated, use served_entities instead) The list of served models under the serving endpoint
887
+ config."""
868
888
 
869
889
  traffic_config: Optional[TrafficConfig] = None
870
- """The traffic config defining how invocations to the serving endpoint should be routed."""
890
+ """The traffic configuration associated with the serving endpoint config."""
871
891
 
872
892
  def as_dict(self) -> dict:
873
893
  """Serializes the EndpointCoreConfigInput into a dictionary suitable for use as a JSON request body."""
@@ -903,7 +923,9 @@ class EndpointCoreConfigInput:
903
923
  class EndpointCoreConfigOutput:
904
924
  auto_capture_config: Optional[AutoCaptureConfigOutput] = None
905
925
  """Configuration for Inference Tables which automatically logs requests and responses to Unity
906
- Catalog."""
926
+ Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
927
+ updating existing provisioned throughput endpoints that never have inference table configured;
928
+ in these cases please use AI Gateway to manage inference tables."""
907
929
 
908
930
  config_version: Optional[int] = None
909
931
  """The config version that the serving endpoint is currently serving."""
@@ -982,7 +1004,9 @@ class EndpointCoreConfigSummary:
982
1004
  class EndpointPendingConfig:
983
1005
  auto_capture_config: Optional[AutoCaptureConfigOutput] = None
984
1006
  """Configuration for Inference Tables which automatically logs requests and responses to Unity
985
- Catalog."""
1007
+ Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or
1008
+ updating existing provisioned throughput endpoints that never have inference table configured;
1009
+ in these cases please use AI Gateway to manage inference tables."""
986
1010
 
987
1011
  config_version: Optional[int] = None
988
1012
  """The config version that the serving endpoint is currently serving."""
@@ -1068,10 +1092,6 @@ class EndpointState:
1068
1092
 
1069
1093
 
1070
1094
  class EndpointStateConfigUpdate(Enum):
1071
- """The state of an endpoint's config update. This informs the user if the pending_config is in
1072
- progress, if the update failed, or if there is no update in progress. Note that if the
1073
- endpoint's config_update state value is IN_PROGRESS, another update can not be made until the
1074
- update completes or fails."""
1075
1095
 
1076
1096
  IN_PROGRESS = 'IN_PROGRESS'
1077
1097
  NOT_UPDATING = 'NOT_UPDATING'
@@ -1080,9 +1100,6 @@ class EndpointStateConfigUpdate(Enum):
1080
1100
 
1081
1101
 
1082
1102
  class EndpointStateReady(Enum):
1083
- """The state of an endpoint, indicating whether or not the endpoint is queryable. An endpoint is
1084
- READY if all of the served entities in its active configuration are ready. If any of the
1085
- actively served entities are in a non-ready state, the endpoint state will be NOT_READY."""
1086
1103
 
1087
1104
  NOT_READY = 'NOT_READY'
1088
1105
  READY = 'READY'
@@ -1116,6 +1133,28 @@ class EndpointTag:
1116
1133
  return cls(key=d.get('key', None), value=d.get('value', None))
1117
1134
 
1118
1135
 
1136
+ @dataclass
1137
+ class EndpointTags:
1138
+ tags: Optional[List[EndpointTag]] = None
1139
+
1140
+ def as_dict(self) -> dict:
1141
+ """Serializes the EndpointTags into a dictionary suitable for use as a JSON request body."""
1142
+ body = {}
1143
+ if self.tags: body['tags'] = [v.as_dict() for v in self.tags]
1144
+ return body
1145
+
1146
+ def as_shallow_dict(self) -> dict:
1147
+ """Serializes the EndpointTags into a shallow dictionary of its immediate attributes."""
1148
+ body = {}
1149
+ if self.tags: body['tags'] = self.tags
1150
+ return body
1151
+
1152
+ @classmethod
1153
+ def from_dict(cls, d: Dict[str, any]) -> EndpointTags:
1154
+ """Deserializes the EndpointTags from a dictionary."""
1155
+ return cls(tags=_repeated_dict(d, 'tags', EndpointTag))
1156
+
1157
+
1119
1158
  @dataclass
1120
1159
  class ExportMetricsResponse:
1121
1160
  contents: Optional[BinaryIO] = None
@@ -1138,12 +1177,105 @@ class ExportMetricsResponse:
1138
1177
  return cls(contents=d.get('contents', None))
1139
1178
 
1140
1179
 
1180
+ @dataclass
1181
+ class ExternalFunctionRequest:
1182
+ """Simple Proto message for testing"""
1183
+
1184
+ connection_name: str
1185
+ """The connection name to use. This is required to identify the external connection."""
1186
+
1187
+ method: ExternalFunctionRequestHttpMethod
1188
+ """The HTTP method to use (e.g., 'GET', 'POST')."""
1189
+
1190
+ path: str
1191
+ """The relative path for the API endpoint. This is required."""
1192
+
1193
+ headers: Optional[str] = None
1194
+ """Additional headers for the request. If not provided, only auth headers from connections would be
1195
+ passed."""
1196
+
1197
+ json: Optional[str] = None
1198
+ """The JSON payload to send in the request body."""
1199
+
1200
+ params: Optional[str] = None
1201
+ """Query parameters for the request."""
1202
+
1203
+ def as_dict(self) -> dict:
1204
+ """Serializes the ExternalFunctionRequest into a dictionary suitable for use as a JSON request body."""
1205
+ body = {}
1206
+ if self.connection_name is not None: body['connection_name'] = self.connection_name
1207
+ if self.headers is not None: body['headers'] = self.headers
1208
+ if self.json is not None: body['json'] = self.json
1209
+ if self.method is not None: body['method'] = self.method.value
1210
+ if self.params is not None: body['params'] = self.params
1211
+ if self.path is not None: body['path'] = self.path
1212
+ return body
1213
+
1214
+ def as_shallow_dict(self) -> dict:
1215
+ """Serializes the ExternalFunctionRequest into a shallow dictionary of its immediate attributes."""
1216
+ body = {}
1217
+ if self.connection_name is not None: body['connection_name'] = self.connection_name
1218
+ if self.headers is not None: body['headers'] = self.headers
1219
+ if self.json is not None: body['json'] = self.json
1220
+ if self.method is not None: body['method'] = self.method
1221
+ if self.params is not None: body['params'] = self.params
1222
+ if self.path is not None: body['path'] = self.path
1223
+ return body
1224
+
1225
+ @classmethod
1226
+ def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionRequest:
1227
+ """Deserializes the ExternalFunctionRequest from a dictionary."""
1228
+ return cls(connection_name=d.get('connection_name', None),
1229
+ headers=d.get('headers', None),
1230
+ json=d.get('json', None),
1231
+ method=_enum(d, 'method', ExternalFunctionRequestHttpMethod),
1232
+ params=d.get('params', None),
1233
+ path=d.get('path', None))
1234
+
1235
+
1236
+ class ExternalFunctionRequestHttpMethod(Enum):
1237
+
1238
+ DELETE = 'DELETE'
1239
+ GET = 'GET'
1240
+ PATCH = 'PATCH'
1241
+ POST = 'POST'
1242
+ PUT = 'PUT'
1243
+
1244
+
1245
+ @dataclass
1246
+ class ExternalFunctionResponse:
1247
+ status_code: Optional[int] = None
1248
+ """The HTTP status code of the response"""
1249
+
1250
+ text: Optional[str] = None
1251
+ """The content of the response"""
1252
+
1253
+ def as_dict(self) -> dict:
1254
+ """Serializes the ExternalFunctionResponse into a dictionary suitable for use as a JSON request body."""
1255
+ body = {}
1256
+ if self.status_code is not None: body['status_code'] = self.status_code
1257
+ if self.text is not None: body['text'] = self.text
1258
+ return body
1259
+
1260
+ def as_shallow_dict(self) -> dict:
1261
+ """Serializes the ExternalFunctionResponse into a shallow dictionary of its immediate attributes."""
1262
+ body = {}
1263
+ if self.status_code is not None: body['status_code'] = self.status_code
1264
+ if self.text is not None: body['text'] = self.text
1265
+ return body
1266
+
1267
+ @classmethod
1268
+ def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionResponse:
1269
+ """Deserializes the ExternalFunctionResponse from a dictionary."""
1270
+ return cls(status_code=d.get('status_code', None), text=d.get('text', None))
1271
+
1272
+
1141
1273
  @dataclass
1142
1274
  class ExternalModel:
1143
1275
  provider: ExternalModelProvider
1144
1276
  """The name of the provider for the external model. Currently, the supported providers are
1145
1277
  'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
1146
- 'google-cloud-vertex-ai', 'openai', and 'palm'.","""
1278
+ 'google-cloud-vertex-ai', 'openai', and 'palm'."""
1147
1279
 
1148
1280
  name: str
1149
1281
  """The name of the external model."""
@@ -1230,9 +1362,6 @@ class ExternalModel:
1230
1362
 
1231
1363
 
1232
1364
  class ExternalModelProvider(Enum):
1233
- """The name of the provider for the external model. Currently, the supported providers are
1234
- 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving',
1235
- 'google-cloud-vertex-ai', 'openai', and 'palm'.","""
1236
1365
 
1237
1366
  AI21LABS = 'ai21labs'
1238
1367
  AMAZON_BEDROCK = 'amazon-bedrock'
@@ -1281,17 +1410,16 @@ class ExternalModelUsageElement:
1281
1410
 
1282
1411
  @dataclass
1283
1412
  class FoundationModel:
1413
+ """All fields are not sensitive as they are hard-coded in the system and made available to
1414
+ customers."""
1415
+
1284
1416
  description: Optional[str] = None
1285
- """The description of the foundation model."""
1286
1417
 
1287
1418
  display_name: Optional[str] = None
1288
- """The display name of the foundation model."""
1289
1419
 
1290
1420
  docs: Optional[str] = None
1291
- """The URL to the documentation of the foundation model."""
1292
1421
 
1293
1422
  name: Optional[str] = None
1294
- """The name of the foundation model."""
1295
1423
 
1296
1424
  def as_dict(self) -> dict:
1297
1425
  """Serializes the FoundationModel into a dictionary suitable for use as a JSON request body."""
@@ -1322,23 +1450,24 @@ class FoundationModel:
1322
1450
 
1323
1451
  @dataclass
1324
1452
  class GetOpenApiResponse:
1325
- """The response is an OpenAPI spec in JSON format that typically includes fields like openapi,
1326
- info, servers and paths, etc."""
1453
+ contents: Optional[BinaryIO] = None
1327
1454
 
1328
1455
  def as_dict(self) -> dict:
1329
1456
  """Serializes the GetOpenApiResponse into a dictionary suitable for use as a JSON request body."""
1330
1457
  body = {}
1458
+ if self.contents: body['contents'] = self.contents
1331
1459
  return body
1332
1460
 
1333
1461
  def as_shallow_dict(self) -> dict:
1334
1462
  """Serializes the GetOpenApiResponse into a shallow dictionary of its immediate attributes."""
1335
1463
  body = {}
1464
+ if self.contents: body['contents'] = self.contents
1336
1465
  return body
1337
1466
 
1338
1467
  @classmethod
1339
1468
  def from_dict(cls, d: Dict[str, any]) -> GetOpenApiResponse:
1340
1469
  """Deserializes the GetOpenApiResponse from a dictionary."""
1341
- return cls()
1470
+ return cls(contents=d.get('contents', None))
1342
1471
 
1343
1472
 
1344
1473
  @dataclass
@@ -1367,13 +1496,23 @@ class GetServingEndpointPermissionLevelsResponse:
1367
1496
 
1368
1497
  @dataclass
1369
1498
  class GoogleCloudVertexAiConfig:
1499
+ project_id: str
1500
+ """This is the Google Cloud project id that the service account is associated with."""
1501
+
1502
+ region: str
1503
+ """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
1504
+ details. Some models are only available in specific regions.
1505
+
1506
+ [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
1507
+
1370
1508
  private_key: Optional[str] = None
1371
1509
  """The Databricks secret key reference for a private key for the service account which has access
1372
1510
  to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys].
1373
1511
  If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an
1374
1512
  API key using one of the following fields: `private_key` or `private_key_plaintext`
1375
1513
 
1376
- [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
1514
+ [Best practices for managing service account keys]:
1515
+ https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
1377
1516
 
1378
1517
  private_key_plaintext: Optional[str] = None
1379
1518
  """The private key for the service account which has access to the Google Cloud Vertex AI Service
@@ -1381,16 +1520,8 @@ class GoogleCloudVertexAiConfig:
1381
1520
  prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an
1382
1521
  API key using one of the following fields: `private_key` or `private_key_plaintext`.
1383
1522
 
1384
- [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
1385
-
1386
- project_id: Optional[str] = None
1387
- """This is the Google Cloud project id that the service account is associated with."""
1388
-
1389
- region: Optional[str] = None
1390
- """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more
1391
- details. Some models are only available in specific regions.
1392
-
1393
- [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations"""
1523
+ [Best practices for managing service account keys]:
1524
+ https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys"""
1394
1525
 
1395
1526
  def as_dict(self) -> dict:
1396
1527
  """Serializes the GoogleCloudVertexAiConfig into a dictionary suitable for use as a JSON request body."""
@@ -1444,7 +1575,10 @@ class ListEndpointsResponse:
1444
1575
 
1445
1576
  @dataclass
1446
1577
  class ModelDataPlaneInfo:
1447
- query_info: Optional[oauth2.DataPlaneInfo] = None
1578
+ """A representation of all DataPlaneInfo for operations that can be done on a model through Data
1579
+ Plane APIs."""
1580
+
1581
+ query_info: Optional[DataPlaneInfo] = None
1448
1582
  """Information required to query DataPlane API 'query' endpoint."""
1449
1583
 
1450
1584
  def as_dict(self) -> dict:
@@ -1462,11 +1596,13 @@ class ModelDataPlaneInfo:
1462
1596
  @classmethod
1463
1597
  def from_dict(cls, d: Dict[str, any]) -> ModelDataPlaneInfo:
1464
1598
  """Deserializes the ModelDataPlaneInfo from a dictionary."""
1465
- return cls(query_info=_from_dict(d, 'query_info', oauth2.DataPlaneInfo))
1599
+ return cls(query_info=_from_dict(d, 'query_info', DataPlaneInfo))
1466
1600
 
1467
1601
 
1468
1602
  @dataclass
1469
1603
  class OpenAiConfig:
1604
+ """Configs needed to create an OpenAI model route."""
1605
+
1470
1606
  microsoft_entra_client_id: Optional[str] = None
1471
1607
  """This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID."""
1472
1608
 
@@ -1652,13 +1788,10 @@ class PatchServingEndpointTags:
1652
1788
  @dataclass
1653
1789
  class PayloadTable:
1654
1790
  name: Optional[str] = None
1655
- """The name of the payload table."""
1656
1791
 
1657
1792
  status: Optional[str] = None
1658
- """The status of the payload table."""
1659
1793
 
1660
1794
  status_message: Optional[str] = None
1661
- """The status message of the payload table."""
1662
1795
 
1663
1796
  def as_dict(self) -> dict:
1664
1797
  """Serializes the PayloadTable into a dictionary suitable for use as a JSON request body."""
@@ -1684,6 +1817,57 @@ class PayloadTable:
1684
1817
  status_message=d.get('status_message', None))
1685
1818
 
1686
1819
 
1820
+ @dataclass
1821
+ class PutAiGatewayRequest:
1822
+ guardrails: Optional[AiGatewayGuardrails] = None
1823
+ """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and
1824
+ responses."""
1825
+
1826
+ inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
1827
+ """Configuration for payload logging using inference tables. Use these tables to monitor and audit
1828
+ data being sent to and received from model APIs and to improve model quality."""
1829
+
1830
+ name: Optional[str] = None
1831
+ """The name of the serving endpoint whose AI Gateway is being updated. This field is required."""
1832
+
1833
+ rate_limits: Optional[List[AiGatewayRateLimit]] = None
1834
+ """Configuration for rate limits which can be set to limit endpoint traffic."""
1835
+
1836
+ usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None
1837
+ """Configuration to enable usage tracking using system tables. These tables allow you to monitor
1838
+ operational usage on endpoints and their associated costs."""
1839
+
1840
+ def as_dict(self) -> dict:
1841
+ """Serializes the PutAiGatewayRequest into a dictionary suitable for use as a JSON request body."""
1842
+ body = {}
1843
+ if self.guardrails: body['guardrails'] = self.guardrails.as_dict()
1844
+ if self.inference_table_config: body['inference_table_config'] = self.inference_table_config.as_dict()
1845
+ if self.name is not None: body['name'] = self.name
1846
+ if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
1847
+ if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config.as_dict()
1848
+ return body
1849
+
1850
+ def as_shallow_dict(self) -> dict:
1851
+ """Serializes the PutAiGatewayRequest into a shallow dictionary of its immediate attributes."""
1852
+ body = {}
1853
+ if self.guardrails: body['guardrails'] = self.guardrails
1854
+ if self.inference_table_config: body['inference_table_config'] = self.inference_table_config
1855
+ if self.name is not None: body['name'] = self.name
1856
+ if self.rate_limits: body['rate_limits'] = self.rate_limits
1857
+ if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config
1858
+ return body
1859
+
1860
+ @classmethod
1861
+ def from_dict(cls, d: Dict[str, any]) -> PutAiGatewayRequest:
1862
+ """Deserializes the PutAiGatewayRequest from a dictionary."""
1863
+ return cls(guardrails=_from_dict(d, 'guardrails', AiGatewayGuardrails),
1864
+ inference_table_config=_from_dict(d, 'inference_table_config',
1865
+ AiGatewayInferenceTableConfig),
1866
+ name=d.get('name', None),
1867
+ rate_limits=_repeated_dict(d, 'rate_limits', AiGatewayRateLimit),
1868
+ usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
1869
+
1870
+
1687
1871
  @dataclass
1688
1872
  class PutAiGatewayResponse:
1689
1873
  guardrails: Optional[AiGatewayGuardrails] = None
@@ -1692,7 +1876,7 @@ class PutAiGatewayResponse:
1692
1876
 
1693
1877
  inference_table_config: Optional[AiGatewayInferenceTableConfig] = None
1694
1878
  """Configuration for payload logging using inference tables. Use these tables to monitor and audit
1695
- data being sent to and received from model APIs and to improve model quality ."""
1879
+ data being sent to and received from model APIs and to improve model quality."""
1696
1880
 
1697
1881
  rate_limits: Optional[List[AiGatewayRateLimit]] = None
1698
1882
  """Configuration for rate limits which can be set to limit endpoint traffic."""
@@ -1729,6 +1913,34 @@ class PutAiGatewayResponse:
1729
1913
  usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig))
1730
1914
 
1731
1915
 
1916
+ @dataclass
1917
+ class PutRequest:
1918
+ name: Optional[str] = None
1919
+ """The name of the serving endpoint whose rate limits are being updated. This field is required."""
1920
+
1921
+ rate_limits: Optional[List[RateLimit]] = None
1922
+ """The list of endpoint rate limits."""
1923
+
1924
+ def as_dict(self) -> dict:
1925
+ """Serializes the PutRequest into a dictionary suitable for use as a JSON request body."""
1926
+ body = {}
1927
+ if self.name is not None: body['name'] = self.name
1928
+ if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits]
1929
+ return body
1930
+
1931
+ def as_shallow_dict(self) -> dict:
1932
+ """Serializes the PutRequest into a shallow dictionary of its immediate attributes."""
1933
+ body = {}
1934
+ if self.name is not None: body['name'] = self.name
1935
+ if self.rate_limits: body['rate_limits'] = self.rate_limits
1936
+ return body
1937
+
1938
+ @classmethod
1939
+ def from_dict(cls, d: Dict[str, any]) -> PutRequest:
1940
+ """Deserializes the PutRequest from a dictionary."""
1941
+ return cls(name=d.get('name', None), rate_limits=_repeated_dict(d, 'rate_limits', RateLimit))
1942
+
1943
+
1732
1944
  @dataclass
1733
1945
  class PutResponse:
1734
1946
  rate_limits: Optional[List[RateLimit]] = None
@@ -1994,15 +2206,12 @@ class RateLimit:
1994
2206
 
1995
2207
 
1996
2208
  class RateLimitKey(Enum):
1997
- """Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are
1998
- supported, with 'endpoint' being the default if not specified."""
1999
2209
 
2000
2210
  ENDPOINT = 'endpoint'
2001
2211
  USER = 'user'
2002
2212
 
2003
2213
 
2004
2214
  class RateLimitRenewalPeriod(Enum):
2005
- """Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported."""
2006
2215
 
2007
2216
  MINUTE = 'minute'
2008
2217
 
@@ -2043,11 +2252,9 @@ class ServedEntityInput:
2043
2252
  """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
2044
2253
  a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
2045
2254
  object, the full name of the object should be given in the form of
2046
- __catalog_name__.__schema_name__.__model_name__."""
2255
+ **catalog_name.schema_name.model_name**."""
2047
2256
 
2048
2257
  entity_version: Optional[str] = None
2049
- """The version of the model in Databricks Model Registry to be served or empty if the entity is a
2050
- FEATURE_SPEC."""
2051
2258
 
2052
2259
  environment_vars: Optional[Dict[str, str]] = None
2053
2260
  """An object containing a set of optional, user-specified environment variable key-value pairs used
@@ -2076,7 +2283,7 @@ class ServedEntityInput:
2076
2283
  """The name of a served entity. It must be unique across an endpoint. A served entity name can
2077
2284
  consist of alphanumeric characters, dashes, and underscores. If not specified for an external
2078
2285
  model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
2079
- not specified for other entities, it defaults to <entity-name>-<entity-version>."""
2286
+ not specified for other entities, it defaults to entity_name-entity_version."""
2080
2287
 
2081
2288
  scale_to_zero_enabled: Optional[bool] = None
2082
2289
  """Whether the compute resources for the served entity should scale down to zero."""
@@ -2089,13 +2296,13 @@ class ServedEntityInput:
2089
2296
  scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
2090
2297
  is 0."""
2091
2298
 
2092
- workload_type: Optional[str] = None
2299
+ workload_type: Optional[ServingModelWorkloadType] = None
2093
2300
  """The workload type of the served entity. The workload type selects which type of compute to use
2094
2301
  in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2095
2302
  acceleration is available by selecting workload types like GPU_SMALL and others. See the
2096
2303
  available [GPU types].
2097
2304
 
2098
- [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2305
+ [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2099
2306
 
2100
2307
  def as_dict(self) -> dict:
2101
2308
  """Serializes the ServedEntityInput into a dictionary suitable for use as a JSON request body."""
@@ -2112,7 +2319,7 @@ class ServedEntityInput:
2112
2319
  if self.name is not None: body['name'] = self.name
2113
2320
  if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
2114
2321
  if self.workload_size is not None: body['workload_size'] = self.workload_size
2115
- if self.workload_type is not None: body['workload_type'] = self.workload_type
2322
+ if self.workload_type is not None: body['workload_type'] = self.workload_type.value
2116
2323
  return body
2117
2324
 
2118
2325
  def as_shallow_dict(self) -> dict:
@@ -2146,26 +2353,22 @@ class ServedEntityInput:
2146
2353
  name=d.get('name', None),
2147
2354
  scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
2148
2355
  workload_size=d.get('workload_size', None),
2149
- workload_type=d.get('workload_type', None))
2356
+ workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
2150
2357
 
2151
2358
 
2152
2359
  @dataclass
2153
2360
  class ServedEntityOutput:
2154
2361
  creation_timestamp: Optional[int] = None
2155
- """The creation timestamp of the served entity in Unix time."""
2156
2362
 
2157
2363
  creator: Optional[str] = None
2158
- """The email of the user who created the served entity."""
2159
2364
 
2160
2365
  entity_name: Optional[str] = None
2161
- """The name of the entity served. The entity may be a model in the Databricks Model Registry, a
2162
- model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
2163
- object, the full name of the object is given in the form of
2164
- __catalog_name__.__schema_name__.__model_name__."""
2366
+ """The name of the entity to be served. The entity may be a model in the Databricks Model Registry,
2367
+ a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
2368
+ object, the full name of the object should be given in the form of
2369
+ **catalog_name.schema_name.model_name**."""
2165
2370
 
2166
2371
  entity_version: Optional[str] = None
2167
- """The version of the served entity in Databricks Model Registry or empty if the entity is a
2168
- FEATURE_SPEC."""
2169
2372
 
2170
2373
  environment_vars: Optional[Dict[str, str]] = None
2171
2374
  """An object containing a set of optional, user-specified environment variable key-value pairs used
@@ -2174,14 +2377,16 @@ class ServedEntityOutput:
2174
2377
  "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
2175
2378
 
2176
2379
  external_model: Optional[ExternalModel] = None
2177
- """The external model that is served. NOTE: Only one of external_model, foundation_model, and
2178
- (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) is
2179
- returned based on the endpoint type."""
2380
+ """The external model to be served. NOTE: Only one of external_model and (entity_name,
2381
+ entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with
2382
+ the latter set being used for custom model serving for a Databricks registered model. For an
2383
+ existing endpoint with external_model, it cannot be updated to an endpoint without
2384
+ external_model. If the endpoint is created without external_model, users cannot update it to add
2385
+ external_model later. The task type of all external models within an endpoint must be the same."""
2180
2386
 
2181
2387
  foundation_model: Optional[FoundationModel] = None
2182
- """The foundation model that is served. NOTE: Only one of foundation_model, external_model, and
2183
- (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) is
2184
- returned based on the endpoint type."""
2388
+ """All fields are not sensitive as they are hard-coded in the system and made available to
2389
+ customers."""
2185
2390
 
2186
2391
  instance_profile_arn: Optional[str] = None
2187
2392
  """ARN of the instance profile that the served entity uses to access AWS resources."""
@@ -2193,13 +2398,15 @@ class ServedEntityOutput:
2193
2398
  """The minimum tokens per second that the endpoint can scale down to."""
2194
2399
 
2195
2400
  name: Optional[str] = None
2196
- """The name of the served entity."""
2401
+ """The name of a served entity. It must be unique across an endpoint. A served entity name can
2402
+ consist of alphanumeric characters, dashes, and underscores. If not specified for an external
2403
+ model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
2404
+ not specified for other entities, it defaults to entity_name-entity_version."""
2197
2405
 
2198
2406
  scale_to_zero_enabled: Optional[bool] = None
2199
2407
  """Whether the compute resources for the served entity should scale down to zero."""
2200
2408
 
2201
2409
  state: Optional[ServedModelState] = None
2202
- """Information corresponding to the state of the served entity."""
2203
2410
 
2204
2411
  workload_size: Optional[str] = None
2205
2412
  """The workload size of the served entity. The workload size corresponds to a range of provisioned
@@ -2207,15 +2414,15 @@ class ServedEntityOutput:
2207
2414
  process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
2208
2415
  "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
2209
2416
  scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
2210
- will be 0."""
2417
+ is 0."""
2211
2418
 
2212
- workload_type: Optional[str] = None
2419
+ workload_type: Optional[ServingModelWorkloadType] = None
2213
2420
  """The workload type of the served entity. The workload type selects which type of compute to use
2214
2421
  in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2215
2422
  acceleration is available by selecting workload types like GPU_SMALL and others. See the
2216
2423
  available [GPU types].
2217
2424
 
2218
- [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2425
+ [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2219
2426
 
2220
2427
  def as_dict(self) -> dict:
2221
2428
  """Serializes the ServedEntityOutput into a dictionary suitable for use as a JSON request body."""
@@ -2236,7 +2443,7 @@ class ServedEntityOutput:
2236
2443
  if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
2237
2444
  if self.state: body['state'] = self.state.as_dict()
2238
2445
  if self.workload_size is not None: body['workload_size'] = self.workload_size
2239
- if self.workload_type is not None: body['workload_type'] = self.workload_type
2446
+ if self.workload_type is not None: body['workload_type'] = self.workload_type.value
2240
2447
  return body
2241
2448
 
2242
2449
  def as_shallow_dict(self) -> dict:
@@ -2278,31 +2485,22 @@ class ServedEntityOutput:
2278
2485
  scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
2279
2486
  state=_from_dict(d, 'state', ServedModelState),
2280
2487
  workload_size=d.get('workload_size', None),
2281
- workload_type=d.get('workload_type', None))
2488
+ workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
2282
2489
 
2283
2490
 
2284
2491
  @dataclass
2285
2492
  class ServedEntitySpec:
2286
2493
  entity_name: Optional[str] = None
2287
- """The name of the entity served. The entity may be a model in the Databricks Model Registry, a
2288
- model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
2289
- object, the full name of the object is given in the form of
2290
- __catalog_name__.__schema_name__.__model_name__."""
2291
2494
 
2292
2495
  entity_version: Optional[str] = None
2293
- """The version of the served entity in Databricks Model Registry or empty if the entity is a
2294
- FEATURE_SPEC."""
2295
2496
 
2296
2497
  external_model: Optional[ExternalModel] = None
2297
- """The external model that is served. NOTE: Only one of external_model, foundation_model, and
2298
- (entity_name, entity_version) is returned based on the endpoint type."""
2299
2498
 
2300
2499
  foundation_model: Optional[FoundationModel] = None
2301
- """The foundation model that is served. NOTE: Only one of foundation_model, external_model, and
2302
- (entity_name, entity_version) is returned based on the endpoint type."""
2500
+ """All fields are not sensitive as they are hard-coded in the system and made available to
2501
+ customers."""
2303
2502
 
2304
2503
  name: Optional[str] = None
2305
- """The name of the served entity."""
2306
2504
 
2307
2505
  def as_dict(self) -> dict:
2308
2506
  """Serializes the ServedEntitySpec into a dictionary suitable for use as a JSON request body."""
@@ -2336,24 +2534,21 @@ class ServedEntitySpec:
2336
2534
 
2337
2535
  @dataclass
2338
2536
  class ServedModelInput:
2537
+ scale_to_zero_enabled: bool
2538
+ """Whether the compute resources for the served entity should scale down to zero."""
2539
+
2339
2540
  model_name: str
2340
- """The name of the model in Databricks Model Registry to be served or if the model resides in Unity
2341
- Catalog, the full name of model, in the form of __catalog_name__.__schema_name__.__model_name__."""
2342
2541
 
2343
2542
  model_version: str
2344
- """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
2345
-
2346
- scale_to_zero_enabled: bool
2347
- """Whether the compute resources for the served model should scale down to zero."""
2348
2543
 
2349
2544
  environment_vars: Optional[Dict[str, str]] = None
2350
2545
  """An object containing a set of optional, user-specified environment variable key-value pairs used
2351
- for serving this model. Note: this is an experimental feature and subject to change. Example
2352
- model environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
2546
+ for serving this entity. Note: this is an experimental feature and subject to change. Example
2547
+ entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
2353
2548
  "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
2354
2549
 
2355
2550
  instance_profile_arn: Optional[str] = None
2356
- """ARN of the instance profile that the served model will use to access AWS resources."""
2551
+ """ARN of the instance profile that the served entity uses to access AWS resources."""
2357
2552
 
2358
2553
  max_provisioned_throughput: Optional[int] = None
2359
2554
  """The maximum tokens per second that the endpoint can scale up to."""
@@ -2362,25 +2557,26 @@ class ServedModelInput:
2362
2557
  """The minimum tokens per second that the endpoint can scale down to."""
2363
2558
 
2364
2559
  name: Optional[str] = None
2365
- """The name of a served model. It must be unique across an endpoint. If not specified, this field
2366
- will default to <model-name>-<model-version>. A served model name can consist of alphanumeric
2367
- characters, dashes, and underscores."""
2560
+ """The name of a served entity. It must be unique across an endpoint. A served entity name can
2561
+ consist of alphanumeric characters, dashes, and underscores. If not specified for an external
2562
+ model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
2563
+ not specified for other entities, it defaults to entity_name-entity_version."""
2368
2564
 
2369
2565
  workload_size: Optional[ServedModelInputWorkloadSize] = None
2370
- """The workload size of the served model. The workload size corresponds to a range of provisioned
2371
- concurrency that the compute will autoscale between. A single unit of provisioned concurrency
2372
- can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
2373
- concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
2374
- concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
2375
- each workload size will be 0."""
2566
+ """The workload size of the served entity. The workload size corresponds to a range of provisioned
2567
+ concurrency that the compute autoscales between. A single unit of provisioned concurrency can
2568
+ process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
2569
+ "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
2570
+ scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
2571
+ is 0."""
2376
2572
 
2377
2573
  workload_type: Optional[ServedModelInputWorkloadType] = None
2378
- """The workload type of the served model. The workload type selects which type of compute to use in
2379
- the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2574
+ """The workload type of the served entity. The workload type selects which type of compute to use
2575
+ in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2380
2576
  acceleration is available by selecting workload types like GPU_SMALL and others. See the
2381
2577
  available [GPU types].
2382
2578
 
2383
- [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2579
+ [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2384
2580
 
2385
2581
  def as_dict(self) -> dict:
2386
2582
  """Serializes the ServedModelInput into a dictionary suitable for use as a JSON request body."""
@@ -2432,12 +2628,6 @@ class ServedModelInput:
2432
2628
 
2433
2629
 
2434
2630
  class ServedModelInputWorkloadSize(Enum):
2435
- """The workload size of the served model. The workload size corresponds to a range of provisioned
2436
- concurrency that the compute will autoscale between. A single unit of provisioned concurrency
2437
- can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
2438
- concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
2439
- concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
2440
- each workload size will be 0."""
2441
2631
 
2442
2632
  LARGE = 'Large'
2443
2633
  MEDIUM = 'Medium'
@@ -2445,12 +2635,6 @@ class ServedModelInputWorkloadSize(Enum):
2445
2635
 
2446
2636
 
2447
2637
  class ServedModelInputWorkloadType(Enum):
2448
- """The workload type of the served model. The workload type selects which type of compute to use in
2449
- the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2450
- acceleration is available by selecting workload types like GPU_SMALL and others. See the
2451
- available [GPU types].
2452
-
2453
- [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2454
2638
 
2455
2639
  CPU = 'CPU'
2456
2640
  GPU_LARGE = 'GPU_LARGE'
@@ -2462,51 +2646,48 @@ class ServedModelInputWorkloadType(Enum):
2462
2646
  @dataclass
2463
2647
  class ServedModelOutput:
2464
2648
  creation_timestamp: Optional[int] = None
2465
- """The creation timestamp of the served model in Unix time."""
2466
2649
 
2467
2650
  creator: Optional[str] = None
2468
- """The email of the user who created the served model."""
2469
2651
 
2470
2652
  environment_vars: Optional[Dict[str, str]] = None
2471
2653
  """An object containing a set of optional, user-specified environment variable key-value pairs used
2472
- for serving this model. Note: this is an experimental feature and subject to change. Example
2473
- model environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
2654
+ for serving this entity. Note: this is an experimental feature and subject to change. Example
2655
+ entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
2474
2656
  "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`"""
2475
2657
 
2476
2658
  instance_profile_arn: Optional[str] = None
2477
- """ARN of the instance profile that the served model will use to access AWS resources."""
2659
+ """ARN of the instance profile that the served entity uses to access AWS resources."""
2478
2660
 
2479
2661
  model_name: Optional[str] = None
2480
- """The name of the model in Databricks Model Registry or the full name of the model in Unity
2481
- Catalog."""
2482
2662
 
2483
2663
  model_version: Optional[str] = None
2484
- """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
2485
2664
 
2486
2665
  name: Optional[str] = None
2487
- """The name of the served model."""
2666
+ """The name of a served entity. It must be unique across an endpoint. A served entity name can
2667
+ consist of alphanumeric characters, dashes, and underscores. If not specified for an external
2668
+ model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
2669
+ not specified for other entities, it defaults to entity_name-entity_version."""
2488
2670
 
2489
2671
  scale_to_zero_enabled: Optional[bool] = None
2490
- """Whether the compute resources for the Served Model should scale down to zero."""
2672
+ """Whether the compute resources for the served entity should scale down to zero."""
2491
2673
 
2492
2674
  state: Optional[ServedModelState] = None
2493
- """Information corresponding to the state of the Served Model."""
2494
2675
 
2495
2676
  workload_size: Optional[str] = None
2496
- """The workload size of the served model. The workload size corresponds to a range of provisioned
2497
- concurrency that the compute will autoscale between. A single unit of provisioned concurrency
2498
- can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned
2499
- concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned
2500
- concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for
2501
- each workload size will be 0."""
2502
-
2503
- workload_type: Optional[str] = None
2504
- """The workload type of the served model. The workload type selects which type of compute to use in
2505
- the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2677
+ """The workload size of the served entity. The workload size corresponds to a range of provisioned
2678
+ concurrency that the compute autoscales between. A single unit of provisioned concurrency can
2679
+ process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
2680
+ "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If
2681
+ scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
2682
+ is 0."""
2683
+
2684
+ workload_type: Optional[ServingModelWorkloadType] = None
2685
+ """The workload type of the served entity. The workload type selects which type of compute to use
2686
+ in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU
2506
2687
  acceleration is available by selecting workload types like GPU_SMALL and others. See the
2507
2688
  available [GPU types].
2508
2689
 
2509
- [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2690
+ [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types"""
2510
2691
 
2511
2692
  def as_dict(self) -> dict:
2512
2693
  """Serializes the ServedModelOutput into a dictionary suitable for use as a JSON request body."""
@@ -2521,7 +2702,7 @@ class ServedModelOutput:
2521
2702
  if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled
2522
2703
  if self.state: body['state'] = self.state.as_dict()
2523
2704
  if self.workload_size is not None: body['workload_size'] = self.workload_size
2524
- if self.workload_type is not None: body['workload_type'] = self.workload_type
2705
+ if self.workload_type is not None: body['workload_type'] = self.workload_type.value
2525
2706
  return body
2526
2707
 
2527
2708
  def as_shallow_dict(self) -> dict:
@@ -2553,20 +2734,18 @@ class ServedModelOutput:
2553
2734
  scale_to_zero_enabled=d.get('scale_to_zero_enabled', None),
2554
2735
  state=_from_dict(d, 'state', ServedModelState),
2555
2736
  workload_size=d.get('workload_size', None),
2556
- workload_type=d.get('workload_type', None))
2737
+ workload_type=_enum(d, 'workload_type', ServingModelWorkloadType))
2557
2738
 
2558
2739
 
2559
2740
  @dataclass
2560
2741
  class ServedModelSpec:
2561
2742
  model_name: Optional[str] = None
2562
- """The name of the model in Databricks Model Registry or the full name of the model in Unity
2563
- Catalog."""
2743
+ """Only one of model_name and entity_name should be populated"""
2564
2744
 
2565
2745
  model_version: Optional[str] = None
2566
- """The version of the model in Databricks Model Registry or Unity Catalog to be served."""
2746
+ """Only one of model_version and entity_version should be populated"""
2567
2747
 
2568
2748
  name: Optional[str] = None
2569
- """The name of the served model."""
2570
2749
 
2571
2750
  def as_dict(self) -> dict:
2572
2751
  """Serializes the ServedModelSpec into a dictionary suitable for use as a JSON request body."""
@@ -2595,18 +2774,8 @@ class ServedModelSpec:
2595
2774
  @dataclass
2596
2775
  class ServedModelState:
2597
2776
  deployment: Optional[ServedModelStateDeployment] = None
2598
- """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
2599
- is not ready yet because the deployment is still being created (i.e container image is building,
2600
- model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
2601
- served entity was previously in a ready state but no longer is and is attempting to recover.
2602
- DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
2603
- indicates that there was an error trying to bring up the served entity (e.g container image
2604
- build failed, the model server failed to start due to a model loading error, etc.)
2605
- DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
2606
- bringing up another served entity under the same endpoint and config version."""
2607
2777
 
2608
2778
  deployment_state_message: Optional[str] = None
2609
- """More information about the state of the served entity, if available."""
2610
2779
 
2611
2780
  def as_dict(self) -> dict:
2612
2781
  """Serializes the ServedModelState into a dictionary suitable for use as a JSON request body."""
@@ -2632,15 +2801,6 @@ class ServedModelState:
2632
2801
 
2633
2802
 
2634
2803
  class ServedModelStateDeployment(Enum):
2635
- """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity
2636
- is not ready yet because the deployment is still being created (i.e container image is building,
2637
- model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the
2638
- served entity was previously in a ready state but no longer is and is attempting to recover.
2639
- DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED
2640
- indicates that there was an error trying to bring up the served entity (e.g container image
2641
- build failed, the model server failed to start due to a model loading error, etc.)
2642
- DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in
2643
- bringing up another served entity under the same endpoint and config version."""
2644
2804
 
2645
2805
  ABORTED = 'DEPLOYMENT_ABORTED'
2646
2806
  CREATING = 'DEPLOYMENT_CREATING'
@@ -2675,8 +2835,8 @@ class ServerLogsResponse:
2675
2835
  @dataclass
2676
2836
  class ServingEndpoint:
2677
2837
  ai_gateway: Optional[AiGatewayConfig] = None
2678
- """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are
2679
- currently supported."""
2838
+ """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
2839
+ throughput endpoints are currently supported."""
2680
2840
 
2681
2841
  config: Optional[EndpointCoreConfigSummary] = None
2682
2842
  """The config that is currently being served by the endpoint."""
@@ -2688,8 +2848,7 @@ class ServingEndpoint:
2688
2848
  """The email of the user who created the serving endpoint."""
2689
2849
 
2690
2850
  id: Optional[str] = None
2691
- """System-generated ID of the endpoint. This is used to refer to the endpoint in the Permissions
2692
- API"""
2851
+ """System-generated ID of the endpoint, included to be used by the Permissions API."""
2693
2852
 
2694
2853
  last_updated_timestamp: Optional[int] = None
2695
2854
  """The timestamp when the endpoint was last updated by a user in Unix time."""
@@ -2848,8 +3007,8 @@ class ServingEndpointAccessControlResponse:
2848
3007
  @dataclass
2849
3008
  class ServingEndpointDetailed:
2850
3009
  ai_gateway: Optional[AiGatewayConfig] = None
2851
- """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are
2852
- currently supported."""
3010
+ """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
3011
+ throughput endpoints are currently supported."""
2853
3012
 
2854
3013
  config: Optional[EndpointCoreConfigOutput] = None
2855
3014
  """The config that is currently being served by the endpoint."""
@@ -2957,7 +3116,6 @@ class ServingEndpointDetailed:
2957
3116
 
2958
3117
 
2959
3118
  class ServingEndpointDetailedPermissionLevel(Enum):
2960
- """The permission level of the principal making the request."""
2961
3119
 
2962
3120
  CAN_MANAGE = 'CAN_MANAGE'
2963
3121
  CAN_QUERY = 'CAN_QUERY'
@@ -3097,6 +3255,15 @@ class ServingEndpointPermissionsRequest:
3097
3255
  serving_endpoint_id=d.get('serving_endpoint_id', None))
3098
3256
 
3099
3257
 
3258
+ class ServingModelWorkloadType(Enum):
3259
+
3260
+ CPU = 'CPU'
3261
+ GPU_LARGE = 'GPU_LARGE'
3262
+ GPU_MEDIUM = 'GPU_MEDIUM'
3263
+ GPU_SMALL = 'GPU_SMALL'
3264
+ MULTIGPU_MEDIUM = 'MULTIGPU_MEDIUM'
3265
+
3266
+
3100
3267
  @dataclass
3101
3268
  class TrafficConfig:
3102
3269
  routes: Optional[List[Route]] = None
@@ -3250,8 +3417,8 @@ class ServingEndpointsAPI:
3250
3417
  :param config: :class:`EndpointCoreConfigInput`
3251
3418
  The core config of the serving endpoint.
3252
3419
  :param ai_gateway: :class:`AiGatewayConfig` (optional)
3253
- The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are
3254
- supported as of now.
3420
+ The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned
3421
+ throughput endpoints are currently supported.
3255
3422
  :param rate_limits: List[:class:`RateLimit`] (optional)
3256
3423
  Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI
3257
3424
  Gateway to manage rate limits.
@@ -3299,7 +3466,6 @@ class ServingEndpointsAPI:
3299
3466
  """Delete a serving endpoint.
3300
3467
 
3301
3468
  :param name: str
3302
- The name of the serving endpoint. This field is required.
3303
3469
 
3304
3470
 
3305
3471
  """
@@ -3341,7 +3507,7 @@ class ServingEndpointsAPI:
3341
3507
  res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}', headers=headers)
3342
3508
  return ServingEndpointDetailed.from_dict(res)
3343
3509
 
3344
- def get_open_api(self, name: str):
3510
+ def get_open_api(self, name: str) -> GetOpenApiResponse:
3345
3511
  """Get the schema for a serving endpoint.
3346
3512
 
3347
3513
  Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for
@@ -3350,12 +3516,13 @@ class ServingEndpointsAPI:
3350
3516
  :param name: str
3351
3517
  The name of the serving endpoint that the served model belongs to. This field is required.
3352
3518
 
3353
-
3519
+ :returns: :class:`GetOpenApiResponse`
3354
3520
  """
3355
3521
 
3356
- headers = {'Accept': 'application/json', }
3522
+ headers = {'Accept': 'text/plain', }
3357
3523
 
3358
- self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers)
3524
+ res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers, raw=True)
3525
+ return GetOpenApiResponse.from_dict(res)
3359
3526
 
3360
3527
  def get_permission_levels(self, serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse:
3361
3528
  """Get serving endpoint permission levels.
@@ -3394,6 +3561,44 @@ class ServingEndpointsAPI:
3394
3561
  headers=headers)
3395
3562
  return ServingEndpointPermissions.from_dict(res)
3396
3563
 
3564
+ def http_request(self,
3565
+ connection_name: str,
3566
+ method: ExternalFunctionRequestHttpMethod,
3567
+ path: str,
3568
+ *,
3569
+ headers: Optional[str] = None,
3570
+ json: Optional[str] = None,
3571
+ params: Optional[str] = None) -> ExternalFunctionResponse:
3572
+ """Make external services call using the credentials stored in UC Connection.
3573
+
3574
+ :param connection_name: str
3575
+ The connection name to use. This is required to identify the external connection.
3576
+ :param method: :class:`ExternalFunctionRequestHttpMethod`
3577
+ The HTTP method to use (e.g., 'GET', 'POST').
3578
+ :param path: str
3579
+ The relative path for the API endpoint. This is required.
3580
+ :param headers: str (optional)
3581
+ Additional headers for the request. If not provided, only auth headers from connections would be
3582
+ passed.
3583
+ :param json: str (optional)
3584
+ The JSON payload to send in the request body.
3585
+ :param params: str (optional)
3586
+ Query parameters for the request.
3587
+
3588
+ :returns: :class:`ExternalFunctionResponse`
3589
+ """
3590
+ body = {}
3591
+ if connection_name is not None: body['connection_name'] = connection_name
3592
+ if headers is not None: body['headers'] = headers
3593
+ if json is not None: body['json'] = json
3594
+ if method is not None: body['method'] = method.value
3595
+ if params is not None: body['params'] = params
3596
+ if path is not None: body['path'] = path
3597
+ headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
3598
+
3599
+ res = self._api.do('POST', '/api/2.0/external-function', body=body, headers=headers)
3600
+ return ExternalFunctionResponse.from_dict(res)
3601
+
3397
3602
  def list(self) -> Iterator[ServingEndpoint]:
3398
3603
  """Get all serving endpoints.
3399
3604
 
@@ -3430,7 +3635,7 @@ class ServingEndpointsAPI:
3430
3635
  name: str,
3431
3636
  *,
3432
3637
  add_tags: Optional[List[EndpointTag]] = None,
3433
- delete_tags: Optional[List[str]] = None) -> Iterator[EndpointTag]:
3638
+ delete_tags: Optional[List[str]] = None) -> EndpointTags:
3434
3639
  """Update tags of a serving endpoint.
3435
3640
 
3436
3641
  Used to batch add and delete tags from a serving endpoint with a single API call.
@@ -3442,7 +3647,7 @@ class ServingEndpointsAPI:
3442
3647
  :param delete_tags: List[str] (optional)
3443
3648
  List of tag keys to delete
3444
3649
 
3445
- :returns: Iterator over :class:`EndpointTag`
3650
+ :returns: :class:`EndpointTags`
3446
3651
  """
3447
3652
  body = {}
3448
3653
  if add_tags is not None: body['add_tags'] = [v.as_dict() for v in add_tags]
@@ -3450,7 +3655,7 @@ class ServingEndpointsAPI:
3450
3655
  headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
3451
3656
 
3452
3657
  res = self._api.do('PATCH', f'/api/2.0/serving-endpoints/{name}/tags', body=body, headers=headers)
3453
- return [EndpointTag.from_dict(v) for v in res]
3658
+ return EndpointTags.from_dict(res)
3454
3659
 
3455
3660
  def put(self, name: str, *, rate_limits: Optional[List[RateLimit]] = None) -> PutResponse:
3456
3661
  """Update rate limits of a serving endpoint.
@@ -3485,8 +3690,8 @@ class ServingEndpointsAPI:
3485
3690
  usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None) -> PutAiGatewayResponse:
3486
3691
  """Update AI Gateway of a serving endpoint.
3487
3692
 
3488
- Used to update the AI Gateway of a serving endpoint. NOTE: Only external model endpoints are currently
3489
- supported.
3693
+ Used to update the AI Gateway of a serving endpoint. NOTE: Only external model and provisioned
3694
+ throughput endpoints are currently supported.
3490
3695
 
3491
3696
  :param name: str
3492
3697
  The name of the serving endpoint whose AI Gateway is being updated. This field is required.
@@ -3646,14 +3851,16 @@ class ServingEndpointsAPI:
3646
3851
  The name of the serving endpoint to update. This field is required.
3647
3852
  :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional)
3648
3853
  Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
3854
+ Note: this field is deprecated for creating new provisioned throughput endpoints, or updating
3855
+ existing provisioned throughput endpoints that never have inference table configured; in these cases
3856
+ please use AI Gateway to manage inference tables.
3649
3857
  :param served_entities: List[:class:`ServedEntityInput`] (optional)
3650
- A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served
3651
- entities.
3858
+ The list of served entities under the serving endpoint config.
3652
3859
  :param served_models: List[:class:`ServedModelInput`] (optional)
3653
- (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A
3654
- serving endpoint can have up to 15 served models.
3860
+ (Deprecated, use served_entities instead) The list of served models under the serving endpoint
3861
+ config.
3655
3862
  :param traffic_config: :class:`TrafficConfig` (optional)
3656
- The traffic config defining how invocations to the serving endpoint should be routed.
3863
+ The traffic configuration associated with the serving endpoint config.
3657
3864
 
3658
3865
  :returns:
3659
3866
  Long-running operation waiter for :class:`ServingEndpointDetailed`.
@@ -3725,6 +3932,7 @@ class ServingEndpointsDataPlaneAPI:
3725
3932
  def __init__(self, api_client, control_plane):
3726
3933
  self._api = api_client
3727
3934
  self._control_plane = control_plane
3935
+ from ..data_plane import DataPlaneService
3728
3936
  self._data_plane_service = DataPlaneService()
3729
3937
 
3730
3938
  def query(self,