databricks-sdk 0.55.0__py3-none-any.whl → 0.56.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
Note: this version of databricks-sdk has been flagged as potentially problematic; consult the registry's advisory page for details.
- databricks/sdk/__init__.py +33 -22
- databricks/sdk/service/aibuilder.py +364 -0
- databricks/sdk/service/billing.py +150 -169
- databricks/sdk/service/catalog.py +263 -835
- databricks/sdk/service/cleanrooms.py +15 -10
- databricks/sdk/service/compute.py +12 -22
- databricks/sdk/service/dashboards.py +59 -451
- databricks/sdk/service/database.py +1256 -0
- databricks/sdk/service/files.py +2 -0
- databricks/sdk/service/iam.py +6 -6
- databricks/sdk/service/jobs.py +238 -0
- databricks/sdk/service/ml.py +8 -271
- databricks/sdk/service/pipelines.py +45 -1
- databricks/sdk/service/provisioning.py +0 -3
- databricks/sdk/service/qualitymonitorv2.py +275 -0
- databricks/sdk/service/serving.py +76 -4
- databricks/sdk/service/settings.py +982 -99
- databricks/sdk/service/sharing.py +3 -2
- databricks/sdk/service/sql.py +218 -1
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.56.0.dist-info}/METADATA +1 -1
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.56.0.dist-info}/RECORD +26 -23
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.56.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.56.0.dist-info}/licenses/LICENSE +0 -0
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.56.0.dist-info}/licenses/NOTICE +0 -0
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.56.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
9
|
+
|
|
10
|
+
from ._internal import _enum, _from_dict, _repeated_dict
|
|
11
|
+
|
|
12
|
+
_LOG = logging.getLogger("databricks.sdk")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# all definitions in this file are in alphabetical order
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class AnomalyDetectionConfig:
    """Anomaly-detection state carried by a quality monitor."""

    last_run_id: Optional[str] = None
    """Run id of the last run of the workflow"""

    latest_run_status: Optional[AnomalyDetectionRunStatus] = None
    """The status of the last run of the workflow."""

    def as_dict(self) -> dict:
        """Serializes the AnomalyDetectionConfig into a dictionary suitable for use as a JSON request body."""
        serialized: dict = {}
        if self.last_run_id is not None:
            serialized["last_run_id"] = self.last_run_id
        if self.latest_run_status is not None:
            # Enum members are flattened to their wire-format string value.
            serialized["latest_run_status"] = self.latest_run_status.value
        return serialized

    def as_shallow_dict(self) -> dict:
        """Serializes the AnomalyDetectionConfig into a shallow dictionary of its immediate attributes."""
        serialized: dict = {}
        if self.last_run_id is not None:
            serialized["last_run_id"] = self.last_run_id
        if self.latest_run_status is not None:
            # Shallow form keeps the enum member itself, not its .value.
            serialized["latest_run_status"] = self.latest_run_status
        return serialized

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> AnomalyDetectionConfig:
        """Deserializes the AnomalyDetectionConfig from a dictionary."""
        run_id = d.get("last_run_id", None)
        status = _enum(d, "latest_run_status", AnomalyDetectionRunStatus)
        return cls(last_run_id=run_id, latest_run_status=status)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AnomalyDetectionRunStatus(Enum):
    """Status of Anomaly Detection Job Run"""

    # Wire-format enum: each member's value is the exact string the REST API
    # sends and receives for this status, so .value round-trips unchanged.
    ANOMALY_DETECTION_RUN_STATUS_CANCELED = "ANOMALY_DETECTION_RUN_STATUS_CANCELED"
    ANOMALY_DETECTION_RUN_STATUS_FAILED = "ANOMALY_DETECTION_RUN_STATUS_FAILED"
    ANOMALY_DETECTION_RUN_STATUS_JOB_DELETED = "ANOMALY_DETECTION_RUN_STATUS_JOB_DELETED"
    ANOMALY_DETECTION_RUN_STATUS_PENDING = "ANOMALY_DETECTION_RUN_STATUS_PENDING"
    ANOMALY_DETECTION_RUN_STATUS_RUNNING = "ANOMALY_DETECTION_RUN_STATUS_RUNNING"
    ANOMALY_DETECTION_RUN_STATUS_SUCCESS = "ANOMALY_DETECTION_RUN_STATUS_SUCCESS"
    ANOMALY_DETECTION_RUN_STATUS_UNKNOWN = "ANOMALY_DETECTION_RUN_STATUS_UNKNOWN"
    ANOMALY_DETECTION_RUN_STATUS_WORKSPACE_MISMATCH_ERROR = "ANOMALY_DETECTION_RUN_STATUS_WORKSPACE_MISMATCH_ERROR"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class DeleteQualityMonitorResponse:
    """Empty response returned when a quality monitor is deleted."""

    def as_dict(self) -> dict:
        """Serializes the DeleteQualityMonitorResponse into a dictionary suitable for use as a JSON request body."""
        # There are no fields to serialize; the body is always empty.
        return {}

    def as_shallow_dict(self) -> dict:
        """Serializes the DeleteQualityMonitorResponse into a shallow dictionary of its immediate attributes."""
        return {}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> DeleteQualityMonitorResponse:
        """Deserializes the DeleteQualityMonitorResponse from a dictionary."""
        # Any incoming payload is ignored; the type carries no data.
        return cls()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class ListQualityMonitorResponse:
    """One page of quality monitors plus an optional continuation token."""

    next_page_token: Optional[str] = None

    quality_monitors: Optional[List[QualityMonitor]] = None

    def as_dict(self) -> dict:
        """Serializes the ListQualityMonitorResponse into a dictionary suitable for use as a JSON request body."""
        payload: dict = {}
        if self.next_page_token is not None:
            payload["next_page_token"] = self.next_page_token
        # Truthiness check on purpose: an empty list is omitted, like None.
        if self.quality_monitors:
            payload["quality_monitors"] = [monitor.as_dict() for monitor in self.quality_monitors]
        return payload

    def as_shallow_dict(self) -> dict:
        """Serializes the ListQualityMonitorResponse into a shallow dictionary of its immediate attributes."""
        payload: dict = {}
        if self.next_page_token is not None:
            payload["next_page_token"] = self.next_page_token
        if self.quality_monitors:
            # Shallow form keeps the QualityMonitor objects as-is.
            payload["quality_monitors"] = self.quality_monitors
        return payload

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ListQualityMonitorResponse:
        """Deserializes the ListQualityMonitorResponse from a dictionary."""
        token = d.get("next_page_token", None)
        monitors = _repeated_dict(d, "quality_monitors", QualityMonitor)
        return cls(next_page_token=token, quality_monitors=monitors)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class QualityMonitor:
    """A quality monitor attached to a Unity Catalog object."""

    object_type: str
    """The type of the monitored object. Can be one of the following: schema."""

    object_id: str
    """The uuid of the request object. For example, schema id."""

    anomaly_detection_config: Optional[AnomalyDetectionConfig] = None

    def as_dict(self) -> dict:
        """Serializes the QualityMonitor into a dictionary suitable for use as a JSON request body."""
        payload: dict = {}
        if self.anomaly_detection_config:
            # Nested config is serialized recursively for the deep form.
            payload["anomaly_detection_config"] = self.anomaly_detection_config.as_dict()
        for key, value in (("object_id", self.object_id), ("object_type", self.object_type)):
            if value is not None:
                payload[key] = value
        return payload

    def as_shallow_dict(self) -> dict:
        """Serializes the QualityMonitor into a shallow dictionary of its immediate attributes."""
        payload: dict = {}
        if self.anomaly_detection_config:
            # Shallow form keeps the config object itself.
            payload["anomaly_detection_config"] = self.anomaly_detection_config
        for key, value in (("object_id", self.object_id), ("object_type", self.object_type)):
            if value is not None:
                payload[key] = value
        return payload

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> QualityMonitor:
        """Deserializes the QualityMonitor from a dictionary."""
        config = _from_dict(d, "anomaly_detection_config", AnomalyDetectionConfig)
        return cls(
            anomaly_detection_config=config,
            object_id=d.get("object_id", None),
            object_type=d.get("object_type", None),
        )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class QualityMonitorV2API:
    """Manage data quality of UC objects (currently support `schema`)"""

    def __init__(self, api_client):
        # Thin wrapper over the shared API client; all methods delegate to it.
        self._api = api_client

    def create_quality_monitor(self, quality_monitor: QualityMonitor) -> QualityMonitor:
        """Create a quality monitor.

        Create a quality monitor on UC object

        :param quality_monitor: :class:`QualityMonitor`

        :returns: :class:`QualityMonitor`
        """
        request_body = quality_monitor.as_dict()
        request_headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        response = self._api.do("POST", "/api/2.0/quality-monitors", body=request_body, headers=request_headers)
        return QualityMonitor.from_dict(response)

    def delete_quality_monitor(self, object_type: str, object_id: str):
        """Delete a quality monitor.

        Delete a quality monitor on UC object

        :param object_type: str
          The type of the monitored object. Can be one of the following: schema.
        :param object_id: str
          The uuid of the request object. For example, schema id.


        """
        request_headers = {
            "Accept": "application/json",
        }
        # DELETE returns no payload; nothing to deserialize.
        self._api.do("DELETE", f"/api/2.0/quality-monitors/{object_type}/{object_id}", headers=request_headers)

    def get_quality_monitor(self, object_type: str, object_id: str) -> QualityMonitor:
        """Read a quality monitor.

        Read a quality monitor on UC object

        :param object_type: str
          The type of the monitored object. Can be one of the following: schema.
        :param object_id: str
          The uuid of the request object. For example, schema id.

        :returns: :class:`QualityMonitor`
        """
        request_headers = {
            "Accept": "application/json",
        }
        response = self._api.do("GET", f"/api/2.0/quality-monitors/{object_type}/{object_id}", headers=request_headers)
        return QualityMonitor.from_dict(response)

    def list_quality_monitor(
        self, *, page_size: Optional[int] = None, page_token: Optional[str] = None
    ) -> Iterator[QualityMonitor]:
        """List quality monitors.

        (Unimplemented) List quality monitors

        :param page_size: int (optional)
        :param page_token: str (optional)

        :returns: Iterator over :class:`QualityMonitor`
        """
        query = {}
        if page_size is not None:
            query["page_size"] = page_size
        if page_token is not None:
            query["page_token"] = page_token
        request_headers = {
            "Accept": "application/json",
        }

        # Keep fetching pages until the server stops returning a token.
        while True:
            page = self._api.do("GET", "/api/2.0/quality-monitors", query=query, headers=request_headers)
            for raw in page.get("quality_monitors", []):
                yield QualityMonitor.from_dict(raw)
            token = page.get("next_page_token")
            if not token:
                return
            query["page_token"] = token

    def update_quality_monitor(
        self, object_type: str, object_id: str, quality_monitor: QualityMonitor
    ) -> QualityMonitor:
        """Update a quality monitor.

        (Unimplemented) Update a quality monitor on UC object

        :param object_type: str
          The type of the monitored object. Can be one of the following: schema.
        :param object_id: str
          The uuid of the request object. For example, schema id.
        :param quality_monitor: :class:`QualityMonitor`

        :returns: :class:`QualityMonitor`
        """
        request_body = quality_monitor.as_dict()
        request_headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        response = self._api.do(
            "PUT", f"/api/2.0/quality-monitors/{object_type}/{object_id}", body=request_body, headers=request_headers
        )
        return QualityMonitor.from_dict(response)
|
|
@@ -3005,9 +3005,17 @@ class ServedEntityInput:
|
|
|
3005
3005
|
instance_profile_arn: Optional[str] = None
|
|
3006
3006
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3007
3007
|
|
|
3008
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3009
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3010
|
+
workload_size is specified."""
|
|
3011
|
+
|
|
3008
3012
|
max_provisioned_throughput: Optional[int] = None
|
|
3009
3013
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
3010
3014
|
|
|
3015
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3016
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3017
|
+
workload_size is specified."""
|
|
3018
|
+
|
|
3011
3019
|
min_provisioned_throughput: Optional[int] = None
|
|
3012
3020
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
3013
3021
|
|
|
@@ -3030,7 +3038,7 @@ class ServedEntityInput:
|
|
|
3030
3038
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3031
3039
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3032
3040
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3033
|
-
is 0."""
|
|
3041
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3034
3042
|
|
|
3035
3043
|
workload_type: Optional[ServingModelWorkloadType] = None
|
|
3036
3044
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3053,8 +3061,12 @@ class ServedEntityInput:
|
|
|
3053
3061
|
body["external_model"] = self.external_model.as_dict()
|
|
3054
3062
|
if self.instance_profile_arn is not None:
|
|
3055
3063
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3064
|
+
if self.max_provisioned_concurrency is not None:
|
|
3065
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3056
3066
|
if self.max_provisioned_throughput is not None:
|
|
3057
3067
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3068
|
+
if self.min_provisioned_concurrency is not None:
|
|
3069
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3058
3070
|
if self.min_provisioned_throughput is not None:
|
|
3059
3071
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3060
3072
|
if self.name is not None:
|
|
@@ -3082,8 +3094,12 @@ class ServedEntityInput:
|
|
|
3082
3094
|
body["external_model"] = self.external_model
|
|
3083
3095
|
if self.instance_profile_arn is not None:
|
|
3084
3096
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3097
|
+
if self.max_provisioned_concurrency is not None:
|
|
3098
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3085
3099
|
if self.max_provisioned_throughput is not None:
|
|
3086
3100
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3101
|
+
if self.min_provisioned_concurrency is not None:
|
|
3102
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3087
3103
|
if self.min_provisioned_throughput is not None:
|
|
3088
3104
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3089
3105
|
if self.name is not None:
|
|
@@ -3107,7 +3123,9 @@ class ServedEntityInput:
|
|
|
3107
3123
|
environment_vars=d.get("environment_vars", None),
|
|
3108
3124
|
external_model=_from_dict(d, "external_model", ExternalModel),
|
|
3109
3125
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3126
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3110
3127
|
max_provisioned_throughput=d.get("max_provisioned_throughput", None),
|
|
3128
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3111
3129
|
min_provisioned_throughput=d.get("min_provisioned_throughput", None),
|
|
3112
3130
|
name=d.get("name", None),
|
|
3113
3131
|
provisioned_model_units=d.get("provisioned_model_units", None),
|
|
@@ -3152,9 +3170,17 @@ class ServedEntityOutput:
|
|
|
3152
3170
|
instance_profile_arn: Optional[str] = None
|
|
3153
3171
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3154
3172
|
|
|
3173
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3174
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3175
|
+
workload_size is specified."""
|
|
3176
|
+
|
|
3155
3177
|
max_provisioned_throughput: Optional[int] = None
|
|
3156
3178
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
3157
3179
|
|
|
3180
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3181
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3182
|
+
workload_size is specified."""
|
|
3183
|
+
|
|
3158
3184
|
min_provisioned_throughput: Optional[int] = None
|
|
3159
3185
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
3160
3186
|
|
|
@@ -3179,7 +3205,7 @@ class ServedEntityOutput:
|
|
|
3179
3205
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3180
3206
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3181
3207
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3182
|
-
is 0."""
|
|
3208
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3183
3209
|
|
|
3184
3210
|
workload_type: Optional[ServingModelWorkloadType] = None
|
|
3185
3211
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3208,8 +3234,12 @@ class ServedEntityOutput:
|
|
|
3208
3234
|
body["foundation_model"] = self.foundation_model.as_dict()
|
|
3209
3235
|
if self.instance_profile_arn is not None:
|
|
3210
3236
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3237
|
+
if self.max_provisioned_concurrency is not None:
|
|
3238
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3211
3239
|
if self.max_provisioned_throughput is not None:
|
|
3212
3240
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3241
|
+
if self.min_provisioned_concurrency is not None:
|
|
3242
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3213
3243
|
if self.min_provisioned_throughput is not None:
|
|
3214
3244
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3215
3245
|
if self.name is not None:
|
|
@@ -3245,8 +3275,12 @@ class ServedEntityOutput:
|
|
|
3245
3275
|
body["foundation_model"] = self.foundation_model
|
|
3246
3276
|
if self.instance_profile_arn is not None:
|
|
3247
3277
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3278
|
+
if self.max_provisioned_concurrency is not None:
|
|
3279
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3248
3280
|
if self.max_provisioned_throughput is not None:
|
|
3249
3281
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3282
|
+
if self.min_provisioned_concurrency is not None:
|
|
3283
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3250
3284
|
if self.min_provisioned_throughput is not None:
|
|
3251
3285
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3252
3286
|
if self.name is not None:
|
|
@@ -3275,7 +3309,9 @@ class ServedEntityOutput:
|
|
|
3275
3309
|
external_model=_from_dict(d, "external_model", ExternalModel),
|
|
3276
3310
|
foundation_model=_from_dict(d, "foundation_model", FoundationModel),
|
|
3277
3311
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3312
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3278
3313
|
max_provisioned_throughput=d.get("max_provisioned_throughput", None),
|
|
3314
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3279
3315
|
min_provisioned_throughput=d.get("min_provisioned_throughput", None),
|
|
3280
3316
|
name=d.get("name", None),
|
|
3281
3317
|
provisioned_model_units=d.get("provisioned_model_units", None),
|
|
@@ -3360,9 +3396,17 @@ class ServedModelInput:
|
|
|
3360
3396
|
instance_profile_arn: Optional[str] = None
|
|
3361
3397
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3362
3398
|
|
|
3399
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3400
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3401
|
+
workload_size is specified."""
|
|
3402
|
+
|
|
3363
3403
|
max_provisioned_throughput: Optional[int] = None
|
|
3364
3404
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
3365
3405
|
|
|
3406
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3407
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3408
|
+
workload_size is specified."""
|
|
3409
|
+
|
|
3366
3410
|
min_provisioned_throughput: Optional[int] = None
|
|
3367
3411
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
3368
3412
|
|
|
@@ -3382,7 +3426,7 @@ class ServedModelInput:
|
|
|
3382
3426
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3383
3427
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3384
3428
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3385
|
-
is 0."""
|
|
3429
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3386
3430
|
|
|
3387
3431
|
workload_type: Optional[ServedModelInputWorkloadType] = None
|
|
3388
3432
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3399,8 +3443,12 @@ class ServedModelInput:
|
|
|
3399
3443
|
body["environment_vars"] = self.environment_vars
|
|
3400
3444
|
if self.instance_profile_arn is not None:
|
|
3401
3445
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3446
|
+
if self.max_provisioned_concurrency is not None:
|
|
3447
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3402
3448
|
if self.max_provisioned_throughput is not None:
|
|
3403
3449
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3450
|
+
if self.min_provisioned_concurrency is not None:
|
|
3451
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3404
3452
|
if self.min_provisioned_throughput is not None:
|
|
3405
3453
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3406
3454
|
if self.model_name is not None:
|
|
@@ -3426,8 +3474,12 @@ class ServedModelInput:
|
|
|
3426
3474
|
body["environment_vars"] = self.environment_vars
|
|
3427
3475
|
if self.instance_profile_arn is not None:
|
|
3428
3476
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3477
|
+
if self.max_provisioned_concurrency is not None:
|
|
3478
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3429
3479
|
if self.max_provisioned_throughput is not None:
|
|
3430
3480
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3481
|
+
if self.min_provisioned_concurrency is not None:
|
|
3482
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3431
3483
|
if self.min_provisioned_throughput is not None:
|
|
3432
3484
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3433
3485
|
if self.model_name is not None:
|
|
@@ -3452,7 +3504,9 @@ class ServedModelInput:
|
|
|
3452
3504
|
return cls(
|
|
3453
3505
|
environment_vars=d.get("environment_vars", None),
|
|
3454
3506
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3507
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3455
3508
|
max_provisioned_throughput=d.get("max_provisioned_throughput", None),
|
|
3509
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3456
3510
|
min_provisioned_throughput=d.get("min_provisioned_throughput", None),
|
|
3457
3511
|
model_name=d.get("model_name", None),
|
|
3458
3512
|
model_version=d.get("model_version", None),
|
|
@@ -3489,6 +3543,14 @@ class ServedModelOutput:
|
|
|
3489
3543
|
instance_profile_arn: Optional[str] = None
|
|
3490
3544
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3491
3545
|
|
|
3546
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3547
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3548
|
+
workload_size is specified."""
|
|
3549
|
+
|
|
3550
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3551
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3552
|
+
workload_size is specified."""
|
|
3553
|
+
|
|
3492
3554
|
model_name: Optional[str] = None
|
|
3493
3555
|
|
|
3494
3556
|
model_version: Optional[str] = None
|
|
@@ -3514,7 +3576,7 @@ class ServedModelOutput:
|
|
|
3514
3576
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3515
3577
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3516
3578
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3517
|
-
is 0."""
|
|
3579
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3518
3580
|
|
|
3519
3581
|
workload_type: Optional[ServingModelWorkloadType] = None
|
|
3520
3582
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3535,6 +3597,10 @@ class ServedModelOutput:
|
|
|
3535
3597
|
body["environment_vars"] = self.environment_vars
|
|
3536
3598
|
if self.instance_profile_arn is not None:
|
|
3537
3599
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3600
|
+
if self.max_provisioned_concurrency is not None:
|
|
3601
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3602
|
+
if self.min_provisioned_concurrency is not None:
|
|
3603
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3538
3604
|
if self.model_name is not None:
|
|
3539
3605
|
body["model_name"] = self.model_name
|
|
3540
3606
|
if self.model_version is not None:
|
|
@@ -3564,6 +3630,10 @@ class ServedModelOutput:
|
|
|
3564
3630
|
body["environment_vars"] = self.environment_vars
|
|
3565
3631
|
if self.instance_profile_arn is not None:
|
|
3566
3632
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3633
|
+
if self.max_provisioned_concurrency is not None:
|
|
3634
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3635
|
+
if self.min_provisioned_concurrency is not None:
|
|
3636
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3567
3637
|
if self.model_name is not None:
|
|
3568
3638
|
body["model_name"] = self.model_name
|
|
3569
3639
|
if self.model_version is not None:
|
|
@@ -3590,6 +3660,8 @@ class ServedModelOutput:
|
|
|
3590
3660
|
creator=d.get("creator", None),
|
|
3591
3661
|
environment_vars=d.get("environment_vars", None),
|
|
3592
3662
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3663
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3664
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3593
3665
|
model_name=d.get("model_name", None),
|
|
3594
3666
|
model_version=d.get("model_version", None),
|
|
3595
3667
|
name=d.get("name", None),
|