databricks-sdk 0.55.0__py3-none-any.whl → 0.56.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of databricks-sdk might be problematic. Click here for more details.

@@ -0,0 +1,275 @@
1
+ # Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT.
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from typing import Any, Dict, Iterator, List, Optional
9
+
10
+ from ._internal import _enum, _from_dict, _repeated_dict
11
+
12
+ _LOG = logging.getLogger("databricks.sdk")
13
+
14
+
15
+ # all definitions in this file are in alphabetical order
16
+
17
+
18
@dataclass
class AnomalyDetectionConfig:
    """Anomaly-detection workflow state for a monitored UC object."""

    last_run_id: Optional[str] = None
    """Run id of the last run of the workflow"""

    latest_run_status: Optional[AnomalyDetectionRunStatus] = None
    """The status of the last run of the workflow."""

    def as_dict(self) -> dict:
        """Serialize into a dictionary suitable for use as a JSON request body.

        Enum fields are flattened to their string values; unset fields are omitted.
        """
        serialized: dict = {}
        if self.last_run_id is not None:
            serialized["last_run_id"] = self.last_run_id
        if self.latest_run_status is not None:
            serialized["latest_run_status"] = self.latest_run_status.value
        return serialized

    def as_shallow_dict(self) -> dict:
        """Serialize into a shallow dictionary of immediate attributes.

        Unlike :meth:`as_dict`, enum fields are kept as enum members.
        """
        serialized: dict = {}
        if self.last_run_id is not None:
            serialized["last_run_id"] = self.last_run_id
        if self.latest_run_status is not None:
            serialized["latest_run_status"] = self.latest_run_status
        return serialized

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> AnomalyDetectionConfig:
        """Deserialize an AnomalyDetectionConfig from a dictionary."""
        return cls(
            last_run_id=d.get("last_run_id", None),
            latest_run_status=_enum(d, "latest_run_status", AnomalyDetectionRunStatus),
        )
51
+
52
+
53
class AnomalyDetectionRunStatus(Enum):
    """Status of Anomaly Detection Job Run.

    Each member's value equals its name, matching the wire format used by the
    REST API, so ``AnomalyDetectionRunStatus(raw_string)`` round-trips directly.
    """

    ANOMALY_DETECTION_RUN_STATUS_CANCELED = "ANOMALY_DETECTION_RUN_STATUS_CANCELED"
    ANOMALY_DETECTION_RUN_STATUS_FAILED = "ANOMALY_DETECTION_RUN_STATUS_FAILED"
    ANOMALY_DETECTION_RUN_STATUS_JOB_DELETED = "ANOMALY_DETECTION_RUN_STATUS_JOB_DELETED"
    ANOMALY_DETECTION_RUN_STATUS_PENDING = "ANOMALY_DETECTION_RUN_STATUS_PENDING"
    ANOMALY_DETECTION_RUN_STATUS_RUNNING = "ANOMALY_DETECTION_RUN_STATUS_RUNNING"
    ANOMALY_DETECTION_RUN_STATUS_SUCCESS = "ANOMALY_DETECTION_RUN_STATUS_SUCCESS"
    ANOMALY_DETECTION_RUN_STATUS_UNKNOWN = "ANOMALY_DETECTION_RUN_STATUS_UNKNOWN"
    ANOMALY_DETECTION_RUN_STATUS_WORKSPACE_MISMATCH_ERROR = "ANOMALY_DETECTION_RUN_STATUS_WORKSPACE_MISMATCH_ERROR"
64
+
65
+
66
@dataclass
class DeleteQualityMonitorResponse:
    """Empty response body returned by the delete-quality-monitor endpoint."""

    def as_dict(self) -> dict:
        """Serialize into a dictionary suitable for use as a JSON request body (always empty)."""
        return {}

    def as_shallow_dict(self) -> dict:
        """Serialize into a shallow dictionary of immediate attributes (always empty)."""
        return {}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> DeleteQualityMonitorResponse:
        """Deserialize a DeleteQualityMonitorResponse from a dictionary; contents are ignored."""
        return cls()
82
+
83
+
84
@dataclass
class ListQualityMonitorResponse:
    """One page of quality monitors plus an optional continuation token."""

    next_page_token: Optional[str] = None

    quality_monitors: Optional[List[QualityMonitor]] = None

    def as_dict(self) -> dict:
        """Serialize into a dictionary suitable for use as a JSON request body.

        Nested monitors are deep-serialized via their own ``as_dict``.
        """
        serialized: dict = {}
        if self.next_page_token is not None:
            serialized["next_page_token"] = self.next_page_token
        if self.quality_monitors:
            serialized["quality_monitors"] = [monitor.as_dict() for monitor in self.quality_monitors]
        return serialized

    def as_shallow_dict(self) -> dict:
        """Serialize into a shallow dictionary of immediate attributes.

        Nested monitors are kept as objects rather than serialized.
        """
        serialized: dict = {}
        if self.next_page_token is not None:
            serialized["next_page_token"] = self.next_page_token
        if self.quality_monitors:
            serialized["quality_monitors"] = self.quality_monitors
        return serialized

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ListQualityMonitorResponse:
        """Deserialize a ListQualityMonitorResponse from a dictionary."""
        return cls(
            next_page_token=d.get("next_page_token", None),
            quality_monitors=_repeated_dict(d, "quality_monitors", QualityMonitor),
        )
115
+
116
+
117
@dataclass
class QualityMonitor:
    """A data-quality monitor attached to a Unity Catalog object."""

    object_type: str
    """The type of the monitored object. Can be one of the following: schema."""

    object_id: str
    """The uuid of the request object. For example, schema id."""

    anomaly_detection_config: Optional[AnomalyDetectionConfig] = None

    def as_dict(self) -> dict:
        """Serialize into a dictionary suitable for use as a JSON request body.

        The nested config is deep-serialized; unset fields are omitted.
        """
        serialized: dict = {}
        if self.anomaly_detection_config:
            serialized["anomaly_detection_config"] = self.anomaly_detection_config.as_dict()
        if self.object_id is not None:
            serialized["object_id"] = self.object_id
        if self.object_type is not None:
            serialized["object_type"] = self.object_type
        return serialized

    def as_shallow_dict(self) -> dict:
        """Serialize into a shallow dictionary of immediate attributes.

        The nested config is kept as an object rather than serialized.
        """
        serialized: dict = {}
        if self.anomaly_detection_config:
            serialized["anomaly_detection_config"] = self.anomaly_detection_config
        if self.object_id is not None:
            serialized["object_id"] = self.object_id
        if self.object_type is not None:
            serialized["object_type"] = self.object_type
        return serialized

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> QualityMonitor:
        """Deserialize a QualityMonitor from a dictionary."""
        return cls(
            anomaly_detection_config=_from_dict(d, "anomaly_detection_config", AnomalyDetectionConfig),
            object_id=d.get("object_id", None),
            object_type=d.get("object_type", None),
        )
157
+
158
+
159
class QualityMonitorV2API:
    """Manage data quality of UC objects (currently support `schema`)"""

    def __init__(self, api_client):
        self._api = api_client

    def create_quality_monitor(self, quality_monitor: QualityMonitor) -> QualityMonitor:
        """Create a quality monitor.

        Create a quality monitor on UC object

        :param quality_monitor: :class:`QualityMonitor`

        :returns: :class:`QualityMonitor`
        """
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        payload = quality_monitor.as_dict()
        raw = self._api.do("POST", "/api/2.0/quality-monitors", body=payload, headers=headers)
        return QualityMonitor.from_dict(raw)

    def delete_quality_monitor(self, object_type: str, object_id: str):
        """Delete a quality monitor.

        Delete a quality monitor on UC object

        :param object_type: str
          The type of the monitored object. Can be one of the following: schema.
        :param object_id: str
          The uuid of the request object. For example, schema id.
        """
        headers = {
            "Accept": "application/json",
        }
        # No response body is expected from DELETE.
        self._api.do("DELETE", f"/api/2.0/quality-monitors/{object_type}/{object_id}", headers=headers)

    def get_quality_monitor(self, object_type: str, object_id: str) -> QualityMonitor:
        """Read a quality monitor.

        Read a quality monitor on UC object

        :param object_type: str
          The type of the monitored object. Can be one of the following: schema.
        :param object_id: str
          The uuid of the request object. For example, schema id.

        :returns: :class:`QualityMonitor`
        """
        headers = {
            "Accept": "application/json",
        }
        raw = self._api.do("GET", f"/api/2.0/quality-monitors/{object_type}/{object_id}", headers=headers)
        return QualityMonitor.from_dict(raw)

    def list_quality_monitor(
        self, *, page_size: Optional[int] = None, page_token: Optional[str] = None
    ) -> Iterator[QualityMonitor]:
        """List quality monitors.

        (Unimplemented) List quality monitors

        :param page_size: int (optional)
        :param page_token: str (optional)

        :returns: Iterator over :class:`QualityMonitor`
        """
        headers = {
            "Accept": "application/json",
        }
        query = {}
        if page_size is not None:
            query["page_size"] = page_size
        if page_token is not None:
            query["page_token"] = page_token

        # Follow next_page_token until the service stops returning one.
        while True:
            page = self._api.do("GET", "/api/2.0/quality-monitors", query=query, headers=headers)
            for raw in page.get("quality_monitors", []):
                yield QualityMonitor.from_dict(raw)
            token = page.get("next_page_token")
            if not token:
                return
            query["page_token"] = token

    def update_quality_monitor(
        self, object_type: str, object_id: str, quality_monitor: QualityMonitor
    ) -> QualityMonitor:
        """Update a quality monitor.

        (Unimplemented) Update a quality monitor on UC object

        :param object_type: str
          The type of the monitored object. Can be one of the following: schema.
        :param object_id: str
          The uuid of the request object. For example, schema id.
        :param quality_monitor: :class:`QualityMonitor`

        :returns: :class:`QualityMonitor`
        """
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        payload = quality_monitor.as_dict()
        raw = self._api.do("PUT", f"/api/2.0/quality-monitors/{object_type}/{object_id}", body=payload, headers=headers)
        return QualityMonitor.from_dict(raw)
@@ -3005,9 +3005,17 @@ class ServedEntityInput:
3005
3005
  instance_profile_arn: Optional[str] = None
3006
3006
  """ARN of the instance profile that the served entity uses to access AWS resources."""
3007
3007
 
3008
+ max_provisioned_concurrency: Optional[int] = None
3009
+ """The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
3010
+ workload_size is specified."""
3011
+
3008
3012
  max_provisioned_throughput: Optional[int] = None
3009
3013
  """The maximum tokens per second that the endpoint can scale up to."""
3010
3014
 
3015
+ min_provisioned_concurrency: Optional[int] = None
3016
+ """The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
3017
+ workload_size is specified."""
3018
+
3011
3019
  min_provisioned_throughput: Optional[int] = None
3012
3020
  """The minimum tokens per second that the endpoint can scale down to."""
3013
3021
 
@@ -3030,7 +3038,7 @@ class ServedEntityInput:
3030
3038
  "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
3031
3039
  Additional custom workload sizes can also be used when available in the workspace. If
3032
3040
  scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
3033
- is 0."""
3041
+ is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
3034
3042
 
3035
3043
  workload_type: Optional[ServingModelWorkloadType] = None
3036
3044
  """The workload type of the served entity. The workload type selects which type of compute to use
@@ -3053,8 +3061,12 @@ class ServedEntityInput:
3053
3061
  body["external_model"] = self.external_model.as_dict()
3054
3062
  if self.instance_profile_arn is not None:
3055
3063
  body["instance_profile_arn"] = self.instance_profile_arn
3064
+ if self.max_provisioned_concurrency is not None:
3065
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3056
3066
  if self.max_provisioned_throughput is not None:
3057
3067
  body["max_provisioned_throughput"] = self.max_provisioned_throughput
3068
+ if self.min_provisioned_concurrency is not None:
3069
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3058
3070
  if self.min_provisioned_throughput is not None:
3059
3071
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3060
3072
  if self.name is not None:
@@ -3082,8 +3094,12 @@ class ServedEntityInput:
3082
3094
  body["external_model"] = self.external_model
3083
3095
  if self.instance_profile_arn is not None:
3084
3096
  body["instance_profile_arn"] = self.instance_profile_arn
3097
+ if self.max_provisioned_concurrency is not None:
3098
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3085
3099
  if self.max_provisioned_throughput is not None:
3086
3100
  body["max_provisioned_throughput"] = self.max_provisioned_throughput
3101
+ if self.min_provisioned_concurrency is not None:
3102
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3087
3103
  if self.min_provisioned_throughput is not None:
3088
3104
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3089
3105
  if self.name is not None:
@@ -3107,7 +3123,9 @@ class ServedEntityInput:
3107
3123
  environment_vars=d.get("environment_vars", None),
3108
3124
  external_model=_from_dict(d, "external_model", ExternalModel),
3109
3125
  instance_profile_arn=d.get("instance_profile_arn", None),
3126
+ max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
3110
3127
  max_provisioned_throughput=d.get("max_provisioned_throughput", None),
3128
+ min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
3111
3129
  min_provisioned_throughput=d.get("min_provisioned_throughput", None),
3112
3130
  name=d.get("name", None),
3113
3131
  provisioned_model_units=d.get("provisioned_model_units", None),
@@ -3152,9 +3170,17 @@ class ServedEntityOutput:
3152
3170
  instance_profile_arn: Optional[str] = None
3153
3171
  """ARN of the instance profile that the served entity uses to access AWS resources."""
3154
3172
 
3173
+ max_provisioned_concurrency: Optional[int] = None
3174
+ """The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
3175
+ workload_size is specified."""
3176
+
3155
3177
  max_provisioned_throughput: Optional[int] = None
3156
3178
  """The maximum tokens per second that the endpoint can scale up to."""
3157
3179
 
3180
+ min_provisioned_concurrency: Optional[int] = None
3181
+ """The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
3182
+ workload_size is specified."""
3183
+
3158
3184
  min_provisioned_throughput: Optional[int] = None
3159
3185
  """The minimum tokens per second that the endpoint can scale down to."""
3160
3186
 
@@ -3179,7 +3205,7 @@ class ServedEntityOutput:
3179
3205
  "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
3180
3206
  Additional custom workload sizes can also be used when available in the workspace. If
3181
3207
  scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
3182
- is 0."""
3208
+ is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
3183
3209
 
3184
3210
  workload_type: Optional[ServingModelWorkloadType] = None
3185
3211
  """The workload type of the served entity. The workload type selects which type of compute to use
@@ -3208,8 +3234,12 @@ class ServedEntityOutput:
3208
3234
  body["foundation_model"] = self.foundation_model.as_dict()
3209
3235
  if self.instance_profile_arn is not None:
3210
3236
  body["instance_profile_arn"] = self.instance_profile_arn
3237
+ if self.max_provisioned_concurrency is not None:
3238
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3211
3239
  if self.max_provisioned_throughput is not None:
3212
3240
  body["max_provisioned_throughput"] = self.max_provisioned_throughput
3241
+ if self.min_provisioned_concurrency is not None:
3242
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3213
3243
  if self.min_provisioned_throughput is not None:
3214
3244
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3215
3245
  if self.name is not None:
@@ -3245,8 +3275,12 @@ class ServedEntityOutput:
3245
3275
  body["foundation_model"] = self.foundation_model
3246
3276
  if self.instance_profile_arn is not None:
3247
3277
  body["instance_profile_arn"] = self.instance_profile_arn
3278
+ if self.max_provisioned_concurrency is not None:
3279
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3248
3280
  if self.max_provisioned_throughput is not None:
3249
3281
  body["max_provisioned_throughput"] = self.max_provisioned_throughput
3282
+ if self.min_provisioned_concurrency is not None:
3283
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3250
3284
  if self.min_provisioned_throughput is not None:
3251
3285
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3252
3286
  if self.name is not None:
@@ -3275,7 +3309,9 @@ class ServedEntityOutput:
3275
3309
  external_model=_from_dict(d, "external_model", ExternalModel),
3276
3310
  foundation_model=_from_dict(d, "foundation_model", FoundationModel),
3277
3311
  instance_profile_arn=d.get("instance_profile_arn", None),
3312
+ max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
3278
3313
  max_provisioned_throughput=d.get("max_provisioned_throughput", None),
3314
+ min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
3279
3315
  min_provisioned_throughput=d.get("min_provisioned_throughput", None),
3280
3316
  name=d.get("name", None),
3281
3317
  provisioned_model_units=d.get("provisioned_model_units", None),
@@ -3360,9 +3396,17 @@ class ServedModelInput:
3360
3396
  instance_profile_arn: Optional[str] = None
3361
3397
  """ARN of the instance profile that the served entity uses to access AWS resources."""
3362
3398
 
3399
+ max_provisioned_concurrency: Optional[int] = None
3400
+ """The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
3401
+ workload_size is specified."""
3402
+
3363
3403
  max_provisioned_throughput: Optional[int] = None
3364
3404
  """The maximum tokens per second that the endpoint can scale up to."""
3365
3405
 
3406
+ min_provisioned_concurrency: Optional[int] = None
3407
+ """The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
3408
+ workload_size is specified."""
3409
+
3366
3410
  min_provisioned_throughput: Optional[int] = None
3367
3411
  """The minimum tokens per second that the endpoint can scale down to."""
3368
3412
 
@@ -3382,7 +3426,7 @@ class ServedModelInput:
3382
3426
  "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
3383
3427
  Additional custom workload sizes can also be used when available in the workspace. If
3384
3428
  scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
3385
- is 0."""
3429
+ is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
3386
3430
 
3387
3431
  workload_type: Optional[ServedModelInputWorkloadType] = None
3388
3432
  """The workload type of the served entity. The workload type selects which type of compute to use
@@ -3399,8 +3443,12 @@ class ServedModelInput:
3399
3443
  body["environment_vars"] = self.environment_vars
3400
3444
  if self.instance_profile_arn is not None:
3401
3445
  body["instance_profile_arn"] = self.instance_profile_arn
3446
+ if self.max_provisioned_concurrency is not None:
3447
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3402
3448
  if self.max_provisioned_throughput is not None:
3403
3449
  body["max_provisioned_throughput"] = self.max_provisioned_throughput
3450
+ if self.min_provisioned_concurrency is not None:
3451
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3404
3452
  if self.min_provisioned_throughput is not None:
3405
3453
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3406
3454
  if self.model_name is not None:
@@ -3426,8 +3474,12 @@ class ServedModelInput:
3426
3474
  body["environment_vars"] = self.environment_vars
3427
3475
  if self.instance_profile_arn is not None:
3428
3476
  body["instance_profile_arn"] = self.instance_profile_arn
3477
+ if self.max_provisioned_concurrency is not None:
3478
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3429
3479
  if self.max_provisioned_throughput is not None:
3430
3480
  body["max_provisioned_throughput"] = self.max_provisioned_throughput
3481
+ if self.min_provisioned_concurrency is not None:
3482
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3431
3483
  if self.min_provisioned_throughput is not None:
3432
3484
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3433
3485
  if self.model_name is not None:
@@ -3452,7 +3504,9 @@ class ServedModelInput:
3452
3504
  return cls(
3453
3505
  environment_vars=d.get("environment_vars", None),
3454
3506
  instance_profile_arn=d.get("instance_profile_arn", None),
3507
+ max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
3455
3508
  max_provisioned_throughput=d.get("max_provisioned_throughput", None),
3509
+ min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
3456
3510
  min_provisioned_throughput=d.get("min_provisioned_throughput", None),
3457
3511
  model_name=d.get("model_name", None),
3458
3512
  model_version=d.get("model_version", None),
@@ -3489,6 +3543,14 @@ class ServedModelOutput:
3489
3543
  instance_profile_arn: Optional[str] = None
3490
3544
  """ARN of the instance profile that the served entity uses to access AWS resources."""
3491
3545
 
3546
+ max_provisioned_concurrency: Optional[int] = None
3547
+ """The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
3548
+ workload_size is specified."""
3549
+
3550
+ min_provisioned_concurrency: Optional[int] = None
3551
+ """The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
3552
+ workload_size is specified."""
3553
+
3492
3554
  model_name: Optional[str] = None
3493
3555
 
3494
3556
  model_version: Optional[str] = None
@@ -3514,7 +3576,7 @@ class ServedModelOutput:
3514
3576
  "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
3515
3577
  Additional custom workload sizes can also be used when available in the workspace. If
3516
3578
  scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
3517
- is 0."""
3579
+ is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
3518
3580
 
3519
3581
  workload_type: Optional[ServingModelWorkloadType] = None
3520
3582
  """The workload type of the served entity. The workload type selects which type of compute to use
@@ -3535,6 +3597,10 @@ class ServedModelOutput:
3535
3597
  body["environment_vars"] = self.environment_vars
3536
3598
  if self.instance_profile_arn is not None:
3537
3599
  body["instance_profile_arn"] = self.instance_profile_arn
3600
+ if self.max_provisioned_concurrency is not None:
3601
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3602
+ if self.min_provisioned_concurrency is not None:
3603
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3538
3604
  if self.model_name is not None:
3539
3605
  body["model_name"] = self.model_name
3540
3606
  if self.model_version is not None:
@@ -3564,6 +3630,10 @@ class ServedModelOutput:
3564
3630
  body["environment_vars"] = self.environment_vars
3565
3631
  if self.instance_profile_arn is not None:
3566
3632
  body["instance_profile_arn"] = self.instance_profile_arn
3633
+ if self.max_provisioned_concurrency is not None:
3634
+ body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
3635
+ if self.min_provisioned_concurrency is not None:
3636
+ body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
3567
3637
  if self.model_name is not None:
3568
3638
  body["model_name"] = self.model_name
3569
3639
  if self.model_version is not None:
@@ -3590,6 +3660,8 @@ class ServedModelOutput:
3590
3660
  creator=d.get("creator", None),
3591
3661
  environment_vars=d.get("environment_vars", None),
3592
3662
  instance_profile_arn=d.get("instance_profile_arn", None),
3663
+ max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
3664
+ min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
3593
3665
  model_name=d.get("model_name", None),
3594
3666
  model_version=d.get("model_version", None),
3595
3667
  name=d.get("name", None),