databricks-sdk 0.55.0__py3-none-any.whl → 0.57.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of databricks-sdk has been flagged as potentially problematic; see the package registry's advisory page for more details.
- databricks/sdk/__init__.py +41 -24
- databricks/sdk/service/aibuilder.py +505 -0
- databricks/sdk/service/apps.py +14 -42
- databricks/sdk/service/billing.py +167 -220
- databricks/sdk/service/catalog.py +462 -1235
- databricks/sdk/service/cleanrooms.py +26 -43
- databricks/sdk/service/compute.py +75 -211
- databricks/sdk/service/dashboards.py +77 -511
- databricks/sdk/service/database.py +1271 -0
- databricks/sdk/service/files.py +20 -54
- databricks/sdk/service/iam.py +61 -171
- databricks/sdk/service/jobs.py +453 -68
- databricks/sdk/service/marketplace.py +46 -146
- databricks/sdk/service/ml.py +453 -477
- databricks/sdk/service/oauth2.py +17 -45
- databricks/sdk/service/pipelines.py +125 -40
- databricks/sdk/service/provisioning.py +30 -93
- databricks/sdk/service/qualitymonitorv2.py +265 -0
- databricks/sdk/service/serving.py +106 -46
- databricks/sdk/service/settings.py +1062 -390
- databricks/sdk/service/sharing.py +33 -88
- databricks/sdk/service/sql.py +292 -185
- databricks/sdk/service/vectorsearch.py +13 -43
- databricks/sdk/service/workspace.py +35 -105
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.57.0.dist-info}/METADATA +1 -1
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.57.0.dist-info}/RECORD +31 -28
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.57.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.57.0.dist-info}/licenses/LICENSE +0 -0
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.57.0.dist-info}/licenses/NOTICE +0 -0
- {databricks_sdk-0.55.0.dist-info → databricks_sdk-0.57.0.dist-info}/top_level.txt +0 -0
|
@@ -3005,9 +3005,17 @@ class ServedEntityInput:
|
|
|
3005
3005
|
instance_profile_arn: Optional[str] = None
|
|
3006
3006
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3007
3007
|
|
|
3008
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3009
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3010
|
+
workload_size is specified."""
|
|
3011
|
+
|
|
3008
3012
|
max_provisioned_throughput: Optional[int] = None
|
|
3009
3013
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
3010
3014
|
|
|
3015
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3016
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3017
|
+
workload_size is specified."""
|
|
3018
|
+
|
|
3011
3019
|
min_provisioned_throughput: Optional[int] = None
|
|
3012
3020
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
3013
3021
|
|
|
@@ -3030,7 +3038,7 @@ class ServedEntityInput:
|
|
|
3030
3038
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3031
3039
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3032
3040
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3033
|
-
is 0."""
|
|
3041
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3034
3042
|
|
|
3035
3043
|
workload_type: Optional[ServingModelWorkloadType] = None
|
|
3036
3044
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3053,8 +3061,12 @@ class ServedEntityInput:
|
|
|
3053
3061
|
body["external_model"] = self.external_model.as_dict()
|
|
3054
3062
|
if self.instance_profile_arn is not None:
|
|
3055
3063
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3064
|
+
if self.max_provisioned_concurrency is not None:
|
|
3065
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3056
3066
|
if self.max_provisioned_throughput is not None:
|
|
3057
3067
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3068
|
+
if self.min_provisioned_concurrency is not None:
|
|
3069
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3058
3070
|
if self.min_provisioned_throughput is not None:
|
|
3059
3071
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3060
3072
|
if self.name is not None:
|
|
@@ -3082,8 +3094,12 @@ class ServedEntityInput:
|
|
|
3082
3094
|
body["external_model"] = self.external_model
|
|
3083
3095
|
if self.instance_profile_arn is not None:
|
|
3084
3096
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3097
|
+
if self.max_provisioned_concurrency is not None:
|
|
3098
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3085
3099
|
if self.max_provisioned_throughput is not None:
|
|
3086
3100
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3101
|
+
if self.min_provisioned_concurrency is not None:
|
|
3102
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3087
3103
|
if self.min_provisioned_throughput is not None:
|
|
3088
3104
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3089
3105
|
if self.name is not None:
|
|
@@ -3107,7 +3123,9 @@ class ServedEntityInput:
|
|
|
3107
3123
|
environment_vars=d.get("environment_vars", None),
|
|
3108
3124
|
external_model=_from_dict(d, "external_model", ExternalModel),
|
|
3109
3125
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3126
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3110
3127
|
max_provisioned_throughput=d.get("max_provisioned_throughput", None),
|
|
3128
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3111
3129
|
min_provisioned_throughput=d.get("min_provisioned_throughput", None),
|
|
3112
3130
|
name=d.get("name", None),
|
|
3113
3131
|
provisioned_model_units=d.get("provisioned_model_units", None),
|
|
@@ -3152,9 +3170,17 @@ class ServedEntityOutput:
|
|
|
3152
3170
|
instance_profile_arn: Optional[str] = None
|
|
3153
3171
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3154
3172
|
|
|
3173
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3174
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3175
|
+
workload_size is specified."""
|
|
3176
|
+
|
|
3155
3177
|
max_provisioned_throughput: Optional[int] = None
|
|
3156
3178
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
3157
3179
|
|
|
3180
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3181
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3182
|
+
workload_size is specified."""
|
|
3183
|
+
|
|
3158
3184
|
min_provisioned_throughput: Optional[int] = None
|
|
3159
3185
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
3160
3186
|
|
|
@@ -3179,7 +3205,7 @@ class ServedEntityOutput:
|
|
|
3179
3205
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3180
3206
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3181
3207
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3182
|
-
is 0."""
|
|
3208
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3183
3209
|
|
|
3184
3210
|
workload_type: Optional[ServingModelWorkloadType] = None
|
|
3185
3211
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3208,8 +3234,12 @@ class ServedEntityOutput:
|
|
|
3208
3234
|
body["foundation_model"] = self.foundation_model.as_dict()
|
|
3209
3235
|
if self.instance_profile_arn is not None:
|
|
3210
3236
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3237
|
+
if self.max_provisioned_concurrency is not None:
|
|
3238
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3211
3239
|
if self.max_provisioned_throughput is not None:
|
|
3212
3240
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3241
|
+
if self.min_provisioned_concurrency is not None:
|
|
3242
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3213
3243
|
if self.min_provisioned_throughput is not None:
|
|
3214
3244
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3215
3245
|
if self.name is not None:
|
|
@@ -3245,8 +3275,12 @@ class ServedEntityOutput:
|
|
|
3245
3275
|
body["foundation_model"] = self.foundation_model
|
|
3246
3276
|
if self.instance_profile_arn is not None:
|
|
3247
3277
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3278
|
+
if self.max_provisioned_concurrency is not None:
|
|
3279
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3248
3280
|
if self.max_provisioned_throughput is not None:
|
|
3249
3281
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3282
|
+
if self.min_provisioned_concurrency is not None:
|
|
3283
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3250
3284
|
if self.min_provisioned_throughput is not None:
|
|
3251
3285
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3252
3286
|
if self.name is not None:
|
|
@@ -3275,7 +3309,9 @@ class ServedEntityOutput:
|
|
|
3275
3309
|
external_model=_from_dict(d, "external_model", ExternalModel),
|
|
3276
3310
|
foundation_model=_from_dict(d, "foundation_model", FoundationModel),
|
|
3277
3311
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3312
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3278
3313
|
max_provisioned_throughput=d.get("max_provisioned_throughput", None),
|
|
3314
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3279
3315
|
min_provisioned_throughput=d.get("min_provisioned_throughput", None),
|
|
3280
3316
|
name=d.get("name", None),
|
|
3281
3317
|
provisioned_model_units=d.get("provisioned_model_units", None),
|
|
@@ -3360,9 +3396,17 @@ class ServedModelInput:
|
|
|
3360
3396
|
instance_profile_arn: Optional[str] = None
|
|
3361
3397
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3362
3398
|
|
|
3399
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3400
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3401
|
+
workload_size is specified."""
|
|
3402
|
+
|
|
3363
3403
|
max_provisioned_throughput: Optional[int] = None
|
|
3364
3404
|
"""The maximum tokens per second that the endpoint can scale up to."""
|
|
3365
3405
|
|
|
3406
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3407
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3408
|
+
workload_size is specified."""
|
|
3409
|
+
|
|
3366
3410
|
min_provisioned_throughput: Optional[int] = None
|
|
3367
3411
|
"""The minimum tokens per second that the endpoint can scale down to."""
|
|
3368
3412
|
|
|
@@ -3382,7 +3426,7 @@ class ServedModelInput:
|
|
|
3382
3426
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3383
3427
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3384
3428
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3385
|
-
is 0."""
|
|
3429
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3386
3430
|
|
|
3387
3431
|
workload_type: Optional[ServedModelInputWorkloadType] = None
|
|
3388
3432
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3399,8 +3443,12 @@ class ServedModelInput:
|
|
|
3399
3443
|
body["environment_vars"] = self.environment_vars
|
|
3400
3444
|
if self.instance_profile_arn is not None:
|
|
3401
3445
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3446
|
+
if self.max_provisioned_concurrency is not None:
|
|
3447
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3402
3448
|
if self.max_provisioned_throughput is not None:
|
|
3403
3449
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3450
|
+
if self.min_provisioned_concurrency is not None:
|
|
3451
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3404
3452
|
if self.min_provisioned_throughput is not None:
|
|
3405
3453
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3406
3454
|
if self.model_name is not None:
|
|
@@ -3426,8 +3474,12 @@ class ServedModelInput:
|
|
|
3426
3474
|
body["environment_vars"] = self.environment_vars
|
|
3427
3475
|
if self.instance_profile_arn is not None:
|
|
3428
3476
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3477
|
+
if self.max_provisioned_concurrency is not None:
|
|
3478
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3429
3479
|
if self.max_provisioned_throughput is not None:
|
|
3430
3480
|
body["max_provisioned_throughput"] = self.max_provisioned_throughput
|
|
3481
|
+
if self.min_provisioned_concurrency is not None:
|
|
3482
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3431
3483
|
if self.min_provisioned_throughput is not None:
|
|
3432
3484
|
body["min_provisioned_throughput"] = self.min_provisioned_throughput
|
|
3433
3485
|
if self.model_name is not None:
|
|
@@ -3452,7 +3504,9 @@ class ServedModelInput:
|
|
|
3452
3504
|
return cls(
|
|
3453
3505
|
environment_vars=d.get("environment_vars", None),
|
|
3454
3506
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3507
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3455
3508
|
max_provisioned_throughput=d.get("max_provisioned_throughput", None),
|
|
3509
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3456
3510
|
min_provisioned_throughput=d.get("min_provisioned_throughput", None),
|
|
3457
3511
|
model_name=d.get("model_name", None),
|
|
3458
3512
|
model_version=d.get("model_version", None),
|
|
@@ -3489,6 +3543,14 @@ class ServedModelOutput:
|
|
|
3489
3543
|
instance_profile_arn: Optional[str] = None
|
|
3490
3544
|
"""ARN of the instance profile that the served entity uses to access AWS resources."""
|
|
3491
3545
|
|
|
3546
|
+
max_provisioned_concurrency: Optional[int] = None
|
|
3547
|
+
"""The maximum provisioned concurrency that the endpoint can scale up to. Do not use if
|
|
3548
|
+
workload_size is specified."""
|
|
3549
|
+
|
|
3550
|
+
min_provisioned_concurrency: Optional[int] = None
|
|
3551
|
+
"""The minimum provisioned concurrency that the endpoint can scale down to. Do not use if
|
|
3552
|
+
workload_size is specified."""
|
|
3553
|
+
|
|
3492
3554
|
model_name: Optional[str] = None
|
|
3493
3555
|
|
|
3494
3556
|
model_version: Optional[str] = None
|
|
@@ -3514,7 +3576,7 @@ class ServedModelOutput:
|
|
|
3514
3576
|
"Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
|
|
3515
3577
|
Additional custom workload sizes can also be used when available in the workspace. If
|
|
3516
3578
|
scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size
|
|
3517
|
-
is 0."""
|
|
3579
|
+
is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified."""
|
|
3518
3580
|
|
|
3519
3581
|
workload_type: Optional[ServingModelWorkloadType] = None
|
|
3520
3582
|
"""The workload type of the served entity. The workload type selects which type of compute to use
|
|
@@ -3535,6 +3597,10 @@ class ServedModelOutput:
|
|
|
3535
3597
|
body["environment_vars"] = self.environment_vars
|
|
3536
3598
|
if self.instance_profile_arn is not None:
|
|
3537
3599
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3600
|
+
if self.max_provisioned_concurrency is not None:
|
|
3601
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3602
|
+
if self.min_provisioned_concurrency is not None:
|
|
3603
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3538
3604
|
if self.model_name is not None:
|
|
3539
3605
|
body["model_name"] = self.model_name
|
|
3540
3606
|
if self.model_version is not None:
|
|
@@ -3564,6 +3630,10 @@ class ServedModelOutput:
|
|
|
3564
3630
|
body["environment_vars"] = self.environment_vars
|
|
3565
3631
|
if self.instance_profile_arn is not None:
|
|
3566
3632
|
body["instance_profile_arn"] = self.instance_profile_arn
|
|
3633
|
+
if self.max_provisioned_concurrency is not None:
|
|
3634
|
+
body["max_provisioned_concurrency"] = self.max_provisioned_concurrency
|
|
3635
|
+
if self.min_provisioned_concurrency is not None:
|
|
3636
|
+
body["min_provisioned_concurrency"] = self.min_provisioned_concurrency
|
|
3567
3637
|
if self.model_name is not None:
|
|
3568
3638
|
body["model_name"] = self.model_name
|
|
3569
3639
|
if self.model_version is not None:
|
|
@@ -3590,6 +3660,8 @@ class ServedModelOutput:
|
|
|
3590
3660
|
creator=d.get("creator", None),
|
|
3591
3661
|
environment_vars=d.get("environment_vars", None),
|
|
3592
3662
|
instance_profile_arn=d.get("instance_profile_arn", None),
|
|
3663
|
+
max_provisioned_concurrency=d.get("max_provisioned_concurrency", None),
|
|
3664
|
+
min_provisioned_concurrency=d.get("min_provisioned_concurrency", None),
|
|
3593
3665
|
model_name=d.get("model_name", None),
|
|
3594
3666
|
model_version=d.get("model_version", None),
|
|
3595
3667
|
name=d.get("name", None),
|
|
@@ -3727,6 +3799,9 @@ class ServingEndpoint:
|
|
|
3727
3799
|
creator: Optional[str] = None
|
|
3728
3800
|
"""The email of the user who created the serving endpoint."""
|
|
3729
3801
|
|
|
3802
|
+
description: Optional[str] = None
|
|
3803
|
+
"""Description of the endpoint"""
|
|
3804
|
+
|
|
3730
3805
|
id: Optional[str] = None
|
|
3731
3806
|
"""System-generated ID of the endpoint, included to be used by the Permissions API."""
|
|
3732
3807
|
|
|
@@ -3758,6 +3833,8 @@ class ServingEndpoint:
|
|
|
3758
3833
|
body["creation_timestamp"] = self.creation_timestamp
|
|
3759
3834
|
if self.creator is not None:
|
|
3760
3835
|
body["creator"] = self.creator
|
|
3836
|
+
if self.description is not None:
|
|
3837
|
+
body["description"] = self.description
|
|
3761
3838
|
if self.id is not None:
|
|
3762
3839
|
body["id"] = self.id
|
|
3763
3840
|
if self.last_updated_timestamp is not None:
|
|
@@ -3785,6 +3862,8 @@ class ServingEndpoint:
|
|
|
3785
3862
|
body["creation_timestamp"] = self.creation_timestamp
|
|
3786
3863
|
if self.creator is not None:
|
|
3787
3864
|
body["creator"] = self.creator
|
|
3865
|
+
if self.description is not None:
|
|
3866
|
+
body["description"] = self.description
|
|
3788
3867
|
if self.id is not None:
|
|
3789
3868
|
body["id"] = self.id
|
|
3790
3869
|
if self.last_updated_timestamp is not None:
|
|
@@ -3808,6 +3887,7 @@ class ServingEndpoint:
|
|
|
3808
3887
|
config=_from_dict(d, "config", EndpointCoreConfigSummary),
|
|
3809
3888
|
creation_timestamp=d.get("creation_timestamp", None),
|
|
3810
3889
|
creator=d.get("creator", None),
|
|
3890
|
+
description=d.get("description", None),
|
|
3811
3891
|
id=d.get("id", None),
|
|
3812
3892
|
last_updated_timestamp=d.get("last_updated_timestamp", None),
|
|
3813
3893
|
name=d.get("name", None),
|
|
@@ -3949,6 +4029,9 @@ class ServingEndpointDetailed:
|
|
|
3949
4029
|
data_plane_info: Optional[ModelDataPlaneInfo] = None
|
|
3950
4030
|
"""Information required to query DataPlane APIs."""
|
|
3951
4031
|
|
|
4032
|
+
description: Optional[str] = None
|
|
4033
|
+
"""Description of the serving model"""
|
|
4034
|
+
|
|
3952
4035
|
endpoint_url: Optional[str] = None
|
|
3953
4036
|
"""Endpoint invocation url if route optimization is enabled for endpoint"""
|
|
3954
4037
|
|
|
@@ -3995,6 +4078,8 @@ class ServingEndpointDetailed:
|
|
|
3995
4078
|
body["creator"] = self.creator
|
|
3996
4079
|
if self.data_plane_info:
|
|
3997
4080
|
body["data_plane_info"] = self.data_plane_info.as_dict()
|
|
4081
|
+
if self.description is not None:
|
|
4082
|
+
body["description"] = self.description
|
|
3998
4083
|
if self.endpoint_url is not None:
|
|
3999
4084
|
body["endpoint_url"] = self.endpoint_url
|
|
4000
4085
|
if self.id is not None:
|
|
@@ -4032,6 +4117,8 @@ class ServingEndpointDetailed:
|
|
|
4032
4117
|
body["creator"] = self.creator
|
|
4033
4118
|
if self.data_plane_info:
|
|
4034
4119
|
body["data_plane_info"] = self.data_plane_info
|
|
4120
|
+
if self.description is not None:
|
|
4121
|
+
body["description"] = self.description
|
|
4035
4122
|
if self.endpoint_url is not None:
|
|
4036
4123
|
body["endpoint_url"] = self.endpoint_url
|
|
4037
4124
|
if self.id is not None:
|
|
@@ -4064,6 +4151,7 @@ class ServingEndpointDetailed:
|
|
|
4064
4151
|
creation_timestamp=d.get("creation_timestamp", None),
|
|
4065
4152
|
creator=d.get("creator", None),
|
|
4066
4153
|
data_plane_info=_from_dict(d, "data_plane_info", ModelDataPlaneInfo),
|
|
4154
|
+
description=d.get("description", None),
|
|
4067
4155
|
endpoint_url=d.get("endpoint_url", None),
|
|
4068
4156
|
id=d.get("id", None),
|
|
4069
4157
|
last_updated_timestamp=d.get("last_updated_timestamp", None),
|
|
@@ -4417,9 +4505,7 @@ class ServingEndpointsAPI:
|
|
|
4417
4505
|
raise TimeoutError(f"timed out after {timeout}: {status_message}")
|
|
4418
4506
|
|
|
4419
4507
|
def build_logs(self, name: str, served_model_name: str) -> BuildLogsResponse:
|
|
4420
|
-
"""
|
|
4421
|
-
|
|
4422
|
-
Retrieves the build logs associated with the provided served model.
|
|
4508
|
+
"""Retrieves the build logs associated with the provided served model.
|
|
4423
4509
|
|
|
4424
4510
|
:param name: str
|
|
4425
4511
|
The name of the serving endpoint that the served model belongs to. This field is required.
|
|
@@ -4600,9 +4686,7 @@ class ServingEndpointsAPI:
|
|
|
4600
4686
|
self._api.do("DELETE", f"/api/2.0/serving-endpoints/{name}", headers=headers)
|
|
4601
4687
|
|
|
4602
4688
|
def export_metrics(self, name: str) -> ExportMetricsResponse:
|
|
4603
|
-
"""
|
|
4604
|
-
|
|
4605
|
-
Retrieves the metrics associated with the provided serving endpoint in either Prometheus or
|
|
4689
|
+
"""Retrieves the metrics associated with the provided serving endpoint in either Prometheus or
|
|
4606
4690
|
OpenMetrics exposition format.
|
|
4607
4691
|
|
|
4608
4692
|
:param name: str
|
|
@@ -4619,9 +4703,7 @@ class ServingEndpointsAPI:
|
|
|
4619
4703
|
return ExportMetricsResponse.from_dict(res)
|
|
4620
4704
|
|
|
4621
4705
|
def get(self, name: str) -> ServingEndpointDetailed:
|
|
4622
|
-
"""
|
|
4623
|
-
|
|
4624
|
-
Retrieves the details for a single serving endpoint.
|
|
4706
|
+
"""Retrieves the details for a single serving endpoint.
|
|
4625
4707
|
|
|
4626
4708
|
:param name: str
|
|
4627
4709
|
The name of the serving endpoint. This field is required.
|
|
@@ -4637,9 +4719,7 @@ class ServingEndpointsAPI:
|
|
|
4637
4719
|
return ServingEndpointDetailed.from_dict(res)
|
|
4638
4720
|
|
|
4639
4721
|
def get_open_api(self, name: str) -> GetOpenApiResponse:
|
|
4640
|
-
"""Get the schema
|
|
4641
|
-
|
|
4642
|
-
Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for
|
|
4722
|
+
"""Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for
|
|
4643
4723
|
the supported paths, input and output format and datatypes.
|
|
4644
4724
|
|
|
4645
4725
|
:param name: str
|
|
@@ -4656,9 +4736,7 @@ class ServingEndpointsAPI:
|
|
|
4656
4736
|
return GetOpenApiResponse.from_dict(res)
|
|
4657
4737
|
|
|
4658
4738
|
def get_permission_levels(self, serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse:
|
|
4659
|
-
"""
|
|
4660
|
-
|
|
4661
|
-
Gets the permission levels that a user can have on an object.
|
|
4739
|
+
"""Gets the permission levels that a user can have on an object.
|
|
4662
4740
|
|
|
4663
4741
|
:param serving_endpoint_id: str
|
|
4664
4742
|
The serving endpoint for which to get or manage permissions.
|
|
@@ -4676,9 +4754,7 @@ class ServingEndpointsAPI:
|
|
|
4676
4754
|
return GetServingEndpointPermissionLevelsResponse.from_dict(res)
|
|
4677
4755
|
|
|
4678
4756
|
def get_permissions(self, serving_endpoint_id: str) -> ServingEndpointPermissions:
|
|
4679
|
-
"""
|
|
4680
|
-
|
|
4681
|
-
Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root
|
|
4757
|
+
"""Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root
|
|
4682
4758
|
object.
|
|
4683
4759
|
|
|
4684
4760
|
:param serving_endpoint_id: str
|
|
@@ -4758,9 +4834,7 @@ class ServingEndpointsAPI:
|
|
|
4758
4834
|
return parsed if parsed is not None else []
|
|
4759
4835
|
|
|
4760
4836
|
def logs(self, name: str, served_model_name: str) -> ServerLogsResponse:
|
|
4761
|
-
"""
|
|
4762
|
-
|
|
4763
|
-
Retrieves the service logs associated with the provided served model.
|
|
4837
|
+
"""Retrieves the service logs associated with the provided served model.
|
|
4764
4838
|
|
|
4765
4839
|
:param name: str
|
|
4766
4840
|
The name of the serving endpoint that the served model belongs to. This field is required.
|
|
@@ -4782,9 +4856,7 @@ class ServingEndpointsAPI:
|
|
|
4782
4856
|
def patch(
|
|
4783
4857
|
self, name: str, *, add_tags: Optional[List[EndpointTag]] = None, delete_tags: Optional[List[str]] = None
|
|
4784
4858
|
) -> EndpointTags:
|
|
4785
|
-
"""
|
|
4786
|
-
|
|
4787
|
-
Used to batch add and delete tags from a serving endpoint with a single API call.
|
|
4859
|
+
"""Used to batch add and delete tags from a serving endpoint with a single API call.
|
|
4788
4860
|
|
|
4789
4861
|
:param name: str
|
|
4790
4862
|
The name of the serving endpoint who's tags to patch. This field is required.
|
|
@@ -4809,9 +4881,7 @@ class ServingEndpointsAPI:
|
|
|
4809
4881
|
return EndpointTags.from_dict(res)
|
|
4810
4882
|
|
|
4811
4883
|
def put(self, name: str, *, rate_limits: Optional[List[RateLimit]] = None) -> PutResponse:
|
|
4812
|
-
"""
|
|
4813
|
-
|
|
4814
|
-
Deprecated: Please use AI Gateway to manage rate limits instead.
|
|
4884
|
+
"""Deprecated: Please use AI Gateway to manage rate limits instead.
|
|
4815
4885
|
|
|
4816
4886
|
:param name: str
|
|
4817
4887
|
The name of the serving endpoint whose rate limits are being updated. This field is required.
|
|
@@ -4841,9 +4911,7 @@ class ServingEndpointsAPI:
|
|
|
4841
4911
|
rate_limits: Optional[List[AiGatewayRateLimit]] = None,
|
|
4842
4912
|
usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None,
|
|
4843
4913
|
) -> PutAiGatewayResponse:
|
|
4844
|
-
"""
|
|
4845
|
-
|
|
4846
|
-
Used to update the AI Gateway of a serving endpoint. NOTE: External model, provisioned throughput, and
|
|
4914
|
+
"""Used to update the AI Gateway of a serving endpoint. NOTE: External model, provisioned throughput, and
|
|
4847
4915
|
pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables.
|
|
4848
4916
|
|
|
4849
4917
|
:param name: str
|
|
@@ -4998,9 +5066,7 @@ class ServingEndpointsAPI:
|
|
|
4998
5066
|
*,
|
|
4999
5067
|
access_control_list: Optional[List[ServingEndpointAccessControlRequest]] = None,
|
|
5000
5068
|
) -> ServingEndpointPermissions:
|
|
5001
|
-
"""
|
|
5002
|
-
|
|
5003
|
-
Sets permissions on an object, replacing existing permissions if they exist. Deletes all direct
|
|
5069
|
+
"""Sets permissions on an object, replacing existing permissions if they exist. Deletes all direct
|
|
5004
5070
|
permissions if none are specified. Objects can inherit permissions from their root object.
|
|
5005
5071
|
|
|
5006
5072
|
:param serving_endpoint_id: str
|
|
@@ -5031,9 +5097,7 @@ class ServingEndpointsAPI:
|
|
|
5031
5097
|
served_models: Optional[List[ServedModelInput]] = None,
|
|
5032
5098
|
traffic_config: Optional[TrafficConfig] = None,
|
|
5033
5099
|
) -> Wait[ServingEndpointDetailed]:
|
|
5034
|
-
"""
|
|
5035
|
-
|
|
5036
|
-
Updates any combination of the serving endpoint's served entities, the compute configuration of those
|
|
5100
|
+
"""Updates any combination of the serving endpoint's served entities, the compute configuration of those
|
|
5037
5101
|
served entities, and the endpoint's traffic config. An endpoint that already has an update in progress
|
|
5038
5102
|
can not be updated until the current update completes or fails.
|
|
5039
5103
|
|
|
@@ -5101,9 +5165,7 @@ class ServingEndpointsAPI:
|
|
|
5101
5165
|
*,
|
|
5102
5166
|
access_control_list: Optional[List[ServingEndpointAccessControlRequest]] = None,
|
|
5103
5167
|
) -> ServingEndpointPermissions:
|
|
5104
|
-
"""
|
|
5105
|
-
|
|
5106
|
-
Updates the permissions on a serving endpoint. Serving endpoints can inherit permissions from their
|
|
5168
|
+
"""Updates the permissions on a serving endpoint. Serving endpoints can inherit permissions from their
|
|
5107
5169
|
root object.
|
|
5108
5170
|
|
|
5109
5171
|
:param serving_endpoint_id: str
|
|
@@ -5128,9 +5190,7 @@ class ServingEndpointsAPI:
|
|
|
5128
5190
|
def update_provisioned_throughput_endpoint_config(
|
|
5129
5191
|
self, name: str, config: PtEndpointCoreConfig
|
|
5130
5192
|
) -> Wait[ServingEndpointDetailed]:
|
|
5131
|
-
"""
|
|
5132
|
-
|
|
5133
|
-
Updates any combination of the pt endpoint's served entities, the compute configuration of those
|
|
5193
|
+
"""Updates any combination of the pt endpoint's served entities, the compute configuration of those
|
|
5134
5194
|
served entities, and the endpoint's traffic config. Updates are instantaneous and endpoint should be
|
|
5135
5195
|
updated instantly
|
|
5136
5196
|
|