databricks-sdk 0.53.0__py3-none-any.whl → 0.55.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of databricks-sdk might be problematic. Click here for more details.

@@ -842,6 +842,66 @@ class CohereConfig:
842
842
  )
843
843
 
844
844
 
845
# Request payload used when creating a PT serving endpoint.
@dataclass
class CreatePtEndpointRequest:
    name: str
    """Name of the serving endpoint. Required, and must be unique across a Databricks workspace.
    An endpoint name can consist of alphanumeric characters, dashes, and underscores."""

    config: PtEndpointCoreConfig
    """Core config of the serving endpoint."""

    ai_gateway: Optional[AiGatewayConfig] = None
    """AI Gateway configuration for the serving endpoint."""

    budget_policy_id: Optional[str] = None
    """Budget policy associated with the endpoint."""

    tags: Optional[List[EndpointTag]] = None
    """Tags attached to the serving endpoint and automatically propagated to billing logs."""

    def as_dict(self) -> dict:
        """Return a JSON-ready dict; nested objects are serialized through their own ``as_dict``."""
        payload = {}
        gateway, core, tag_list = self.ai_gateway, self.config, self.tags
        # Keys are inserted in the same fixed order the generated code uses.
        if gateway:
            payload["ai_gateway"] = gateway.as_dict()
        if self.budget_policy_id is not None:
            payload["budget_policy_id"] = self.budget_policy_id
        if core:
            payload["config"] = core.as_dict()
        if self.name is not None:
            payload["name"] = self.name
        if tag_list:
            payload["tags"] = [tag.as_dict() for tag in tag_list]
        return payload

    def as_shallow_dict(self) -> dict:
        """Return a dict of the immediate attributes without serializing nested objects."""
        # Each entry is (key, value, keep): object/list fields are kept when truthy,
        # scalar fields are kept when they are not None.
        entries = (
            ("ai_gateway", self.ai_gateway, bool(self.ai_gateway)),
            ("budget_policy_id", self.budget_policy_id, self.budget_policy_id is not None),
            ("config", self.config, bool(self.config)),
            ("name", self.name, self.name is not None),
            ("tags", self.tags, bool(self.tags)),
        )
        return {key: value for key, value, keep in entries if keep}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> CreatePtEndpointRequest:
        """Build a CreatePtEndpointRequest from a plain dictionary."""
        kwargs = {
            "ai_gateway": _from_dict(d, "ai_gateway", AiGatewayConfig),
            "budget_policy_id": d.get("budget_policy_id", None),
            "config": _from_dict(d, "config", PtEndpointCoreConfig),
            "name": d.get("name", None),
            "tags": _repeated_dict(d, "tags", EndpointTag),
        }
        return cls(**kwargs)
903
+
904
+
845
905
  @dataclass
846
906
  class CreateServingEndpoint:
847
907
  name: str
@@ -2292,6 +2352,96 @@ class PayloadTable:
2292
2352
  return cls(name=d.get("name", None), status=d.get("status", None), status_message=d.get("status_message", None))
2293
2353
 
2294
2354
 
2355
# Core configuration of a PT serving endpoint: served entities plus traffic config.
@dataclass
class PtEndpointCoreConfig:
    served_entities: Optional[List[PtServedModel]] = None
    """List of served entities under the serving endpoint config."""

    traffic_config: Optional[TrafficConfig] = None

    def as_dict(self) -> dict:
        """Return a JSON-ready dict; nested objects are serialized through their own ``as_dict``."""
        payload = {}
        entities = self.served_entities
        if entities:
            payload["served_entities"] = [entity.as_dict() for entity in entities]
        routing = self.traffic_config
        if routing:
            payload["traffic_config"] = routing.as_dict()
        return payload

    def as_shallow_dict(self) -> dict:
        """Return a dict of the immediate attributes without serializing nested objects."""
        # Falsy values (None or an empty list) are left out of the result.
        entries = (
            ("served_entities", self.served_entities),
            ("traffic_config", self.traffic_config),
        )
        return {key: value for key, value in entries if value}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> PtEndpointCoreConfig:
        """Build a PtEndpointCoreConfig from a plain dictionary."""
        entities = _repeated_dict(d, "served_entities", PtServedModel)
        routing = _from_dict(d, "traffic_config", TrafficConfig)
        return cls(served_entities=entities, traffic_config=routing)
2387
+
2388
+
2389
# A served entity of a PT endpoint together with its provisioned model units.
@dataclass
class PtServedModel:
    entity_name: str
    """Name of the entity to be served. The entity may be a model in the Databricks Model Registry,
    a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC
    object, the full name of the object should be given in the form of
    **catalog_name.schema_name.model_name**."""

    provisioned_model_units: int
    """Number of model units to be provisioned."""

    entity_version: Optional[str] = None

    name: Optional[str] = None
    """Name of a served entity. It must be unique across an endpoint. A served entity name can
    consist of alphanumeric characters, dashes, and underscores. If not specified for an external
    model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
    not specified for other entities, it defaults to entity_name-entity_version."""

    def as_dict(self) -> dict:
        """Return a JSON-ready dict containing every attribute that is not None."""
        # All attributes are scalars, so no nested serialization is required.
        entries = (
            ("entity_name", self.entity_name),
            ("entity_version", self.entity_version),
            ("name", self.name),
            ("provisioned_model_units", self.provisioned_model_units),
        )
        return {key: value for key, value in entries if value is not None}

    def as_shallow_dict(self) -> dict:
        """Return a shallow dict containing every attribute that is not None."""
        shallow = {}
        for key in ("entity_name", "entity_version", "name", "provisioned_model_units"):
            value = getattr(self, key)
            if value is not None:
                shallow[key] = value
        return shallow

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> PtServedModel:
        """Build a PtServedModel from a plain dictionary; missing keys become None."""
        lookup = d.get
        return cls(
            entity_name=lookup("entity_name", None),
            entity_version=lookup("entity_version", None),
            name=lookup("name", None),
            provisioned_model_units=lookup("provisioned_model_units", None),
        )
2443
+
2444
+
2295
2445
  @dataclass
2296
2446
  class PutAiGatewayRequest:
2297
2447
  fallback_config: Optional[FallbackConfig] = None
@@ -2867,6 +3017,9 @@ class ServedEntityInput:
2867
3017
  model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
2868
3018
  not specified for other entities, it defaults to entity_name-entity_version."""
2869
3019
 
3020
+ provisioned_model_units: Optional[int] = None
3021
+ """The number of model units provisioned."""
3022
+
2870
3023
  scale_to_zero_enabled: Optional[bool] = None
2871
3024
  """Whether the compute resources for the served entity should scale down to zero."""
2872
3025
 
@@ -2906,6 +3059,8 @@ class ServedEntityInput:
2906
3059
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
2907
3060
  if self.name is not None:
2908
3061
  body["name"] = self.name
3062
+ if self.provisioned_model_units is not None:
3063
+ body["provisioned_model_units"] = self.provisioned_model_units
2909
3064
  if self.scale_to_zero_enabled is not None:
2910
3065
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
2911
3066
  if self.workload_size is not None:
@@ -2933,6 +3088,8 @@ class ServedEntityInput:
2933
3088
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
2934
3089
  if self.name is not None:
2935
3090
  body["name"] = self.name
3091
+ if self.provisioned_model_units is not None:
3092
+ body["provisioned_model_units"] = self.provisioned_model_units
2936
3093
  if self.scale_to_zero_enabled is not None:
2937
3094
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
2938
3095
  if self.workload_size is not None:
@@ -2953,6 +3110,7 @@ class ServedEntityInput:
2953
3110
  max_provisioned_throughput=d.get("max_provisioned_throughput", None),
2954
3111
  min_provisioned_throughput=d.get("min_provisioned_throughput", None),
2955
3112
  name=d.get("name", None),
3113
+ provisioned_model_units=d.get("provisioned_model_units", None),
2956
3114
  scale_to_zero_enabled=d.get("scale_to_zero_enabled", None),
2957
3115
  workload_size=d.get("workload_size", None),
2958
3116
  workload_type=_enum(d, "workload_type", ServingModelWorkloadType),
@@ -3006,6 +3164,9 @@ class ServedEntityOutput:
3006
3164
  model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
3007
3165
  not specified for other entities, it defaults to entity_name-entity_version."""
3008
3166
 
3167
+ provisioned_model_units: Optional[int] = None
3168
+ """The number of model units provisioned."""
3169
+
3009
3170
  scale_to_zero_enabled: Optional[bool] = None
3010
3171
  """Whether the compute resources for the served entity should scale down to zero."""
3011
3172
 
@@ -3053,6 +3214,8 @@ class ServedEntityOutput:
3053
3214
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3054
3215
  if self.name is not None:
3055
3216
  body["name"] = self.name
3217
+ if self.provisioned_model_units is not None:
3218
+ body["provisioned_model_units"] = self.provisioned_model_units
3056
3219
  if self.scale_to_zero_enabled is not None:
3057
3220
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
3058
3221
  if self.state:
@@ -3088,6 +3251,8 @@ class ServedEntityOutput:
3088
3251
  body["min_provisioned_throughput"] = self.min_provisioned_throughput
3089
3252
  if self.name is not None:
3090
3253
  body["name"] = self.name
3254
+ if self.provisioned_model_units is not None:
3255
+ body["provisioned_model_units"] = self.provisioned_model_units
3091
3256
  if self.scale_to_zero_enabled is not None:
3092
3257
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
3093
3258
  if self.state:
@@ -3113,6 +3278,7 @@ class ServedEntityOutput:
3113
3278
  max_provisioned_throughput=d.get("max_provisioned_throughput", None),
3114
3279
  min_provisioned_throughput=d.get("min_provisioned_throughput", None),
3115
3280
  name=d.get("name", None),
3281
+ provisioned_model_units=d.get("provisioned_model_units", None),
3116
3282
  scale_to_zero_enabled=d.get("scale_to_zero_enabled", None),
3117
3283
  state=_from_dict(d, "state", ServedModelState),
3118
3284
  workload_size=d.get("workload_size", None),
@@ -3206,6 +3372,9 @@ class ServedModelInput:
3206
3372
  model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
3207
3373
  not specified for other entities, it defaults to entity_name-entity_version."""
3208
3374
 
3375
+ provisioned_model_units: Optional[int] = None
3376
+ """The number of model units provisioned."""
3377
+
3209
3378
  workload_size: Optional[str] = None
3210
3379
  """The workload size of the served entity. The workload size corresponds to a range of provisioned
3211
3380
  concurrency that the compute autoscales between. A single unit of provisioned concurrency can
@@ -3240,6 +3409,8 @@ class ServedModelInput:
3240
3409
  body["model_version"] = self.model_version
3241
3410
  if self.name is not None:
3242
3411
  body["name"] = self.name
3412
+ if self.provisioned_model_units is not None:
3413
+ body["provisioned_model_units"] = self.provisioned_model_units
3243
3414
  if self.scale_to_zero_enabled is not None:
3244
3415
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
3245
3416
  if self.workload_size is not None:
@@ -3265,6 +3436,8 @@ class ServedModelInput:
3265
3436
  body["model_version"] = self.model_version
3266
3437
  if self.name is not None:
3267
3438
  body["name"] = self.name
3439
+ if self.provisioned_model_units is not None:
3440
+ body["provisioned_model_units"] = self.provisioned_model_units
3268
3441
  if self.scale_to_zero_enabled is not None:
3269
3442
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
3270
3443
  if self.workload_size is not None:
@@ -3284,6 +3457,7 @@ class ServedModelInput:
3284
3457
  model_name=d.get("model_name", None),
3285
3458
  model_version=d.get("model_version", None),
3286
3459
  name=d.get("name", None),
3460
+ provisioned_model_units=d.get("provisioned_model_units", None),
3287
3461
  scale_to_zero_enabled=d.get("scale_to_zero_enabled", None),
3288
3462
  workload_size=d.get("workload_size", None),
3289
3463
  workload_type=_enum(d, "workload_type", ServedModelInputWorkloadType),
@@ -3325,6 +3499,9 @@ class ServedModelOutput:
3325
3499
  model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if
3326
3500
  not specified for other entities, it defaults to entity_name-entity_version."""
3327
3501
 
3502
+ provisioned_model_units: Optional[int] = None
3503
+ """The number of model units provisioned."""
3504
+
3328
3505
  scale_to_zero_enabled: Optional[bool] = None
3329
3506
  """Whether the compute resources for the served entity should scale down to zero."""
3330
3507
 
@@ -3364,6 +3541,8 @@ class ServedModelOutput:
3364
3541
  body["model_version"] = self.model_version
3365
3542
  if self.name is not None:
3366
3543
  body["name"] = self.name
3544
+ if self.provisioned_model_units is not None:
3545
+ body["provisioned_model_units"] = self.provisioned_model_units
3367
3546
  if self.scale_to_zero_enabled is not None:
3368
3547
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
3369
3548
  if self.state:
@@ -3391,6 +3570,8 @@ class ServedModelOutput:
3391
3570
  body["model_version"] = self.model_version
3392
3571
  if self.name is not None:
3393
3572
  body["name"] = self.name
3573
+ if self.provisioned_model_units is not None:
3574
+ body["provisioned_model_units"] = self.provisioned_model_units
3394
3575
  if self.scale_to_zero_enabled is not None:
3395
3576
  body["scale_to_zero_enabled"] = self.scale_to_zero_enabled
3396
3577
  if self.state:
@@ -3412,6 +3593,7 @@ class ServedModelOutput:
3412
3593
  model_name=d.get("model_name", None),
3413
3594
  model_version=d.get("model_version", None),
3414
3595
  name=d.get("name", None),
3596
+ provisioned_model_units=d.get("provisioned_model_units", None),
3415
3597
  scale_to_zero_enabled=d.get("scale_to_zero_enabled", None),
3416
3598
  state=_from_dict(d, "state", ServedModelState),
3417
3599
  workload_size=d.get("workload_size", None),
@@ -4094,6 +4276,37 @@ class TrafficConfig:
4094
4276
  return cls(routes=_repeated_dict(d, "routes", Route))
4095
4277
 
4096
4278
 
4279
# Request payload used when updating the config of a PT serving endpoint.
@dataclass
class UpdateProvisionedThroughputEndpointConfigRequest:
    config: PtEndpointCoreConfig

    name: Optional[str] = None
    """Name of the pt endpoint to update. This field is required."""

    def as_dict(self) -> dict:
        """Return a JSON-ready dict; ``config`` is serialized through its own ``as_dict``."""
        payload = {}
        core = self.config
        if core:
            payload["config"] = core.as_dict()
        if self.name is not None:
            payload["name"] = self.name
        return payload

    def as_shallow_dict(self) -> dict:
        """Return a dict of the immediate attributes without serializing ``config``."""
        # Each entry is (key, value, keep): config is kept when truthy, name when not None.
        entries = (
            ("config", self.config, bool(self.config)),
            ("name", self.name, self.name is not None),
        )
        return {key: value for key, value, keep in entries if keep}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> UpdateProvisionedThroughputEndpointConfigRequest:
        """Build an UpdateProvisionedThroughputEndpointConfigRequest from a plain dictionary."""
        core = _from_dict(d, "config", PtEndpointCoreConfig)
        return cls(config=core, name=d.get("name", None))
4308
+
4309
+
4097
4310
  @dataclass
4098
4311
  class V1ResponseChoiceElement:
4099
4312
  finish_reason: Optional[str] = None
@@ -4310,6 +4523,70 @@ class ServingEndpointsAPI:
4310
4523
  tags=tags,
4311
4524
  ).result(timeout=timeout)
4312
4525
 
4526
def create_provisioned_throughput_endpoint(
    self,
    name: str,
    config: PtEndpointCoreConfig,
    *,
    ai_gateway: Optional[AiGatewayConfig] = None,
    budget_policy_id: Optional[str] = None,
    tags: Optional[List[EndpointTag]] = None,
) -> Wait[ServingEndpointDetailed]:
    """Create a new PT serving endpoint.

    Sends a POST to ``/api/2.0/serving-endpoints/pt`` with every argument that is not None.

    :param name: str
      The name of the serving endpoint. This field is required and must be unique across a Databricks
      workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores.
    :param config: :class:`PtEndpointCoreConfig`
      The core config of the serving endpoint.
    :param ai_gateway: :class:`AiGatewayConfig` (optional)
      The AI Gateway configuration for the serving endpoint.
    :param budget_policy_id: str (optional)
      The budget policy associated with the endpoint.
    :param tags: List[:class:`EndpointTag`] (optional)
      Tags to be attached to the serving endpoint and automatically propagated to billing logs.

    :returns:
      Long-running operation waiter for :class:`ServingEndpointDetailed`.
      See :method:wait_get_serving_endpoint_not_updating for more details.
    """
    request_body = {}
    if ai_gateway is not None:
        request_body["ai_gateway"] = ai_gateway.as_dict()
    if budget_policy_id is not None:
        request_body["budget_policy_id"] = budget_policy_id
    if config is not None:
        request_body["config"] = config.as_dict()
    if name is not None:
        request_body["name"] = name
    if tags is not None:
        request_body["tags"] = [tag.as_dict() for tag in tags]

    created = self._api.do(
        "POST",
        "/api/2.0/serving-endpoints/pt",
        body=request_body,
        headers={"Accept": "application/json", "Content-Type": "application/json"},
    )
    # The waiter is keyed by the endpoint name reported in the response.
    return Wait(
        self.wait_get_serving_endpoint_not_updating,
        response=ServingEndpointDetailed.from_dict(created),
        name=created["name"],
    )
4575
+
4576
def create_provisioned_throughput_endpoint_and_wait(
    self,
    name: str,
    config: PtEndpointCoreConfig,
    *,
    ai_gateway: Optional[AiGatewayConfig] = None,
    budget_policy_id: Optional[str] = None,
    tags: Optional[List[EndpointTag]] = None,
    timeout=timedelta(minutes=20),
) -> ServingEndpointDetailed:
    """Create a PT serving endpoint and block on the returned waiter.

    Forwards all arguments to :method:create_provisioned_throughput_endpoint and returns the
    result of the waiter, passing `timeout` (default 20 minutes) to its ``result`` call.
    """
    waiter = self.create_provisioned_throughput_endpoint(
        name=name,
        config=config,
        ai_gateway=ai_gateway,
        budget_policy_id=budget_policy_id,
        tags=tags,
    )
    return waiter.result(timeout=timeout)
4589
+
4313
4590
  def delete(self, name: str):
4314
4591
  """Delete a serving endpoint.
4315
4592
 
@@ -4848,6 +5125,43 @@ class ServingEndpointsAPI:
4848
5125
  )
4849
5126
  return ServingEndpointPermissions.from_dict(res)
4850
5127
 
5128
def update_provisioned_throughput_endpoint_config(
    self, name: str, config: PtEndpointCoreConfig
) -> Wait[ServingEndpointDetailed]:
    """Update config of a PT serving endpoint.

    Updates any combination of the pt endpoint's served entities, the compute configuration of those
    served entities, and the endpoint's traffic config. Updates are instantaneous and the endpoint
    should be updated instantly.

    :param name: str
      The name of the pt endpoint to update. This field is required.
    :param config: :class:`PtEndpointCoreConfig`

    :returns:
      Long-running operation waiter for :class:`ServingEndpointDetailed`.
      See :method:wait_get_serving_endpoint_not_updating for more details.
    """
    # `name` travels in the URL path; only `config` goes into the request body.
    request_body = {}
    if config is not None:
        request_body["config"] = config.as_dict()

    updated = self._api.do(
        "PUT",
        f"/api/2.0/serving-endpoints/pt/{name}/config",
        body=request_body,
        headers={"Accept": "application/json", "Content-Type": "application/json"},
    )
    # The waiter is keyed by the endpoint name reported in the response.
    return Wait(
        self.wait_get_serving_endpoint_not_updating,
        response=ServingEndpointDetailed.from_dict(updated),
        name=updated["name"],
    )
5159
+
5160
def update_provisioned_throughput_endpoint_config_and_wait(
    self, name: str, config: PtEndpointCoreConfig, timeout=timedelta(minutes=20)
) -> ServingEndpointDetailed:
    """Update the config of a PT serving endpoint and block on the returned waiter.

    Forwards `name` and `config` to :method:update_provisioned_throughput_endpoint_config and
    returns the result of the waiter, passing `timeout` (default 20 minutes) to its ``result`` call.
    """
    waiter = self.update_provisioned_throughput_endpoint_config(name=name, config=config)
    return waiter.result(timeout=timeout)
5164
+
4851
5165
 
4852
5166
  class ServingEndpointsDataPlaneAPI:
4853
5167
  """Serving endpoints DataPlane provides a set of operations to interact with data plane endpoints for Serving