platform-api-python-client 4.8.4__py3-none-any.whl → 4.9.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- platform_api_python_client/__init__.py +5 -4
- platform_api_python_client/api/external_api.py +129 -420
- platform_api_python_client/api_client.py +1 -1
- platform_api_python_client/configuration.py +1 -1
- platform_api_python_client/models/__init__.py +4 -3
- platform_api_python_client/models/config_file_mount.py +99 -0
- platform_api_python_client/models/create_c_serve_v3_deployment_request.py +13 -2
- platform_api_python_client/models/create_inference_v3_deployment_request.py +29 -2
- platform_api_python_client/models/{create_rag_deployment_request.py → create_job_deployment_request.py} +32 -57
- platform_api_python_client/models/{create_rag_deployment_response.py → create_job_deployment_response.py} +4 -4
- platform_api_python_client/models/deployment_type.py +1 -0
- platform_api_python_client/models/get_c_serve_v3_deployment_response.py +9 -3
- platform_api_python_client/models/get_inference_v3_deployment_response.py +18 -3
- platform_api_python_client/models/{get_rag_deployment_response.py → get_job_deployment_response.py} +35 -48
- platform_api_python_client/models/pod_status.py +2 -0
- platform_api_python_client/models/service_status.py +4 -0
- {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/METADATA +1 -1
- {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/RECORD +20 -19
- {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/WHEEL +0 -0
- {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/top_level.txt +0 -0
|
@@ -90,7 +90,7 @@ class ApiClient:
|
|
|
90
90
|
self.default_headers[header_name] = header_value
|
|
91
91
|
self.cookie = cookie
|
|
92
92
|
# Set default User-Agent.
|
|
93
|
-
self.user_agent = 'OpenAPI-Generator/4.8.4/python'
|
|
93
|
+
self.user_agent = 'OpenAPI-Generator/4.9.7/python'
|
|
94
94
|
self.client_side_validation = configuration.client_side_validation
|
|
95
95
|
|
|
96
96
|
def __enter__(self):
|
|
@@ -392,7 +392,7 @@ class Configuration:
|
|
|
392
392
|
"OS: {env}\n"\
|
|
393
393
|
"Python Version: {pyversion}\n"\
|
|
394
394
|
"Version of the API: 0.1.0\n"\
|
|
395
|
-
"SDK Package Version: 4.8.4".\
|
|
395
|
+
"SDK Package Version: 4.9.7".\
|
|
396
396
|
format(env=sys.platform, pyversion=sys.version)
|
|
397
397
|
|
|
398
398
|
def get_host_settings(self):
|
|
@@ -21,6 +21,7 @@ from platform_api_python_client.models.c_serve_recipe_perf import CServeRecipePe
|
|
|
21
21
|
from platform_api_python_client.models.c_serve_recipe_response import CServeRecipeResponse
|
|
22
22
|
from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
|
|
23
23
|
from platform_api_python_client.models.cluster_capacity_response import ClusterCapacityResponse
|
|
24
|
+
from platform_api_python_client.models.config_file_mount import ConfigFileMount
|
|
24
25
|
from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest
|
|
25
26
|
from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse
|
|
26
27
|
from platform_api_python_client.models.create_c_serve_v3_deployment_request import CreateCServeV3DeploymentRequest
|
|
@@ -30,10 +31,10 @@ from platform_api_python_client.models.create_compute_deployment_response import
|
|
|
30
31
|
from platform_api_python_client.models.create_inference_deployment_request import CreateInferenceDeploymentRequest
|
|
31
32
|
from platform_api_python_client.models.create_inference_deployment_response import CreateInferenceDeploymentResponse
|
|
32
33
|
from platform_api_python_client.models.create_inference_v3_deployment_request import CreateInferenceV3DeploymentRequest
|
|
34
|
+
from platform_api_python_client.models.create_job_deployment_request import CreateJobDeploymentRequest
|
|
35
|
+
from platform_api_python_client.models.create_job_deployment_response import CreateJobDeploymentResponse
|
|
33
36
|
from platform_api_python_client.models.create_organization_request import CreateOrganizationRequest
|
|
34
37
|
from platform_api_python_client.models.create_organization_response import CreateOrganizationResponse
|
|
35
|
-
from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest
|
|
36
|
-
from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse
|
|
37
38
|
from platform_api_python_client.models.create_url_request import CreateUrlRequest
|
|
38
39
|
from platform_api_python_client.models.create_url_response import CreateUrlResponse
|
|
39
40
|
from platform_api_python_client.models.credits_response import CreditsResponse
|
|
@@ -56,7 +57,7 @@ from platform_api_python_client.models.get_deployment_revision_response import G
|
|
|
56
57
|
from platform_api_python_client.models.get_deployment_usage_response import GetDeploymentUsageResponse
|
|
57
58
|
from platform_api_python_client.models.get_inference_deployment_response import GetInferenceDeploymentResponse
|
|
58
59
|
from platform_api_python_client.models.get_inference_v3_deployment_response import GetInferenceV3DeploymentResponse
|
|
59
|
-
from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse
|
|
60
|
+
from platform_api_python_client.models.get_job_deployment_response import GetJobDeploymentResponse
|
|
60
61
|
from platform_api_python_client.models.gpu_type_capacity import GpuTypeCapacity
|
|
61
62
|
from platform_api_python_client.models.http_validation_error import HTTPValidationError
|
|
62
63
|
from platform_api_python_client.models.hardware_instance_response import HardwareInstanceResponse
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Platform External API
|
|
5
|
+
|
|
6
|
+
No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
|
7
|
+
|
|
8
|
+
The version of the OpenAPI document: 0.1.0
|
|
9
|
+
Generated by OpenAPI Generator (https://openapi-generator.tech)
|
|
10
|
+
|
|
11
|
+
Do not edit the class manually.
|
|
12
|
+
""" # noqa: E501
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
import pprint
|
|
17
|
+
import re # noqa: F401
|
|
18
|
+
import json
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
21
|
+
from typing import Any, ClassVar, Dict, List
|
|
22
|
+
from typing_extensions import Annotated
|
|
23
|
+
from typing import Optional, Set
|
|
24
|
+
from typing_extensions import Self
|
|
25
|
+
|
|
26
|
+
class ConfigFileMount(BaseModel):
|
|
27
|
+
"""
|
|
28
|
+
ConfigFileMount
|
|
29
|
+
""" # noqa: E501
|
|
30
|
+
filename: Annotated[str, Field(min_length=1, strict=True, max_length=253)]
|
|
31
|
+
mount_path: Annotated[str, Field(min_length=1, strict=True)]
|
|
32
|
+
content: Annotated[str, Field(min_length=1, strict=True)]
|
|
33
|
+
__properties: ClassVar[List[str]] = ["filename", "mount_path", "content"]
|
|
34
|
+
|
|
35
|
+
@field_validator('filename')
|
|
36
|
+
def filename_validate_regular_expression(cls, value):
|
|
37
|
+
"""Validates the regular expression"""
|
|
38
|
+
if not re.match(r"^[a-zA-Z0-9._-]+$", value):
|
|
39
|
+
raise ValueError(r"must validate the regular expression /^[a-zA-Z0-9._-]+$/")
|
|
40
|
+
return value
|
|
41
|
+
|
|
42
|
+
model_config = ConfigDict(
|
|
43
|
+
populate_by_name=True,
|
|
44
|
+
validate_assignment=True,
|
|
45
|
+
protected_namespaces=(),
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def to_str(self) -> str:
|
|
50
|
+
"""Returns the string representation of the model using alias"""
|
|
51
|
+
return pprint.pformat(self.model_dump(by_alias=True))
|
|
52
|
+
|
|
53
|
+
def to_json(self) -> str:
|
|
54
|
+
"""Returns the JSON representation of the model using alias"""
|
|
55
|
+
# TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
|
|
56
|
+
return json.dumps(self.to_dict())
|
|
57
|
+
|
|
58
|
+
@classmethod
|
|
59
|
+
def from_json(cls, json_str: str) -> Optional[Self]:
|
|
60
|
+
"""Create an instance of ConfigFileMount from a JSON string"""
|
|
61
|
+
return cls.from_dict(json.loads(json_str))
|
|
62
|
+
|
|
63
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
64
|
+
"""Return the dictionary representation of the model using alias.
|
|
65
|
+
|
|
66
|
+
This has the following differences from calling pydantic's
|
|
67
|
+
`self.model_dump(by_alias=True)`:
|
|
68
|
+
|
|
69
|
+
* `None` is only added to the output dict for nullable fields that
|
|
70
|
+
were set at model initialization. Other fields with value `None`
|
|
71
|
+
are ignored.
|
|
72
|
+
"""
|
|
73
|
+
excluded_fields: Set[str] = set([
|
|
74
|
+
])
|
|
75
|
+
|
|
76
|
+
_dict = self.model_dump(
|
|
77
|
+
by_alias=True,
|
|
78
|
+
exclude=excluded_fields,
|
|
79
|
+
exclude_none=True,
|
|
80
|
+
)
|
|
81
|
+
return _dict
|
|
82
|
+
|
|
83
|
+
@classmethod
|
|
84
|
+
def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
|
|
85
|
+
"""Create an instance of ConfigFileMount from a dict"""
|
|
86
|
+
if obj is None:
|
|
87
|
+
return None
|
|
88
|
+
|
|
89
|
+
if not isinstance(obj, dict):
|
|
90
|
+
return cls.model_validate(obj)
|
|
91
|
+
|
|
92
|
+
_obj = cls.model_validate({
|
|
93
|
+
"filename": obj.get("filename"),
|
|
94
|
+
"mount_path": obj.get("mount_path"),
|
|
95
|
+
"content": obj.get("content")
|
|
96
|
+
})
|
|
97
|
+
return _obj
|
|
98
|
+
|
|
99
|
+
|
|
@@ -43,9 +43,12 @@ class CreateCServeV3DeploymentRequest(BaseModel):
|
|
|
43
43
|
max_replicas: StrictInt
|
|
44
44
|
initial_replicas: Optional[StrictInt] = None
|
|
45
45
|
concurrency: Optional[StrictInt] = None
|
|
46
|
+
cooldown_period: Optional[StrictInt] = None
|
|
46
47
|
env_vars: Optional[Dict[str, StrictStr]] = None
|
|
47
48
|
enable_logging: Optional[StrictBool] = True
|
|
48
|
-
|
|
49
|
+
enable_node_model_cache: Optional[StrictBool] = False
|
|
50
|
+
session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
|
|
51
|
+
__properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
|
|
49
52
|
|
|
50
53
|
@field_validator('name')
|
|
51
54
|
def name_validate_regular_expression(cls, value):
|
|
@@ -141,6 +144,11 @@ class CreateCServeV3DeploymentRequest(BaseModel):
|
|
|
141
144
|
if self.concurrency is None and "concurrency" in self.model_fields_set:
|
|
142
145
|
_dict['concurrency'] = None
|
|
143
146
|
|
|
147
|
+
# set to None if cooldown_period (nullable) is None
|
|
148
|
+
# and model_fields_set contains the field
|
|
149
|
+
if self.cooldown_period is None and "cooldown_period" in self.model_fields_set:
|
|
150
|
+
_dict['cooldown_period'] = None
|
|
151
|
+
|
|
144
152
|
return _dict
|
|
145
153
|
|
|
146
154
|
@classmethod
|
|
@@ -168,8 +176,11 @@ class CreateCServeV3DeploymentRequest(BaseModel):
|
|
|
168
176
|
"max_replicas": obj.get("max_replicas"),
|
|
169
177
|
"initial_replicas": obj.get("initial_replicas"),
|
|
170
178
|
"concurrency": obj.get("concurrency"),
|
|
179
|
+
"cooldown_period": obj.get("cooldown_period"),
|
|
171
180
|
"env_vars": obj.get("env_vars"),
|
|
172
|
-
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
|
|
181
|
+
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
|
|
182
|
+
"enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
|
|
183
|
+
"session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
|
|
173
184
|
})
|
|
174
185
|
return _obj
|
|
175
186
|
|
|
@@ -21,6 +21,7 @@ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, Strict
|
|
|
21
21
|
from typing import Any, ClassVar, Dict, List, Optional
|
|
22
22
|
from typing_extensions import Annotated
|
|
23
23
|
from platform_api_python_client.models.backend_protocol import BackendProtocol
|
|
24
|
+
from platform_api_python_client.models.config_file_mount import ConfigFileMount
|
|
24
25
|
from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
|
|
25
26
|
from typing import Optional, Set
|
|
26
27
|
from typing_extensions import Self
|
|
@@ -42,14 +43,18 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
|
|
|
42
43
|
max_replicas: StrictInt
|
|
43
44
|
initial_replicas: Optional[StrictInt] = None
|
|
44
45
|
concurrency: Optional[StrictInt] = None
|
|
46
|
+
cooldown_period: Optional[StrictInt] = None
|
|
45
47
|
healthcheck: Optional[StrictStr] = None
|
|
46
48
|
env_vars: Optional[Dict[str, StrictStr]] = None
|
|
47
49
|
command: Optional[StrictStr] = None
|
|
48
50
|
endpoint_bearer_token: Optional[StrictStr] = None
|
|
49
51
|
endpoint_certificate_authority: Optional[StrictStr] = None
|
|
52
|
+
hf_token: Optional[StrictStr] = None
|
|
50
53
|
backend_protocol: Optional[BackendProtocol] = None
|
|
51
54
|
enable_logging: Optional[StrictBool] = False
|
|
52
|
-
|
|
55
|
+
session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
|
|
56
|
+
config_file: Optional[ConfigFileMount] = None
|
|
57
|
+
__properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "hf_token", "backend_protocol", "enable_logging", "session_affinity", "config_file"]
|
|
53
58
|
|
|
54
59
|
@field_validator('name')
|
|
55
60
|
def name_validate_regular_expression(cls, value):
|
|
@@ -100,6 +105,9 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
|
|
|
100
105
|
# override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
|
|
101
106
|
if self.image_pull_secret_credentials:
|
|
102
107
|
_dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
|
|
108
|
+
# override the default output from pydantic by calling `to_dict()` of config_file
|
|
109
|
+
if self.config_file:
|
|
110
|
+
_dict['config_file'] = self.config_file.to_dict()
|
|
103
111
|
# set to None if max_surge (nullable) is None
|
|
104
112
|
# and model_fields_set contains the field
|
|
105
113
|
if self.max_surge is None and "max_surge" in self.model_fields_set:
|
|
@@ -130,6 +138,11 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
|
|
|
130
138
|
if self.concurrency is None and "concurrency" in self.model_fields_set:
|
|
131
139
|
_dict['concurrency'] = None
|
|
132
140
|
|
|
141
|
+
# set to None if cooldown_period (nullable) is None
|
|
142
|
+
# and model_fields_set contains the field
|
|
143
|
+
if self.cooldown_period is None and "cooldown_period" in self.model_fields_set:
|
|
144
|
+
_dict['cooldown_period'] = None
|
|
145
|
+
|
|
133
146
|
# set to None if healthcheck (nullable) is None
|
|
134
147
|
# and model_fields_set contains the field
|
|
135
148
|
if self.healthcheck is None and "healthcheck" in self.model_fields_set:
|
|
@@ -155,6 +168,16 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
|
|
|
155
168
|
if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
|
|
156
169
|
_dict['endpoint_certificate_authority'] = None
|
|
157
170
|
|
|
171
|
+
# set to None if hf_token (nullable) is None
|
|
172
|
+
# and model_fields_set contains the field
|
|
173
|
+
if self.hf_token is None and "hf_token" in self.model_fields_set:
|
|
174
|
+
_dict['hf_token'] = None
|
|
175
|
+
|
|
176
|
+
# set to None if config_file (nullable) is None
|
|
177
|
+
# and model_fields_set contains the field
|
|
178
|
+
if self.config_file is None and "config_file" in self.model_fields_set:
|
|
179
|
+
_dict['config_file'] = None
|
|
180
|
+
|
|
158
181
|
return _dict
|
|
159
182
|
|
|
160
183
|
@classmethod
|
|
@@ -180,13 +203,17 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
|
|
|
180
203
|
"max_replicas": obj.get("max_replicas"),
|
|
181
204
|
"initial_replicas": obj.get("initial_replicas"),
|
|
182
205
|
"concurrency": obj.get("concurrency"),
|
|
206
|
+
"cooldown_period": obj.get("cooldown_period"),
|
|
183
207
|
"healthcheck": obj.get("healthcheck"),
|
|
184
208
|
"env_vars": obj.get("env_vars"),
|
|
185
209
|
"command": obj.get("command"),
|
|
186
210
|
"endpoint_bearer_token": obj.get("endpoint_bearer_token"),
|
|
187
211
|
"endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
|
|
212
|
+
"hf_token": obj.get("hf_token"),
|
|
188
213
|
"backend_protocol": obj.get("backend_protocol"),
|
|
189
|
-
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False
|
|
214
|
+
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False,
|
|
215
|
+
"session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False,
|
|
216
|
+
"config_file": ConfigFileMount.from_dict(obj["config_file"]) if obj.get("config_file") is not None else None
|
|
190
217
|
})
|
|
191
218
|
return _obj
|
|
192
219
|
|
|
@@ -17,34 +17,29 @@ import pprint
|
|
|
17
17
|
import re # noqa: F401
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
|
-
from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr, field_validator
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, field_validator
|
|
21
21
|
from typing import Any, ClassVar, Dict, List, Optional
|
|
22
22
|
from typing_extensions import Annotated
|
|
23
|
-
from platform_api_python_client.models.
|
|
23
|
+
from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
|
|
24
24
|
from typing import Optional, Set
|
|
25
25
|
from typing_extensions import Self
|
|
26
26
|
|
|
27
|
-
class CreateRagDeploymentRequest(BaseModel):
|
|
27
|
+
class CreateJobDeploymentRequest(BaseModel):
|
|
28
28
|
"""
|
|
29
|
-
|
|
29
|
+
CreateJobDeploymentRequest
|
|
30
30
|
""" # noqa: E501
|
|
31
31
|
name: Annotated[str, Field(min_length=1, strict=True, max_length=20)]
|
|
32
32
|
cluster_id: StrictInt
|
|
33
33
|
hardware_instance_id: StrictInt
|
|
34
34
|
user_annotations: Optional[Dict[str, StrictStr]] = None
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
hf_token: Optional[StrictStr] = None
|
|
38
|
-
llm_model: StrictStr
|
|
39
|
-
centml_api_key: StrictStr
|
|
40
|
-
min_scale: Optional[StrictInt] = 1
|
|
41
|
-
max_scale: Optional[StrictInt] = 1
|
|
42
|
-
initial_scale: Optional[StrictInt] = None
|
|
43
|
-
endpoint_bearer_token: Optional[StrictStr] = None
|
|
44
|
-
endpoint_certificate_authority: Optional[StrictStr] = None
|
|
45
|
-
concurrency: Optional[StrictInt] = None
|
|
35
|
+
image_url: StrictStr
|
|
36
|
+
image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
|
|
46
37
|
env_vars: Optional[Dict[str, StrictStr]] = None
|
|
47
|
-
|
|
38
|
+
command: Optional[StrictStr] = None
|
|
39
|
+
completions: Optional[StrictInt] = 1
|
|
40
|
+
parallelism: Optional[StrictInt] = 1
|
|
41
|
+
enable_logging: Optional[StrictBool] = True
|
|
42
|
+
__properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "env_vars", "command", "completions", "parallelism", "enable_logging"]
|
|
48
43
|
|
|
49
44
|
@field_validator('name')
|
|
50
45
|
def name_validate_regular_expression(cls, value):
|
|
@@ -71,7 +66,7 @@ class CreateRagDeploymentRequest(BaseModel):
|
|
|
71
66
|
|
|
72
67
|
@classmethod
|
|
73
68
|
def from_json(cls, json_str: str) -> Optional[Self]:
|
|
74
|
-
"""Create an instance of CreateRagDeploymentRequest from a JSON string"""
|
|
69
|
+
"""Create an instance of CreateJobDeploymentRequest from a JSON string"""
|
|
75
70
|
return cls.from_dict(json.loads(json_str))
|
|
76
71
|
|
|
77
72
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -92,49 +87,34 @@ class CreateRagDeploymentRequest(BaseModel):
|
|
|
92
87
|
exclude=excluded_fields,
|
|
93
88
|
exclude_none=True,
|
|
94
89
|
)
|
|
95
|
-
# override the default output from pydantic by calling `to_dict()` of
|
|
96
|
-
if self.
|
|
97
|
-
_dict['
|
|
90
|
+
# override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
|
|
91
|
+
if self.image_pull_secret_credentials:
|
|
92
|
+
_dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
|
|
98
93
|
# set to None if user_annotations (nullable) is None
|
|
99
94
|
# and model_fields_set contains the field
|
|
100
95
|
if self.user_annotations is None and "user_annotations" in self.model_fields_set:
|
|
101
96
|
_dict['user_annotations'] = None
|
|
102
97
|
|
|
103
|
-
# set to None if
|
|
98
|
+
# set to None if image_pull_secret_credentials (nullable) is None
|
|
104
99
|
# and model_fields_set contains the field
|
|
105
|
-
if self.
|
|
106
|
-
_dict['
|
|
100
|
+
if self.image_pull_secret_credentials is None and "image_pull_secret_credentials" in self.model_fields_set:
|
|
101
|
+
_dict['image_pull_secret_credentials'] = None
|
|
107
102
|
|
|
108
|
-
# set to None if
|
|
103
|
+
# set to None if env_vars (nullable) is None
|
|
109
104
|
# and model_fields_set contains the field
|
|
110
|
-
if self.
|
|
111
|
-
_dict['
|
|
105
|
+
if self.env_vars is None and "env_vars" in self.model_fields_set:
|
|
106
|
+
_dict['env_vars'] = None
|
|
112
107
|
|
|
113
|
-
# set to None if
|
|
108
|
+
# set to None if command (nullable) is None
|
|
114
109
|
# and model_fields_set contains the field
|
|
115
|
-
if self.
|
|
116
|
-
_dict['
|
|
117
|
-
|
|
118
|
-
# set to None if endpoint_bearer_token (nullable) is None
|
|
119
|
-
# and model_fields_set contains the field
|
|
120
|
-
if self.endpoint_bearer_token is None and "endpoint_bearer_token" in self.model_fields_set:
|
|
121
|
-
_dict['endpoint_bearer_token'] = None
|
|
122
|
-
|
|
123
|
-
# set to None if endpoint_certificate_authority (nullable) is None
|
|
124
|
-
# and model_fields_set contains the field
|
|
125
|
-
if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
|
|
126
|
-
_dict['endpoint_certificate_authority'] = None
|
|
127
|
-
|
|
128
|
-
# set to None if concurrency (nullable) is None
|
|
129
|
-
# and model_fields_set contains the field
|
|
130
|
-
if self.concurrency is None and "concurrency" in self.model_fields_set:
|
|
131
|
-
_dict['concurrency'] = None
|
|
110
|
+
if self.command is None and "command" in self.model_fields_set:
|
|
111
|
+
_dict['command'] = None
|
|
132
112
|
|
|
133
113
|
return _dict
|
|
134
114
|
|
|
135
115
|
@classmethod
|
|
136
116
|
def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
|
|
137
|
-
"""Create an instance of CreateRagDeploymentRequest from a dict"""
|
|
117
|
+
"""Create an instance of CreateJobDeploymentRequest from a dict"""
|
|
138
118
|
if obj is None:
|
|
139
119
|
return None
|
|
140
120
|
|
|
@@ -146,18 +126,13 @@ class CreateRagDeploymentRequest(BaseModel):
|
|
|
146
126
|
"cluster_id": obj.get("cluster_id"),
|
|
147
127
|
"hardware_instance_id": obj.get("hardware_instance_id"),
|
|
148
128
|
"user_annotations": obj.get("user_annotations"),
|
|
149
|
-
"
|
|
150
|
-
"
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
"
|
|
155
|
-
"
|
|
156
|
-
"initial_scale": obj.get("initial_scale"),
|
|
157
|
-
"endpoint_bearer_token": obj.get("endpoint_bearer_token"),
|
|
158
|
-
"endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
|
|
159
|
-
"concurrency": obj.get("concurrency"),
|
|
160
|
-
"env_vars": obj.get("env_vars")
|
|
129
|
+
"image_url": obj.get("image_url"),
|
|
130
|
+
"image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
|
|
131
|
+
"env_vars": obj.get("env_vars"),
|
|
132
|
+
"command": obj.get("command"),
|
|
133
|
+
"completions": obj.get("completions") if obj.get("completions") is not None else 1,
|
|
134
|
+
"parallelism": obj.get("parallelism") if obj.get("parallelism") is not None else 1,
|
|
135
|
+
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
|
|
161
136
|
})
|
|
162
137
|
return _obj
|
|
163
138
|
|
|
@@ -23,9 +23,9 @@ from typing import Any, ClassVar, Dict, List
|
|
|
23
23
|
from typing import Optional, Set
|
|
24
24
|
from typing_extensions import Self
|
|
25
25
|
|
|
26
|
-
class CreateRagDeploymentResponse(BaseModel):
|
|
26
|
+
class CreateJobDeploymentResponse(BaseModel):
|
|
27
27
|
"""
|
|
28
|
-
|
|
28
|
+
CreateJobDeploymentResponse
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
id: StrictInt
|
|
31
31
|
created_at: datetime
|
|
@@ -50,7 +50,7 @@ class CreateRagDeploymentResponse(BaseModel):
|
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
52
52
|
def from_json(cls, json_str: str) -> Optional[Self]:
|
|
53
|
-
"""Create an instance of CreateRagDeploymentResponse from a JSON string"""
|
|
53
|
+
"""Create an instance of CreateJobDeploymentResponse from a JSON string"""
|
|
54
54
|
return cls.from_dict(json.loads(json_str))
|
|
55
55
|
|
|
56
56
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -75,7 +75,7 @@ class CreateRagDeploymentResponse(BaseModel):
|
|
|
75
75
|
|
|
76
76
|
@classmethod
|
|
77
77
|
def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
|
|
78
|
-
"""Create an instance of CreateRagDeploymentResponse from a dict"""
|
|
78
|
+
"""Create an instance of CreateJobDeploymentResponse from a dict"""
|
|
79
79
|
if obj is None:
|
|
80
80
|
return None
|
|
81
81
|
|
|
@@ -18,7 +18,7 @@ import re # noqa: F401
|
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
20
|
from datetime import datetime
|
|
21
|
-
from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
|
|
21
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
|
|
22
22
|
from typing import Any, ClassVar, Dict, List, Optional
|
|
23
23
|
from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
|
|
24
24
|
from platform_api_python_client.models.deployment_status import DeploymentStatus
|
|
@@ -50,9 +50,12 @@ class GetCServeV3DeploymentResponse(BaseModel):
|
|
|
50
50
|
endpoint_certificate_authority: Optional[StrictStr] = None
|
|
51
51
|
endpoint_bearer_token: Optional[StrictStr] = None
|
|
52
52
|
concurrency: Optional[StrictInt] = None
|
|
53
|
+
cooldown_period: Optional[StrictInt] = 1800
|
|
53
54
|
env_vars: Optional[Dict[str, StrictStr]] = None
|
|
54
55
|
enable_logging: Optional[StrictBool] = True
|
|
55
|
-
|
|
56
|
+
enable_node_model_cache: Optional[StrictBool] = False
|
|
57
|
+
session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
|
|
58
|
+
__properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
|
|
56
59
|
|
|
57
60
|
model_config = ConfigDict(
|
|
58
61
|
populate_by_name=True,
|
|
@@ -163,8 +166,11 @@ class GetCServeV3DeploymentResponse(BaseModel):
|
|
|
163
166
|
"endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
|
|
164
167
|
"endpoint_bearer_token": obj.get("endpoint_bearer_token"),
|
|
165
168
|
"concurrency": obj.get("concurrency"),
|
|
169
|
+
"cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
|
|
166
170
|
"env_vars": obj.get("env_vars"),
|
|
167
|
-
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
|
|
171
|
+
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
|
|
172
|
+
"enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
|
|
173
|
+
"session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
|
|
168
174
|
})
|
|
169
175
|
return _obj
|
|
170
176
|
|
|
@@ -18,9 +18,10 @@ import re # noqa: F401
|
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
20
|
from datetime import datetime
|
|
21
|
-
from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
|
|
21
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
|
|
22
22
|
from typing import Any, ClassVar, Dict, List, Optional
|
|
23
23
|
from platform_api_python_client.models.backend_protocol import BackendProtocol
|
|
24
|
+
from platform_api_python_client.models.config_file_mount import ConfigFileMount
|
|
24
25
|
from platform_api_python_client.models.deployment_status import DeploymentStatus
|
|
25
26
|
from platform_api_python_client.models.deployment_type import DeploymentType
|
|
26
27
|
from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
|
|
@@ -48,6 +49,7 @@ class GetInferenceV3DeploymentResponse(BaseModel):
|
|
|
48
49
|
max_replicas: StrictInt
|
|
49
50
|
initial_replicas: Optional[StrictInt] = None
|
|
50
51
|
concurrency: Optional[StrictInt] = None
|
|
52
|
+
cooldown_period: Optional[StrictInt] = 1800
|
|
51
53
|
healthcheck: Optional[StrictStr] = None
|
|
52
54
|
endpoint_certificate_authority: Optional[StrictStr] = None
|
|
53
55
|
endpoint_bearer_token: Optional[StrictStr] = None
|
|
@@ -58,7 +60,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
|
|
|
58
60
|
image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
|
|
59
61
|
backend_protocol: Optional[BackendProtocol] = None
|
|
60
62
|
enable_logging: Optional[StrictBool] = True
|
|
61
|
-
|
|
63
|
+
session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
|
|
64
|
+
config_file: Optional[ConfigFileMount] = None
|
|
65
|
+
__properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging", "session_affinity", "config_file"]
|
|
62
66
|
|
|
63
67
|
model_config = ConfigDict(
|
|
64
68
|
populate_by_name=True,
|
|
@@ -102,6 +106,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
|
|
|
102
106
|
# override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
|
|
103
107
|
if self.image_pull_secret_credentials:
|
|
104
108
|
_dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
|
|
109
|
+
# override the default output from pydantic by calling `to_dict()` of config_file
|
|
110
|
+
if self.config_file:
|
|
111
|
+
_dict['config_file'] = self.config_file.to_dict()
|
|
105
112
|
# set to None if image_url (nullable) is None
|
|
106
113
|
# and model_fields_set contains the field
|
|
107
114
|
if self.image_url is None and "image_url" in self.model_fields_set:
|
|
@@ -162,6 +169,11 @@ class GetInferenceV3DeploymentResponse(BaseModel):
|
|
|
162
169
|
if self.image_pull_secret_credentials is None and "image_pull_secret_credentials" in self.model_fields_set:
|
|
163
170
|
_dict['image_pull_secret_credentials'] = None
|
|
164
171
|
|
|
172
|
+
# set to None if config_file (nullable) is None
|
|
173
|
+
# and model_fields_set contains the field
|
|
174
|
+
if self.config_file is None and "config_file" in self.model_fields_set:
|
|
175
|
+
_dict['config_file'] = None
|
|
176
|
+
|
|
165
177
|
return _dict
|
|
166
178
|
|
|
167
179
|
@classmethod
|
|
@@ -191,6 +203,7 @@ class GetInferenceV3DeploymentResponse(BaseModel):
|
|
|
191
203
|
"max_replicas": obj.get("max_replicas"),
|
|
192
204
|
"initial_replicas": obj.get("initial_replicas"),
|
|
193
205
|
"concurrency": obj.get("concurrency"),
|
|
206
|
+
"cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
|
|
194
207
|
"healthcheck": obj.get("healthcheck"),
|
|
195
208
|
"endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
|
|
196
209
|
"endpoint_bearer_token": obj.get("endpoint_bearer_token"),
|
|
@@ -200,7 +213,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
|
|
|
200
213
|
"original_command": obj.get("original_command"),
|
|
201
214
|
"image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
|
|
202
215
|
"backend_protocol": obj.get("backend_protocol"),
|
|
203
|
-
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
|
|
216
|
+
"enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
|
|
217
|
+
"session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False,
|
|
218
|
+
"config_file": ConfigFileMount.from_dict(obj["config_file"]) if obj.get("config_file") is not None else None
|
|
204
219
|
})
|
|
205
220
|
return _obj
|
|
206
221
|
|