platform-api-python-client 4.8.4__py3-none-any.whl → 4.9.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. platform_api_python_client/__init__.py +5 -4
  2. platform_api_python_client/api/external_api.py +129 -420
  3. platform_api_python_client/api_client.py +1 -1
  4. platform_api_python_client/configuration.py +1 -1
  5. platform_api_python_client/models/__init__.py +4 -3
  6. platform_api_python_client/models/config_file_mount.py +99 -0
  7. platform_api_python_client/models/create_c_serve_v3_deployment_request.py +13 -2
  8. platform_api_python_client/models/create_inference_v3_deployment_request.py +29 -2
  9. platform_api_python_client/models/{create_rag_deployment_request.py → create_job_deployment_request.py} +32 -57
  10. platform_api_python_client/models/{create_rag_deployment_response.py → create_job_deployment_response.py} +4 -4
  11. platform_api_python_client/models/deployment_type.py +1 -0
  12. platform_api_python_client/models/get_c_serve_v3_deployment_response.py +9 -3
  13. platform_api_python_client/models/get_inference_v3_deployment_response.py +18 -3
  14. platform_api_python_client/models/{get_rag_deployment_response.py → get_job_deployment_response.py} +35 -48
  15. platform_api_python_client/models/pod_status.py +2 -0
  16. platform_api_python_client/models/service_status.py +4 -0
  17. {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/METADATA +1 -1
  18. {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/RECORD +20 -19
  19. {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/WHEEL +0 -0
  20. {platform_api_python_client-4.8.4.dist-info → platform_api_python_client-4.9.7.dist-info}/top_level.txt +0 -0
@@ -90,7 +90,7 @@ class ApiClient:
90
90
  self.default_headers[header_name] = header_value
91
91
  self.cookie = cookie
92
92
  # Set default User-Agent.
93
- self.user_agent = 'OpenAPI-Generator/4.8.4/python'
93
+ self.user_agent = 'OpenAPI-Generator/4.9.7/python'
94
94
  self.client_side_validation = configuration.client_side_validation
95
95
 
96
96
  def __enter__(self):
@@ -392,7 +392,7 @@ class Configuration:
392
392
  "OS: {env}\n"\
393
393
  "Python Version: {pyversion}\n"\
394
394
  "Version of the API: 0.1.0\n"\
395
- "SDK Package Version: 4.8.4".\
395
+ "SDK Package Version: 4.9.7".\
396
396
  format(env=sys.platform, pyversion=sys.version)
397
397
 
398
398
  def get_host_settings(self):
@@ -21,6 +21,7 @@ from platform_api_python_client.models.c_serve_recipe_perf import CServeRecipePe
21
21
  from platform_api_python_client.models.c_serve_recipe_response import CServeRecipeResponse
22
22
  from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
23
23
  from platform_api_python_client.models.cluster_capacity_response import ClusterCapacityResponse
24
+ from platform_api_python_client.models.config_file_mount import ConfigFileMount
24
25
  from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest
25
26
  from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse
26
27
  from platform_api_python_client.models.create_c_serve_v3_deployment_request import CreateCServeV3DeploymentRequest
@@ -30,10 +31,10 @@ from platform_api_python_client.models.create_compute_deployment_response import
30
31
  from platform_api_python_client.models.create_inference_deployment_request import CreateInferenceDeploymentRequest
31
32
  from platform_api_python_client.models.create_inference_deployment_response import CreateInferenceDeploymentResponse
32
33
  from platform_api_python_client.models.create_inference_v3_deployment_request import CreateInferenceV3DeploymentRequest
34
+ from platform_api_python_client.models.create_job_deployment_request import CreateJobDeploymentRequest
35
+ from platform_api_python_client.models.create_job_deployment_response import CreateJobDeploymentResponse
33
36
  from platform_api_python_client.models.create_organization_request import CreateOrganizationRequest
34
37
  from platform_api_python_client.models.create_organization_response import CreateOrganizationResponse
35
- from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest
36
- from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse
37
38
  from platform_api_python_client.models.create_url_request import CreateUrlRequest
38
39
  from platform_api_python_client.models.create_url_response import CreateUrlResponse
39
40
  from platform_api_python_client.models.credits_response import CreditsResponse
@@ -56,7 +57,7 @@ from platform_api_python_client.models.get_deployment_revision_response import G
56
57
  from platform_api_python_client.models.get_deployment_usage_response import GetDeploymentUsageResponse
57
58
  from platform_api_python_client.models.get_inference_deployment_response import GetInferenceDeploymentResponse
58
59
  from platform_api_python_client.models.get_inference_v3_deployment_response import GetInferenceV3DeploymentResponse
59
- from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse
60
+ from platform_api_python_client.models.get_job_deployment_response import GetJobDeploymentResponse
60
61
  from platform_api_python_client.models.gpu_type_capacity import GpuTypeCapacity
61
62
  from platform_api_python_client.models.http_validation_error import HTTPValidationError
62
63
  from platform_api_python_client.models.hardware_instance_response import HardwareInstanceResponse
@@ -0,0 +1,99 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ Platform External API
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: 0.1.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ from __future__ import annotations
16
+ import pprint
17
+ import re # noqa: F401
18
+ import json
19
+
20
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
21
+ from typing import Any, ClassVar, Dict, List
22
+ from typing_extensions import Annotated
23
+ from typing import Optional, Set
24
+ from typing_extensions import Self
25
+
26
+ class ConfigFileMount(BaseModel):
27
+ """
28
+ ConfigFileMount
29
+ """ # noqa: E501
30
+ filename: Annotated[str, Field(min_length=1, strict=True, max_length=253)]
31
+ mount_path: Annotated[str, Field(min_length=1, strict=True)]
32
+ content: Annotated[str, Field(min_length=1, strict=True)]
33
+ __properties: ClassVar[List[str]] = ["filename", "mount_path", "content"]
34
+
35
+ @field_validator('filename')
36
+ def filename_validate_regular_expression(cls, value):
37
+ """Validates the regular expression"""
38
+ if not re.match(r"^[a-zA-Z0-9._-]+$", value):
39
+ raise ValueError(r"must validate the regular expression /^[a-zA-Z0-9._-]+$/")
40
+ return value
41
+
42
+ model_config = ConfigDict(
43
+ populate_by_name=True,
44
+ validate_assignment=True,
45
+ protected_namespaces=(),
46
+ )
47
+
48
+
49
+ def to_str(self) -> str:
50
+ """Returns the string representation of the model using alias"""
51
+ return pprint.pformat(self.model_dump(by_alias=True))
52
+
53
+ def to_json(self) -> str:
54
+ """Returns the JSON representation of the model using alias"""
55
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
56
+ return json.dumps(self.to_dict())
57
+
58
+ @classmethod
59
+ def from_json(cls, json_str: str) -> Optional[Self]:
60
+ """Create an instance of ConfigFileMount from a JSON string"""
61
+ return cls.from_dict(json.loads(json_str))
62
+
63
+ def to_dict(self) -> Dict[str, Any]:
64
+ """Return the dictionary representation of the model using alias.
65
+
66
+ This has the following differences from calling pydantic's
67
+ `self.model_dump(by_alias=True)`:
68
+
69
+ * `None` is only added to the output dict for nullable fields that
70
+ were set at model initialization. Other fields with value `None`
71
+ are ignored.
72
+ """
73
+ excluded_fields: Set[str] = set([
74
+ ])
75
+
76
+ _dict = self.model_dump(
77
+ by_alias=True,
78
+ exclude=excluded_fields,
79
+ exclude_none=True,
80
+ )
81
+ return _dict
82
+
83
+ @classmethod
84
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
85
+ """Create an instance of ConfigFileMount from a dict"""
86
+ if obj is None:
87
+ return None
88
+
89
+ if not isinstance(obj, dict):
90
+ return cls.model_validate(obj)
91
+
92
+ _obj = cls.model_validate({
93
+ "filename": obj.get("filename"),
94
+ "mount_path": obj.get("mount_path"),
95
+ "content": obj.get("content")
96
+ })
97
+ return _obj
98
+
99
+
@@ -43,9 +43,12 @@ class CreateCServeV3DeploymentRequest(BaseModel):
43
43
  max_replicas: StrictInt
44
44
  initial_replicas: Optional[StrictInt] = None
45
45
  concurrency: Optional[StrictInt] = None
46
+ cooldown_period: Optional[StrictInt] = None
46
47
  env_vars: Optional[Dict[str, StrictStr]] = None
47
48
  enable_logging: Optional[StrictBool] = True
48
- __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "env_vars", "enable_logging"]
49
+ enable_node_model_cache: Optional[StrictBool] = False
50
+ session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
51
+ __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
49
52
 
50
53
  @field_validator('name')
51
54
  def name_validate_regular_expression(cls, value):
@@ -141,6 +144,11 @@ class CreateCServeV3DeploymentRequest(BaseModel):
141
144
  if self.concurrency is None and "concurrency" in self.model_fields_set:
142
145
  _dict['concurrency'] = None
143
146
 
147
+ # set to None if cooldown_period (nullable) is None
148
+ # and model_fields_set contains the field
149
+ if self.cooldown_period is None and "cooldown_period" in self.model_fields_set:
150
+ _dict['cooldown_period'] = None
151
+
144
152
  return _dict
145
153
 
146
154
  @classmethod
@@ -168,8 +176,11 @@ class CreateCServeV3DeploymentRequest(BaseModel):
168
176
  "max_replicas": obj.get("max_replicas"),
169
177
  "initial_replicas": obj.get("initial_replicas"),
170
178
  "concurrency": obj.get("concurrency"),
179
+ "cooldown_period": obj.get("cooldown_period"),
171
180
  "env_vars": obj.get("env_vars"),
172
- "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
181
+ "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
182
+ "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
183
+ "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
173
184
  })
174
185
  return _obj
175
186
 
@@ -21,6 +21,7 @@ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, Strict
21
21
  from typing import Any, ClassVar, Dict, List, Optional
22
22
  from typing_extensions import Annotated
23
23
  from platform_api_python_client.models.backend_protocol import BackendProtocol
24
+ from platform_api_python_client.models.config_file_mount import ConfigFileMount
24
25
  from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
25
26
  from typing import Optional, Set
26
27
  from typing_extensions import Self
@@ -42,14 +43,18 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
42
43
  max_replicas: StrictInt
43
44
  initial_replicas: Optional[StrictInt] = None
44
45
  concurrency: Optional[StrictInt] = None
46
+ cooldown_period: Optional[StrictInt] = None
45
47
  healthcheck: Optional[StrictStr] = None
46
48
  env_vars: Optional[Dict[str, StrictStr]] = None
47
49
  command: Optional[StrictStr] = None
48
50
  endpoint_bearer_token: Optional[StrictStr] = None
49
51
  endpoint_certificate_authority: Optional[StrictStr] = None
52
+ hf_token: Optional[StrictStr] = None
50
53
  backend_protocol: Optional[BackendProtocol] = None
51
54
  enable_logging: Optional[StrictBool] = False
52
- __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "backend_protocol", "enable_logging"]
55
+ session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
56
+ config_file: Optional[ConfigFileMount] = None
57
+ __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "hf_token", "backend_protocol", "enable_logging", "session_affinity", "config_file"]
53
58
 
54
59
  @field_validator('name')
55
60
  def name_validate_regular_expression(cls, value):
@@ -100,6 +105,9 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
100
105
  # override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
101
106
  if self.image_pull_secret_credentials:
102
107
  _dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
108
+ # override the default output from pydantic by calling `to_dict()` of config_file
109
+ if self.config_file:
110
+ _dict['config_file'] = self.config_file.to_dict()
103
111
  # set to None if max_surge (nullable) is None
104
112
  # and model_fields_set contains the field
105
113
  if self.max_surge is None and "max_surge" in self.model_fields_set:
@@ -130,6 +138,11 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
130
138
  if self.concurrency is None and "concurrency" in self.model_fields_set:
131
139
  _dict['concurrency'] = None
132
140
 
141
+ # set to None if cooldown_period (nullable) is None
142
+ # and model_fields_set contains the field
143
+ if self.cooldown_period is None and "cooldown_period" in self.model_fields_set:
144
+ _dict['cooldown_period'] = None
145
+
133
146
  # set to None if healthcheck (nullable) is None
134
147
  # and model_fields_set contains the field
135
148
  if self.healthcheck is None and "healthcheck" in self.model_fields_set:
@@ -155,6 +168,16 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
155
168
  if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
156
169
  _dict['endpoint_certificate_authority'] = None
157
170
 
171
+ # set to None if hf_token (nullable) is None
172
+ # and model_fields_set contains the field
173
+ if self.hf_token is None and "hf_token" in self.model_fields_set:
174
+ _dict['hf_token'] = None
175
+
176
+ # set to None if config_file (nullable) is None
177
+ # and model_fields_set contains the field
178
+ if self.config_file is None and "config_file" in self.model_fields_set:
179
+ _dict['config_file'] = None
180
+
158
181
  return _dict
159
182
 
160
183
  @classmethod
@@ -180,13 +203,17 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
180
203
  "max_replicas": obj.get("max_replicas"),
181
204
  "initial_replicas": obj.get("initial_replicas"),
182
205
  "concurrency": obj.get("concurrency"),
206
+ "cooldown_period": obj.get("cooldown_period"),
183
207
  "healthcheck": obj.get("healthcheck"),
184
208
  "env_vars": obj.get("env_vars"),
185
209
  "command": obj.get("command"),
186
210
  "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
187
211
  "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
212
+ "hf_token": obj.get("hf_token"),
188
213
  "backend_protocol": obj.get("backend_protocol"),
189
- "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False
214
+ "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False,
215
+ "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False,
216
+ "config_file": ConfigFileMount.from_dict(obj["config_file"]) if obj.get("config_file") is not None else None
190
217
  })
191
218
  return _obj
192
219
 
@@ -17,34 +17,29 @@ import pprint
17
17
  import re # noqa: F401
18
18
  import json
19
19
 
20
- from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr, field_validator
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, field_validator
21
21
  from typing import Any, ClassVar, Dict, List, Optional
22
22
  from typing_extensions import Annotated
23
- from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
23
+ from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
24
24
  from typing import Optional, Set
25
25
  from typing_extensions import Self
26
26
 
27
- class CreateRagDeploymentRequest(BaseModel):
27
+ class CreateJobDeploymentRequest(BaseModel):
28
28
  """
29
- CreateRagDeploymentRequest
29
+ CreateJobDeploymentRequest
30
30
  """ # noqa: E501
31
31
  name: Annotated[str, Field(min_length=1, strict=True, max_length=20)]
32
32
  cluster_id: StrictInt
33
33
  hardware_instance_id: StrictInt
34
34
  user_annotations: Optional[Dict[str, StrictStr]] = None
35
- recipe: CServeV2Recipe
36
- cserve_version: Optional[StrictStr] = None
37
- hf_token: Optional[StrictStr] = None
38
- llm_model: StrictStr
39
- centml_api_key: StrictStr
40
- min_scale: Optional[StrictInt] = 1
41
- max_scale: Optional[StrictInt] = 1
42
- initial_scale: Optional[StrictInt] = None
43
- endpoint_bearer_token: Optional[StrictStr] = None
44
- endpoint_certificate_authority: Optional[StrictStr] = None
45
- concurrency: Optional[StrictInt] = None
35
+ image_url: StrictStr
36
+ image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
46
37
  env_vars: Optional[Dict[str, StrictStr]] = None
47
- __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "llm_model", "centml_api_key", "min_scale", "max_scale", "initial_scale", "endpoint_bearer_token", "endpoint_certificate_authority", "concurrency", "env_vars"]
38
+ command: Optional[StrictStr] = None
39
+ completions: Optional[StrictInt] = 1
40
+ parallelism: Optional[StrictInt] = 1
41
+ enable_logging: Optional[StrictBool] = True
42
+ __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "env_vars", "command", "completions", "parallelism", "enable_logging"]
48
43
 
49
44
  @field_validator('name')
50
45
  def name_validate_regular_expression(cls, value):
@@ -71,7 +66,7 @@ class CreateRagDeploymentRequest(BaseModel):
71
66
 
72
67
  @classmethod
73
68
  def from_json(cls, json_str: str) -> Optional[Self]:
74
- """Create an instance of CreateRagDeploymentRequest from a JSON string"""
69
+ """Create an instance of CreateJobDeploymentRequest from a JSON string"""
75
70
  return cls.from_dict(json.loads(json_str))
76
71
 
77
72
  def to_dict(self) -> Dict[str, Any]:
@@ -92,49 +87,34 @@ class CreateRagDeploymentRequest(BaseModel):
92
87
  exclude=excluded_fields,
93
88
  exclude_none=True,
94
89
  )
95
- # override the default output from pydantic by calling `to_dict()` of recipe
96
- if self.recipe:
97
- _dict['recipe'] = self.recipe.to_dict()
90
+ # override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
91
+ if self.image_pull_secret_credentials:
92
+ _dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
98
93
  # set to None if user_annotations (nullable) is None
99
94
  # and model_fields_set contains the field
100
95
  if self.user_annotations is None and "user_annotations" in self.model_fields_set:
101
96
  _dict['user_annotations'] = None
102
97
 
103
- # set to None if cserve_version (nullable) is None
98
+ # set to None if image_pull_secret_credentials (nullable) is None
104
99
  # and model_fields_set contains the field
105
- if self.cserve_version is None and "cserve_version" in self.model_fields_set:
106
- _dict['cserve_version'] = None
100
+ if self.image_pull_secret_credentials is None and "image_pull_secret_credentials" in self.model_fields_set:
101
+ _dict['image_pull_secret_credentials'] = None
107
102
 
108
- # set to None if hf_token (nullable) is None
103
+ # set to None if env_vars (nullable) is None
109
104
  # and model_fields_set contains the field
110
- if self.hf_token is None and "hf_token" in self.model_fields_set:
111
- _dict['hf_token'] = None
105
+ if self.env_vars is None and "env_vars" in self.model_fields_set:
106
+ _dict['env_vars'] = None
112
107
 
113
- # set to None if initial_scale (nullable) is None
108
+ # set to None if command (nullable) is None
114
109
  # and model_fields_set contains the field
115
- if self.initial_scale is None and "initial_scale" in self.model_fields_set:
116
- _dict['initial_scale'] = None
117
-
118
- # set to None if endpoint_bearer_token (nullable) is None
119
- # and model_fields_set contains the field
120
- if self.endpoint_bearer_token is None and "endpoint_bearer_token" in self.model_fields_set:
121
- _dict['endpoint_bearer_token'] = None
122
-
123
- # set to None if endpoint_certificate_authority (nullable) is None
124
- # and model_fields_set contains the field
125
- if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
126
- _dict['endpoint_certificate_authority'] = None
127
-
128
- # set to None if concurrency (nullable) is None
129
- # and model_fields_set contains the field
130
- if self.concurrency is None and "concurrency" in self.model_fields_set:
131
- _dict['concurrency'] = None
110
+ if self.command is None and "command" in self.model_fields_set:
111
+ _dict['command'] = None
132
112
 
133
113
  return _dict
134
114
 
135
115
  @classmethod
136
116
  def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
137
- """Create an instance of CreateRagDeploymentRequest from a dict"""
117
+ """Create an instance of CreateJobDeploymentRequest from a dict"""
138
118
  if obj is None:
139
119
  return None
140
120
 
@@ -146,18 +126,13 @@ class CreateRagDeploymentRequest(BaseModel):
146
126
  "cluster_id": obj.get("cluster_id"),
147
127
  "hardware_instance_id": obj.get("hardware_instance_id"),
148
128
  "user_annotations": obj.get("user_annotations"),
149
- "recipe": CServeV2Recipe.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None,
150
- "cserve_version": obj.get("cserve_version"),
151
- "hf_token": obj.get("hf_token"),
152
- "llm_model": obj.get("llm_model"),
153
- "centml_api_key": obj.get("centml_api_key"),
154
- "min_scale": obj.get("min_scale") if obj.get("min_scale") is not None else 1,
155
- "max_scale": obj.get("max_scale") if obj.get("max_scale") is not None else 1,
156
- "initial_scale": obj.get("initial_scale"),
157
- "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
158
- "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
159
- "concurrency": obj.get("concurrency"),
160
- "env_vars": obj.get("env_vars")
129
+ "image_url": obj.get("image_url"),
130
+ "image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
131
+ "env_vars": obj.get("env_vars"),
132
+ "command": obj.get("command"),
133
+ "completions": obj.get("completions") if obj.get("completions") is not None else 1,
134
+ "parallelism": obj.get("parallelism") if obj.get("parallelism") is not None else 1,
135
+ "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
161
136
  })
162
137
  return _obj
163
138
 
@@ -23,9 +23,9 @@ from typing import Any, ClassVar, Dict, List
23
23
  from typing import Optional, Set
24
24
  from typing_extensions import Self
25
25
 
26
- class CreateRagDeploymentResponse(BaseModel):
26
+ class CreateJobDeploymentResponse(BaseModel):
27
27
  """
28
- CreateRagDeploymentResponse
28
+ CreateJobDeploymentResponse
29
29
  """ # noqa: E501
30
30
  id: StrictInt
31
31
  created_at: datetime
@@ -50,7 +50,7 @@ class CreateRagDeploymentResponse(BaseModel):
50
50
 
51
51
  @classmethod
52
52
  def from_json(cls, json_str: str) -> Optional[Self]:
53
- """Create an instance of CreateRagDeploymentResponse from a JSON string"""
53
+ """Create an instance of CreateJobDeploymentResponse from a JSON string"""
54
54
  return cls.from_dict(json.loads(json_str))
55
55
 
56
56
  def to_dict(self) -> Dict[str, Any]:
@@ -75,7 +75,7 @@ class CreateRagDeploymentResponse(BaseModel):
75
75
 
76
76
  @classmethod
77
77
  def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
78
- """Create an instance of CreateRagDeploymentResponse from a dict"""
78
+ """Create an instance of CreateJobDeploymentResponse from a dict"""
79
79
  if obj is None:
80
80
  return None
81
81
 
@@ -38,6 +38,7 @@ class DeploymentType(str, Enum):
38
38
  CSERVE_V3 = 'cserve_v3'
39
39
  DEPLOYMENT = 'deployment'
40
40
  RAG = 'rag'
41
+ JOB = 'job'
41
42
 
42
43
  @classmethod
43
44
  def from_json(cls, json_str: str) -> Self:
@@ -18,7 +18,7 @@ import re # noqa: F401
18
18
  import json
19
19
 
20
20
  from datetime import datetime
21
- from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
21
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
22
22
  from typing import Any, ClassVar, Dict, List, Optional
23
23
  from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
24
24
  from platform_api_python_client.models.deployment_status import DeploymentStatus
@@ -50,9 +50,12 @@ class GetCServeV3DeploymentResponse(BaseModel):
50
50
  endpoint_certificate_authority: Optional[StrictStr] = None
51
51
  endpoint_bearer_token: Optional[StrictStr] = None
52
52
  concurrency: Optional[StrictInt] = None
53
+ cooldown_period: Optional[StrictInt] = 1800
53
54
  env_vars: Optional[Dict[str, StrictStr]] = None
54
55
  enable_logging: Optional[StrictBool] = True
55
- __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "env_vars", "enable_logging"]
56
+ enable_node_model_cache: Optional[StrictBool] = False
57
+ session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
58
+ __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
56
59
 
57
60
  model_config = ConfigDict(
58
61
  populate_by_name=True,
@@ -163,8 +166,11 @@ class GetCServeV3DeploymentResponse(BaseModel):
163
166
  "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
164
167
  "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
165
168
  "concurrency": obj.get("concurrency"),
169
+ "cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
166
170
  "env_vars": obj.get("env_vars"),
167
- "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
171
+ "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
172
+ "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
173
+ "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
168
174
  })
169
175
  return _obj
170
176
 
@@ -18,9 +18,10 @@ import re # noqa: F401
18
18
  import json
19
19
 
20
20
  from datetime import datetime
21
- from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
21
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
22
22
  from typing import Any, ClassVar, Dict, List, Optional
23
23
  from platform_api_python_client.models.backend_protocol import BackendProtocol
24
+ from platform_api_python_client.models.config_file_mount import ConfigFileMount
24
25
  from platform_api_python_client.models.deployment_status import DeploymentStatus
25
26
  from platform_api_python_client.models.deployment_type import DeploymentType
26
27
  from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
@@ -48,6 +49,7 @@ class GetInferenceV3DeploymentResponse(BaseModel):
48
49
  max_replicas: StrictInt
49
50
  initial_replicas: Optional[StrictInt] = None
50
51
  concurrency: Optional[StrictInt] = None
52
+ cooldown_period: Optional[StrictInt] = 1800
51
53
  healthcheck: Optional[StrictStr] = None
52
54
  endpoint_certificate_authority: Optional[StrictStr] = None
53
55
  endpoint_bearer_token: Optional[StrictStr] = None
@@ -58,7 +60,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
58
60
  image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
59
61
  backend_protocol: Optional[BackendProtocol] = None
60
62
  enable_logging: Optional[StrictBool] = True
61
- __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging"]
63
+ session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
64
+ config_file: Optional[ConfigFileMount] = None
65
+ __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging", "session_affinity", "config_file"]
62
66
 
63
67
  model_config = ConfigDict(
64
68
  populate_by_name=True,
@@ -102,6 +106,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
102
106
  # override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
103
107
  if self.image_pull_secret_credentials:
104
108
  _dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
109
+ # override the default output from pydantic by calling `to_dict()` of config_file
110
+ if self.config_file:
111
+ _dict['config_file'] = self.config_file.to_dict()
105
112
  # set to None if image_url (nullable) is None
106
113
  # and model_fields_set contains the field
107
114
  if self.image_url is None and "image_url" in self.model_fields_set:
@@ -162,6 +169,11 @@ class GetInferenceV3DeploymentResponse(BaseModel):
162
169
  if self.image_pull_secret_credentials is None and "image_pull_secret_credentials" in self.model_fields_set:
163
170
  _dict['image_pull_secret_credentials'] = None
164
171
 
172
+ # set to None if config_file (nullable) is None
173
+ # and model_fields_set contains the field
174
+ if self.config_file is None and "config_file" in self.model_fields_set:
175
+ _dict['config_file'] = None
176
+
165
177
  return _dict
166
178
 
167
179
  @classmethod
@@ -191,6 +203,7 @@ class GetInferenceV3DeploymentResponse(BaseModel):
191
203
  "max_replicas": obj.get("max_replicas"),
192
204
  "initial_replicas": obj.get("initial_replicas"),
193
205
  "concurrency": obj.get("concurrency"),
206
+ "cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
194
207
  "healthcheck": obj.get("healthcheck"),
195
208
  "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
196
209
  "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
@@ -200,7 +213,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
200
213
  "original_command": obj.get("original_command"),
201
214
  "image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
202
215
  "backend_protocol": obj.get("backend_protocol"),
203
- "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
216
+ "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
217
+ "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False,
218
+ "config_file": ConfigFileMount.from_dict(obj["config_file"]) if obj.get("config_file") is not None else None
204
219
  })
205
220
  return _obj
206
221