PyPI - platform-api-python-client - Versions diffs - 4.8.4__py3-none-any.whl → 4.9.7__py3-none-any.whl - Mend

platform-api-python-client 4.8.4__py3-none-any.whl → 4.9.7__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (20)

platform_api_python_client/api_client.py CHANGED Viewed

@@ -90,7 +90,7 @@ class ApiClient:
             self.default_headers[header_name] = header_value
         self.cookie = cookie
         # Set default User-Agent.
-        self.user_agent = 'OpenAPI-Generator/4.8.4/python'
+        self.user_agent = 'OpenAPI-Generator/4.9.7/python'
         self.client_side_validation = configuration.client_side_validation
     def __enter__(self):

platform_api_python_client/configuration.py CHANGED Viewed

@@ -392,7 +392,7 @@ class Configuration:
                "OS: {env}\n"\
                "Python Version: {pyversion}\n"\
                "Version of the API: 0.1.0\n"\
-               "SDK Package Version: 4.8.4".\
+               "SDK Package Version: 4.9.7".\
                format(env=sys.platform, pyversion=sys.version)
     def get_host_settings(self):

platform_api_python_client/models/__init__.py CHANGED Viewed

@@ -21,6 +21,7 @@ from platform_api_python_client.models.c_serve_recipe_perf import CServeRecipePe
 from platform_api_python_client.models.c_serve_recipe_response import CServeRecipeResponse
 from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
 from platform_api_python_client.models.cluster_capacity_response import ClusterCapacityResponse
+from platform_api_python_client.models.config_file_mount import ConfigFileMount
 from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest
 from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse
 from platform_api_python_client.models.create_c_serve_v3_deployment_request import CreateCServeV3DeploymentRequest
@@ -30,10 +31,10 @@ from platform_api_python_client.models.create_compute_deployment_response import
 from platform_api_python_client.models.create_inference_deployment_request import CreateInferenceDeploymentRequest
 from platform_api_python_client.models.create_inference_deployment_response import CreateInferenceDeploymentResponse
 from platform_api_python_client.models.create_inference_v3_deployment_request import CreateInferenceV3DeploymentRequest
+from platform_api_python_client.models.create_job_deployment_request import CreateJobDeploymentRequest
+from platform_api_python_client.models.create_job_deployment_response import CreateJobDeploymentResponse
 from platform_api_python_client.models.create_organization_request import CreateOrganizationRequest
 from platform_api_python_client.models.create_organization_response import CreateOrganizationResponse
-from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest
-from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse
 from platform_api_python_client.models.create_url_request import CreateUrlRequest
 from platform_api_python_client.models.create_url_response import CreateUrlResponse
 from platform_api_python_client.models.credits_response import CreditsResponse
@@ -56,7 +57,7 @@ from platform_api_python_client.models.get_deployment_revision_response import G
 from platform_api_python_client.models.get_deployment_usage_response import GetDeploymentUsageResponse
 from platform_api_python_client.models.get_inference_deployment_response import GetInferenceDeploymentResponse
 from platform_api_python_client.models.get_inference_v3_deployment_response import GetInferenceV3DeploymentResponse
-from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse
+from platform_api_python_client.models.get_job_deployment_response import GetJobDeploymentResponse
 from platform_api_python_client.models.gpu_type_capacity import GpuTypeCapacity
 from platform_api_python_client.models.http_validation_error import HTTPValidationError
 from platform_api_python_client.models.hardware_instance_response import HardwareInstanceResponse

platform_api_python_client/models/config_file_mount.py ADDED Viewed

@@ -0,0 +1,99 @@
+# coding: utf-8
+"""
+    Platform External API
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+    Do not edit the class manually.
+"""  # noqa: E501
+from __future__ import annotations
+import pprint
+import re  # noqa: F401
+import json
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+from typing import Any, ClassVar, Dict, List
+from typing_extensions import Annotated
+from typing import Optional, Set
+from typing_extensions import Self
+class ConfigFileMount(BaseModel):
+    """
+    ConfigFileMount
+    """ # noqa: E501
+    filename: Annotated[str, Field(min_length=1, strict=True, max_length=253)]
+    mount_path: Annotated[str, Field(min_length=1, strict=True)]
+    content: Annotated[str, Field(min_length=1, strict=True)]
+    __properties: ClassVar[List[str]] = ["filename", "mount_path", "content"]
+    @field_validator('filename')
+    def filename_validate_regular_expression(cls, value):
+        """Validates the regular expression"""
+        if not re.match(r"^[a-zA-Z0-9._-]+$", value):
+            raise ValueError(r"must validate the regular expression /^[a-zA-Z0-9._-]+$/")
+        return value
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of ConfigFileMount from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of ConfigFileMount from a dict"""
+        if obj is None:
+            return None
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+        _obj = cls.model_validate({
+            "filename": obj.get("filename"),
+            "mount_path": obj.get("mount_path"),
+            "content": obj.get("content")
+        })
+        return _obj

platform_api_python_client/models/create_c_serve_v3_deployment_request.py CHANGED Viewed

@@ -43,9 +43,12 @@ class CreateCServeV3DeploymentRequest(BaseModel):
     max_replicas: StrictInt
     initial_replicas: Optional[StrictInt] = None
     concurrency: Optional[StrictInt] = None
+    cooldown_period: Optional[StrictInt] = None
     env_vars: Optional[Dict[str, StrictStr]] = None
     enable_logging: Optional[StrictBool] = True
-    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "env_vars", "enable_logging"]
+    enable_node_model_cache: Optional[StrictBool] = False
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
     @field_validator('name')
     def name_validate_regular_expression(cls, value):
@@ -141,6 +144,11 @@ class CreateCServeV3DeploymentRequest(BaseModel):
         if self.concurrency is None and "concurrency" in self.model_fields_set:
             _dict['concurrency'] = None
+        # set to None if cooldown_period (nullable) is None
+        # and model_fields_set contains the field
+        if self.cooldown_period is None and "cooldown_period" in self.model_fields_set:
+            _dict['cooldown_period'] = None
         return _dict
     @classmethod
@@ -168,8 +176,11 @@ class CreateCServeV3DeploymentRequest(BaseModel):
             "max_replicas": obj.get("max_replicas"),
             "initial_replicas": obj.get("initial_replicas"),
             "concurrency": obj.get("concurrency"),
+            "cooldown_period": obj.get("cooldown_period"),
             "env_vars": obj.get("env_vars"),
-            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
+            "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
         })
         return _obj

platform_api_python_client/models/create_inference_v3_deployment_request.py CHANGED Viewed

@@ -21,6 +21,7 @@ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, Strict
 from typing import Any, ClassVar, Dict, List, Optional
 from typing_extensions import Annotated
 from platform_api_python_client.models.backend_protocol import BackendProtocol
+from platform_api_python_client.models.config_file_mount import ConfigFileMount
 from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
 from typing import Optional, Set
 from typing_extensions import Self
@@ -42,14 +43,18 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
     max_replicas: StrictInt
     initial_replicas: Optional[StrictInt] = None
     concurrency: Optional[StrictInt] = None
+    cooldown_period: Optional[StrictInt] = None
     healthcheck: Optional[StrictStr] = None
     env_vars: Optional[Dict[str, StrictStr]] = None
     command: Optional[StrictStr] = None
     endpoint_bearer_token: Optional[StrictStr] = None
     endpoint_certificate_authority: Optional[StrictStr] = None
+    hf_token: Optional[StrictStr] = None
     backend_protocol: Optional[BackendProtocol] = None
     enable_logging: Optional[StrictBool] = False
-    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "backend_protocol", "enable_logging"]
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    config_file: Optional[ConfigFileMount] = None
+    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "hf_token", "backend_protocol", "enable_logging", "session_affinity", "config_file"]
     @field_validator('name')
     def name_validate_regular_expression(cls, value):
@@ -100,6 +105,9 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
         # override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
         if self.image_pull_secret_credentials:
             _dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
+        # override the default output from pydantic by calling `to_dict()` of config_file
+        if self.config_file:
+            _dict['config_file'] = self.config_file.to_dict()
         # set to None if max_surge (nullable) is None
         # and model_fields_set contains the field
         if self.max_surge is None and "max_surge" in self.model_fields_set:
@@ -130,6 +138,11 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
         if self.concurrency is None and "concurrency" in self.model_fields_set:
             _dict['concurrency'] = None
+        # set to None if cooldown_period (nullable) is None
+        # and model_fields_set contains the field
+        if self.cooldown_period is None and "cooldown_period" in self.model_fields_set:
+            _dict['cooldown_period'] = None
         # set to None if healthcheck (nullable) is None
         # and model_fields_set contains the field
         if self.healthcheck is None and "healthcheck" in self.model_fields_set:
@@ -155,6 +168,16 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
         if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
             _dict['endpoint_certificate_authority'] = None
+        # set to None if hf_token (nullable) is None
+        # and model_fields_set contains the field
+        if self.hf_token is None and "hf_token" in self.model_fields_set:
+            _dict['hf_token'] = None
+        # set to None if config_file (nullable) is None
+        # and model_fields_set contains the field
+        if self.config_file is None and "config_file" in self.model_fields_set:
+            _dict['config_file'] = None
         return _dict
     @classmethod
@@ -180,13 +203,17 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
             "max_replicas": obj.get("max_replicas"),
             "initial_replicas": obj.get("initial_replicas"),
             "concurrency": obj.get("concurrency"),
+            "cooldown_period": obj.get("cooldown_period"),
             "healthcheck": obj.get("healthcheck"),
             "env_vars": obj.get("env_vars"),
             "command": obj.get("command"),
             "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
             "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
+            "hf_token": obj.get("hf_token"),
             "backend_protocol": obj.get("backend_protocol"),
-            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False,
+            "config_file": ConfigFileMount.from_dict(obj["config_file"]) if obj.get("config_file") is not None else None
         })
         return _obj

platform_api_python_client/models/{create_rag_deployment_request.py → create_job_deployment_request.py} RENAMED Viewed

@@ -17,34 +17,29 @@ import pprint
 import re  # noqa: F401
 import json
-from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr, field_validator
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, field_validator
 from typing import Any, ClassVar, Dict, List, Optional
 from typing_extensions import Annotated
-from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
+from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
 from typing import Optional, Set
 from typing_extensions import Self
-class CreateRagDeploymentRequest(BaseModel):
+class CreateJobDeploymentRequest(BaseModel):
     """
-    CreateRagDeploymentRequest
+    CreateJobDeploymentRequest
     """ # noqa: E501
     name: Annotated[str, Field(min_length=1, strict=True, max_length=20)]
     cluster_id: StrictInt
     hardware_instance_id: StrictInt
     user_annotations: Optional[Dict[str, StrictStr]] = None
-    recipe: CServeV2Recipe
-    cserve_version: Optional[StrictStr] = None
-    hf_token: Optional[StrictStr] = None
-    llm_model: StrictStr
-    centml_api_key: StrictStr
-    min_scale: Optional[StrictInt] = 1
-    max_scale: Optional[StrictInt] = 1
-    initial_scale: Optional[StrictInt] = None
-    endpoint_bearer_token: Optional[StrictStr] = None
-    endpoint_certificate_authority: Optional[StrictStr] = None
-    concurrency: Optional[StrictInt] = None
+    image_url: StrictStr
+    image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
     env_vars: Optional[Dict[str, StrictStr]] = None
-    __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "llm_model", "centml_api_key", "min_scale", "max_scale", "initial_scale", "endpoint_bearer_token", "endpoint_certificate_authority", "concurrency", "env_vars"]
+    command: Optional[StrictStr] = None
+    completions: Optional[StrictInt] = 1
+    parallelism: Optional[StrictInt] = 1
+    enable_logging: Optional[StrictBool] = True
+    __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "env_vars", "command", "completions", "parallelism", "enable_logging"]
     @field_validator('name')
     def name_validate_regular_expression(cls, value):
@@ -71,7 +66,7 @@ class CreateRagDeploymentRequest(BaseModel):
     @classmethod
     def from_json(cls, json_str: str) -> Optional[Self]:
-        """Create an instance of CreateRagDeploymentRequest from a JSON string"""
+        """Create an instance of CreateJobDeploymentRequest from a JSON string"""
         return cls.from_dict(json.loads(json_str))
     def to_dict(self) -> Dict[str, Any]:
@@ -92,49 +87,34 @@ class CreateRagDeploymentRequest(BaseModel):
             exclude=excluded_fields,
             exclude_none=True,
         )
-        # override the default output from pydantic by calling `to_dict()` of recipe
-        if self.recipe:
-            _dict['recipe'] = self.recipe.to_dict()
+        # override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
+        if self.image_pull_secret_credentials:
+            _dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
         # set to None if user_annotations (nullable) is None
         # and model_fields_set contains the field
         if self.user_annotations is None and "user_annotations" in self.model_fields_set:
             _dict['user_annotations'] = None
-        # set to None if cserve_version (nullable) is None
+        # set to None if image_pull_secret_credentials (nullable) is None
         # and model_fields_set contains the field
-        if self.cserve_version is None and "cserve_version" in self.model_fields_set:
-            _dict['cserve_version'] = None
+        if self.image_pull_secret_credentials is None and "image_pull_secret_credentials" in self.model_fields_set:
+            _dict['image_pull_secret_credentials'] = None
-        # set to None if hf_token (nullable) is None
+        # set to None if env_vars (nullable) is None
         # and model_fields_set contains the field
-        if self.hf_token is None and "hf_token" in self.model_fields_set:
-            _dict['hf_token'] = None
+        if self.env_vars is None and "env_vars" in self.model_fields_set:
+            _dict['env_vars'] = None
-        # set to None if initial_scale (nullable) is None
+        # set to None if command (nullable) is None
         # and model_fields_set contains the field
-        if self.initial_scale is None and "initial_scale" in self.model_fields_set:
-            _dict['initial_scale'] = None
-        # set to None if endpoint_bearer_token (nullable) is None
-        # and model_fields_set contains the field
-        if self.endpoint_bearer_token is None and "endpoint_bearer_token" in self.model_fields_set:
-            _dict['endpoint_bearer_token'] = None
-        # set to None if endpoint_certificate_authority (nullable) is None
-        # and model_fields_set contains the field
-        if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
-            _dict['endpoint_certificate_authority'] = None
-        # set to None if concurrency (nullable) is None
-        # and model_fields_set contains the field
-        if self.concurrency is None and "concurrency" in self.model_fields_set:
-            _dict['concurrency'] = None
+        if self.command is None and "command" in self.model_fields_set:
+            _dict['command'] = None
         return _dict
     @classmethod
     def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
-        """Create an instance of CreateRagDeploymentRequest from a dict"""
+        """Create an instance of CreateJobDeploymentRequest from a dict"""
         if obj is None:
             return None
@@ -146,18 +126,13 @@ class CreateRagDeploymentRequest(BaseModel):
             "cluster_id": obj.get("cluster_id"),
             "hardware_instance_id": obj.get("hardware_instance_id"),
             "user_annotations": obj.get("user_annotations"),
-            "recipe": CServeV2Recipe.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None,
-            "cserve_version": obj.get("cserve_version"),
-            "hf_token": obj.get("hf_token"),
-            "llm_model": obj.get("llm_model"),
-            "centml_api_key": obj.get("centml_api_key"),
-            "min_scale": obj.get("min_scale") if obj.get("min_scale") is not None else 1,
-            "max_scale": obj.get("max_scale") if obj.get("max_scale") is not None else 1,
-            "initial_scale": obj.get("initial_scale"),
-            "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
-            "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
-            "concurrency": obj.get("concurrency"),
-            "env_vars": obj.get("env_vars")
+            "image_url": obj.get("image_url"),
+            "image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
+            "env_vars": obj.get("env_vars"),
+            "command": obj.get("command"),
+            "completions": obj.get("completions") if obj.get("completions") is not None else 1,
+            "parallelism": obj.get("parallelism") if obj.get("parallelism") is not None else 1,
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
         })
         return _obj

platform_api_python_client/models/{create_rag_deployment_response.py → create_job_deployment_response.py} RENAMED Viewed

@@ -23,9 +23,9 @@ from typing import Any, ClassVar, Dict, List
 from typing import Optional, Set
 from typing_extensions import Self
-class CreateRagDeploymentResponse(BaseModel):
+class CreateJobDeploymentResponse(BaseModel):
     """
-    CreateRagDeploymentResponse
+    CreateJobDeploymentResponse
     """ # noqa: E501
     id: StrictInt
     created_at: datetime
@@ -50,7 +50,7 @@ class CreateRagDeploymentResponse(BaseModel):
     @classmethod
     def from_json(cls, json_str: str) -> Optional[Self]:
-        """Create an instance of CreateRagDeploymentResponse from a JSON string"""
+        """Create an instance of CreateJobDeploymentResponse from a JSON string"""
         return cls.from_dict(json.loads(json_str))
     def to_dict(self) -> Dict[str, Any]:
@@ -75,7 +75,7 @@ class CreateRagDeploymentResponse(BaseModel):
     @classmethod
     def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
-        """Create an instance of CreateRagDeploymentResponse from a dict"""
+        """Create an instance of CreateJobDeploymentResponse from a dict"""
         if obj is None:
             return None

platform_api_python_client/models/deployment_type.py CHANGED Viewed

@@ -38,6 +38,7 @@ class DeploymentType(str, Enum):
     CSERVE_V3 = 'cserve_v3'
     DEPLOYMENT = 'deployment'
     RAG = 'rag'
+    JOB = 'job'
     @classmethod
     def from_json(cls, json_str: str) -> Self:

platform_api_python_client/models/get_c_serve_v3_deployment_response.py CHANGED Viewed

@@ -18,7 +18,7 @@ import re  # noqa: F401
 import json
 from datetime import datetime
-from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
 from platform_api_python_client.models.deployment_status import DeploymentStatus
@@ -50,9 +50,12 @@ class GetCServeV3DeploymentResponse(BaseModel):
     endpoint_certificate_authority: Optional[StrictStr] = None
     endpoint_bearer_token: Optional[StrictStr] = None
     concurrency: Optional[StrictInt] = None
+    cooldown_period: Optional[StrictInt] = 1800
     env_vars: Optional[Dict[str, StrictStr]] = None
     enable_logging: Optional[StrictBool] = True
-    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "env_vars", "enable_logging"]
+    enable_node_model_cache: Optional[StrictBool] = False
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
     model_config = ConfigDict(
         populate_by_name=True,
@@ -163,8 +166,11 @@ class GetCServeV3DeploymentResponse(BaseModel):
             "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
             "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
             "concurrency": obj.get("concurrency"),
+            "cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
             "env_vars": obj.get("env_vars"),
-            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
+            "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
         })
         return _obj

platform_api_python_client/models/get_inference_v3_deployment_response.py CHANGED Viewed

@@ -18,9 +18,10 @@ import re  # noqa: F401
 import json
 from datetime import datetime
-from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from platform_api_python_client.models.backend_protocol import BackendProtocol
+from platform_api_python_client.models.config_file_mount import ConfigFileMount
 from platform_api_python_client.models.deployment_status import DeploymentStatus
 from platform_api_python_client.models.deployment_type import DeploymentType
 from platform_api_python_client.models.image_pull_secret_credentials import ImagePullSecretCredentials
@@ -48,6 +49,7 @@ class GetInferenceV3DeploymentResponse(BaseModel):
     max_replicas: StrictInt
     initial_replicas: Optional[StrictInt] = None
     concurrency: Optional[StrictInt] = None
+    cooldown_period: Optional[StrictInt] = 1800
     healthcheck: Optional[StrictStr] = None
     endpoint_certificate_authority: Optional[StrictStr] = None
     endpoint_bearer_token: Optional[StrictStr] = None
@@ -58,7 +60,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
     image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
     backend_protocol: Optional[BackendProtocol] = None
     enable_logging: Optional[StrictBool] = True
-    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging"]
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    config_file: Optional[ConfigFileMount] = None
+    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging", "session_affinity", "config_file"]
     model_config = ConfigDict(
         populate_by_name=True,
@@ -102,6 +106,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
         # override the default output from pydantic by calling `to_dict()` of image_pull_secret_credentials
         if self.image_pull_secret_credentials:
             _dict['image_pull_secret_credentials'] = self.image_pull_secret_credentials.to_dict()
+        # override the default output from pydantic by calling `to_dict()` of config_file
+        if self.config_file:
+            _dict['config_file'] = self.config_file.to_dict()
         # set to None if image_url (nullable) is None
         # and model_fields_set contains the field
         if self.image_url is None and "image_url" in self.model_fields_set:
@@ -162,6 +169,11 @@ class GetInferenceV3DeploymentResponse(BaseModel):
         if self.image_pull_secret_credentials is None and "image_pull_secret_credentials" in self.model_fields_set:
             _dict['image_pull_secret_credentials'] = None
+        # set to None if config_file (nullable) is None
+        # and model_fields_set contains the field
+        if self.config_file is None and "config_file" in self.model_fields_set:
+            _dict['config_file'] = None
         return _dict
     @classmethod
@@ -191,6 +203,7 @@ class GetInferenceV3DeploymentResponse(BaseModel):
             "max_replicas": obj.get("max_replicas"),
             "initial_replicas": obj.get("initial_replicas"),
             "concurrency": obj.get("concurrency"),
+            "cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
             "healthcheck": obj.get("healthcheck"),
             "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
             "endpoint_bearer_token": obj.get("endpoint_bearer_token"),
@@ -200,7 +213,9 @@ class GetInferenceV3DeploymentResponse(BaseModel):
             "original_command": obj.get("original_command"),
             "image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
             "backend_protocol": obj.get("backend_protocol"),
-            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False,
+            "config_file": ConfigFileMount.from_dict(obj["config_file"]) if obj.get("config_file") is not None else None
         })
         return _obj

platform-api-python-client 4.8.4__py3-none-any.whl → 4.9.7__py3-none-any.whl

platform-api-python-client 4.8.4__py3-none-any.whl → 4.9.7__py3-none-any.whl