scale-gp-beta 0.1.0a2 (scale_gp_beta-0.1.0a2-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. scale_gp/__init__.py +96 -0
  2. scale_gp/_base_client.py +2058 -0
  3. scale_gp/_client.py +544 -0
  4. scale_gp/_compat.py +219 -0
  5. scale_gp/_constants.py +14 -0
  6. scale_gp/_exceptions.py +108 -0
  7. scale_gp/_files.py +123 -0
  8. scale_gp/_models.py +801 -0
  9. scale_gp/_qs.py +150 -0
  10. scale_gp/_resource.py +43 -0
  11. scale_gp/_response.py +830 -0
  12. scale_gp/_streaming.py +333 -0
  13. scale_gp/_types.py +217 -0
  14. scale_gp/_utils/__init__.py +57 -0
  15. scale_gp/_utils/_logs.py +25 -0
  16. scale_gp/_utils/_proxy.py +62 -0
  17. scale_gp/_utils/_reflection.py +42 -0
  18. scale_gp/_utils/_streams.py +12 -0
  19. scale_gp/_utils/_sync.py +86 -0
  20. scale_gp/_utils/_transform.py +402 -0
  21. scale_gp/_utils/_typing.py +149 -0
  22. scale_gp/_utils/_utils.py +414 -0
  23. scale_gp/_version.py +4 -0
  24. scale_gp/lib/.keep +4 -0
  25. scale_gp/pagination.py +83 -0
  26. scale_gp/py.typed +0 -0
  27. scale_gp/resources/__init__.py +103 -0
  28. scale_gp/resources/chat/__init__.py +33 -0
  29. scale_gp/resources/chat/chat.py +102 -0
  30. scale_gp/resources/chat/completions.py +1054 -0
  31. scale_gp/resources/completions.py +765 -0
  32. scale_gp/resources/files/__init__.py +33 -0
  33. scale_gp/resources/files/content.py +162 -0
  34. scale_gp/resources/files/files.py +558 -0
  35. scale_gp/resources/inference.py +210 -0
  36. scale_gp/resources/models.py +834 -0
  37. scale_gp/resources/question_sets.py +680 -0
  38. scale_gp/resources/questions.py +396 -0
  39. scale_gp/types/__init__.py +33 -0
  40. scale_gp/types/chat/__init__.py +8 -0
  41. scale_gp/types/chat/chat_completion.py +257 -0
  42. scale_gp/types/chat/chat_completion_chunk.py +240 -0
  43. scale_gp/types/chat/completion_create_params.py +156 -0
  44. scale_gp/types/chat/completion_create_response.py +11 -0
  45. scale_gp/types/completion.py +116 -0
  46. scale_gp/types/completion_create_params.py +108 -0
  47. scale_gp/types/file.py +30 -0
  48. scale_gp/types/file_create_params.py +13 -0
  49. scale_gp/types/file_delete_response.py +16 -0
  50. scale_gp/types/file_list.py +27 -0
  51. scale_gp/types/file_list_params.py +16 -0
  52. scale_gp/types/file_update_params.py +12 -0
  53. scale_gp/types/files/__init__.py +3 -0
  54. scale_gp/types/inference_create_params.py +25 -0
  55. scale_gp/types/inference_create_response.py +11 -0
  56. scale_gp/types/inference_model.py +167 -0
  57. scale_gp/types/inference_model_list.py +27 -0
  58. scale_gp/types/inference_response.py +14 -0
  59. scale_gp/types/inference_response_chunk.py +14 -0
  60. scale_gp/types/model_create_params.py +165 -0
  61. scale_gp/types/model_delete_response.py +16 -0
  62. scale_gp/types/model_list_params.py +20 -0
  63. scale_gp/types/model_update_params.py +161 -0
  64. scale_gp/types/question.py +68 -0
  65. scale_gp/types/question_create_params.py +59 -0
  66. scale_gp/types/question_list.py +27 -0
  67. scale_gp/types/question_list_params.py +16 -0
  68. scale_gp/types/question_set.py +106 -0
  69. scale_gp/types/question_set_create_params.py +115 -0
  70. scale_gp/types/question_set_delete_response.py +16 -0
  71. scale_gp/types/question_set_list.py +27 -0
  72. scale_gp/types/question_set_list_params.py +20 -0
  73. scale_gp/types/question_set_retrieve_params.py +12 -0
  74. scale_gp/types/question_set_update_params.py +23 -0
  75. scale_gp_beta-0.1.0a2.dist-info/METADATA +440 -0
  76. scale_gp_beta-0.1.0a2.dist-info/RECORD +78 -0
  77. scale_gp_beta-0.1.0a2.dist-info/WHEEL +4 -0
  78. scale_gp_beta-0.1.0a2.dist-info/licenses/LICENSE +201 -0
scale_gp/types/file_list.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .file import File
+from .._models import BaseModel
+
+__all__ = ["FileList"]
+
+
+class FileList(BaseModel):
+    has_more: bool
+    """Whether there are more items left to be fetched."""
+
+    items: List[File]
+
+    total: int
+    """The total of items that match the query.
+
+    This is greater than or equal to the number of items returned.
+    """
+
+    limit: Optional[int] = None
+    """The maximum number of items to return."""
+
+    object: Optional[Literal["list"]] = None
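
The list envelope above drives cursor pagination: has_more signals whether another page exists, and total counts all matching items, not just those returned. Below is a minimal pagination sketch built only on these fields; the fetch_page callable stands in for whatever client call actually returns a FileList, and the assumption that File exposes an id usable as a cursor is not taken from this diff.

from typing import Callable, Iterator, Optional

from scale_gp.types.file import File
from scale_gp.types.file_list import FileList


def iter_files(fetch_page: Callable[[Optional[str]], FileList]) -> Iterator[File]:
    """Yield every file by following the cursor until has_more is False."""
    cursor: Optional[str] = None
    while True:
        page = fetch_page(cursor)           # hypothetical: one API call per page
        yield from page.items
        if not page.has_more or not page.items:
            break
        cursor = page.items[-1].id          # assumed: the last item's id is the next cursor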
scale_gp/types/file_list_params.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+__all__ = ["FileListParams"]
+
+
+class FileListParams(TypedDict, total=False):
+    ending_before: Optional[str]
+
+    limit: int
+
+    starting_after: Optional[str]
scale_gp/types/file_update_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict
+from typing_extensions import TypedDict
+
+__all__ = ["FileUpdateParams"]
+
+
+class FileUpdateParams(TypedDict, total=False):
+    tags: Dict[str, object]
scale_gp/types/files/__init__.py
@@ -0,0 +1,3 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
scale_gp/types/inference_create_params.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict
+from typing_extensions import Required, TypedDict
+
+__all__ = ["InferenceCreateParams", "InferenceConfiguration"]
+
+
+class InferenceCreateParams(TypedDict, total=False):
+    model: Required[str]
+    """model specified as `vendor/name` (ex. openai/gpt-5)"""
+
+    args: Dict[str, object]
+    """Arguments passed into model"""
+
+    inference_configuration: InferenceConfiguration
+    """Vendor specific configuration"""
+
+
+class InferenceConfiguration(TypedDict, total=False):
+    num_retries: int
+
+    timeout_seconds: int
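
The request shape above is a typed dict: model is the only required key, while args and inference_configuration are optional. An illustrative payload follows; the model string matches the example in the field docstring, and the argument names are placeholders rather than values defined by the package.

from scale_gp.types.inference_create_params import InferenceConfiguration, InferenceCreateParams

retry_config: InferenceConfiguration = {"num_retries": 3, "timeout_seconds": 60}

inference_params: InferenceCreateParams = {
    "model": "openai/gpt-5",                    # required, "vendor/name" form
    "args": {"prompt": "Summarize this text"},  # placeholder arguments passed to the model
    "inference_configuration": retry_config,    # optional vendor-specific settings
}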
scale_gp/types/inference_create_response.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .inference_response import InferenceResponse
+from .inference_response_chunk import InferenceResponseChunk
+
+__all__ = ["InferenceCreateResponse"]
+
+InferenceCreateResponse: TypeAlias = Union[InferenceResponse, InferenceResponseChunk]
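
Because InferenceCreateResponse is a plain Union, callers have to distinguish a complete response from a streamed chunk themselves, either via isinstance or via the object discriminator ("generic_inference" vs. "generic_inference.chunk"). A small sketch, with the streaming mechanics left out since they are not part of this diff:

from scale_gp.types.inference_create_response import InferenceCreateResponse
from scale_gp.types.inference_response import InferenceResponse


def is_final(result: InferenceCreateResponse) -> bool:
    """True for a complete InferenceResponse, False for an InferenceResponseChunk."""
    return isinstance(result, InferenceResponse)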
scale_gp/types/inference_model.py
@@ -0,0 +1,167 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, List, Union, Optional
+from datetime import datetime
+from typing_extensions import Literal, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = [
+    "InferenceModel",
+    "VendorConfiguration",
+    "VendorConfigurationLaunchVendorConfiguration",
+    "VendorConfigurationLaunchVendorConfigurationModelImage",
+    "VendorConfigurationLaunchVendorConfigurationModelInfra",
+    "VendorConfigurationLlmEngineVendorConfiguration",
+]
+
+
+class VendorConfigurationLaunchVendorConfigurationModelImage(BaseModel):
+    command: List[str]
+
+    registry: str
+
+    repository: str
+
+    tag: str
+
+    env_vars: Optional[Dict[str, object]] = None
+
+    healthcheck_route: Optional[str] = None
+
+    predict_route: Optional[str] = None
+
+    readiness_delay: Optional[int] = None
+
+    request_schema: Optional[Dict[str, object]] = None
+
+    response_schema: Optional[Dict[str, object]] = None
+
+    streaming_command: Optional[List[str]] = None
+
+    streaming_predict_route: Optional[str] = None
+
+
+class VendorConfigurationLaunchVendorConfigurationModelInfra(BaseModel):
+    cpus: Union[str, int, None] = None
+
+    endpoint_type: Optional[Literal["async", "sync", "streaming"]] = None
+
+    gpu_type: Optional[
+        Literal[
+            "nvidia-tesla-t4",
+            "nvidia-ampere-a10",
+            "nvidia-ampere-a100",
+            "nvidia-ampere-a100e",
+            "nvidia-hopper-h100",
+            "nvidia-hopper-h100-1g20gb",
+            "nvidia-hopper-h100-3g40gb",
+        ]
+    ] = None
+
+    gpus: Optional[int] = None
+
+    high_priority: Optional[bool] = None
+
+    labels: Optional[Dict[str, str]] = None
+
+    max_workers: Optional[int] = None
+
+    memory: Optional[str] = None
+
+    min_workers: Optional[int] = None
+
+    per_worker: Optional[int] = None
+
+    public_inference: Optional[bool] = None
+
+    storage: Optional[str] = None
+
+
+class VendorConfigurationLaunchVendorConfiguration(BaseModel):
+    api_model_image: VendorConfigurationLaunchVendorConfigurationModelImage = FieldInfo(alias="model_image")
+
+    api_model_infra: VendorConfigurationLaunchVendorConfigurationModelInfra = FieldInfo(alias="model_infra")
+
+
+class VendorConfigurationLlmEngineVendorConfiguration(BaseModel):
+    model: str
+
+    checkpoint_path: Optional[str] = None
+
+    cpus: Optional[int] = None
+
+    default_callback_url: Optional[str] = None
+
+    endpoint_type: Optional[str] = None
+
+    gpu_type: Optional[str] = None
+
+    gpus: Optional[int] = None
+
+    high_priority: Optional[bool] = None
+
+    inference_framework: Optional[str] = None
+
+    inference_framework_image_tag: Optional[str] = None
+
+    labels: Optional[Dict[str, str]] = None
+
+    max_workers: Optional[int] = None
+
+    memory: Optional[str] = None
+
+    min_workers: Optional[int] = None
+
+    nodes_per_worker: Optional[int] = None
+
+    num_shards: Optional[int] = None
+
+    per_worker: Optional[int] = None
+
+    post_inference_hooks: Optional[List[str]] = None
+
+    public_inference: Optional[bool] = None
+
+    quantize: Optional[str] = None
+
+    source: Optional[str] = None
+
+    storage: Optional[str] = None
+
+    if TYPE_CHECKING:
+        # Stub to indicate that arbitrary properties are accepted.
+        # To access properties that are not valid identifiers you can use `getattr`, e.g.
+        # `getattr(obj, '$type')`
+        def __getattr__(self, attr: str) -> object: ...
+
+
+VendorConfiguration: TypeAlias = Union[
+    VendorConfigurationLaunchVendorConfiguration, VendorConfigurationLlmEngineVendorConfiguration
+]
+
+
+class InferenceModel(BaseModel):
+    id: str
+
+    created_at: datetime
+
+    created_by_user_id: str
+
+    api_model_type: Literal["generic", "completion", "chat_completion"] = FieldInfo(alias="model_type")
+
+    api_model_vendor: Literal["openai", "cohere", "vertex_ai", "anthropic", "launch", "llmengine", "model_zoo"] = (
+        FieldInfo(alias="model_vendor")
+    )
+
+    name: str
+
+    status: Literal["failed", "ready", "deploying"]
+
+    vendor_configuration: VendorConfiguration
+
+    api_model_metadata: Optional[Dict[str, object]] = FieldInfo(alias="model_metadata", default=None)
+
+    object: Optional[Literal["model"]] = None
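
A note on the api_-prefixed attributes above: pydantic treats model_ as a protected namespace, so wire fields such as model_type, model_vendor, model_image, and model_infra are exposed in Python as api_model_type, api_model_vendor, and so on, with the aliases preserving the original wire names. A minimal sketch of that round-trip, assuming a pydantic v2 environment where model_validate behaves as it does on a plain pydantic model (all values are placeholders):

from scale_gp.types.inference_model import VendorConfigurationLaunchVendorConfiguration

cfg = VendorConfigurationLaunchVendorConfiguration.model_validate(
    {
        "model_image": {                        # wire name; populates api_model_image
            "command": ["python", "serve.py"],
            "registry": "registry.example.com",
            "repository": "acme/summarizer",
            "tag": "v1",
        },
        "model_infra": {"gpus": 1},             # wire name; populates api_model_infra
    }
)
assert cfg.api_model_image.tag == "v1"          # read back through the aliased attribute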
scale_gp/types/inference_model_list.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .inference_model import InferenceModel
+
+__all__ = ["InferenceModelList"]
+
+
+class InferenceModelList(BaseModel):
+    has_more: bool
+    """Whether there are more items left to be fetched."""
+
+    items: List[InferenceModel]
+
+    total: int
+    """The total of items that match the query.
+
+    This is greater than or equal to the number of items returned.
+    """
+
+    limit: Optional[int] = None
+    """The maximum number of items to return."""
+
+    object: Optional[Literal["list"]] = None
scale_gp/types/inference_response.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["InferenceResponse"]
+
+
+class InferenceResponse(BaseModel):
+    response: object
+
+    object: Optional[Literal["generic_inference"]] = None
scale_gp/types/inference_response_chunk.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["InferenceResponseChunk"]
+
+
+class InferenceResponseChunk(BaseModel):
+    response: object
+
+    object: Optional[Literal["generic_inference.chunk"]] = None
scale_gp/types/model_create_params.py
@@ -0,0 +1,165 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+    "ModelCreateParams",
+    "LaunchModelCreateRequest",
+    "LaunchModelCreateRequestVendorConfiguration",
+    "LaunchModelCreateRequestVendorConfigurationModelImage",
+    "LaunchModelCreateRequestVendorConfigurationModelInfra",
+    "LlmEngineModelCreateRequest",
+    "LlmEngineModelCreateRequestVendorConfiguration",
+]
+
+
+class LaunchModelCreateRequest(TypedDict, total=False):
+    name: Required[str]
+    """Unique name to reference your model"""
+
+    vendor_configuration: Required[LaunchModelCreateRequestVendorConfiguration]
+
+    model_metadata: Dict[str, object]
+
+    model_type: Literal["generic"]
+
+    model_vendor: Literal["launch"]
+
+    on_conflict: Literal["error", "update"]
+
+
+class LaunchModelCreateRequestVendorConfigurationModelImage(TypedDict, total=False):
+    command: Required[List[str]]
+
+    registry: Required[str]
+
+    repository: Required[str]
+
+    tag: Required[str]
+
+    env_vars: Dict[str, object]
+
+    healthcheck_route: str
+
+    predict_route: str
+
+    readiness_delay: int
+
+    request_schema: Dict[str, object]
+
+    response_schema: Dict[str, object]
+
+    streaming_command: List[str]
+
+    streaming_predict_route: str
+
+
+class LaunchModelCreateRequestVendorConfigurationModelInfra(TypedDict, total=False):
+    cpus: Union[str, int]
+
+    endpoint_type: Literal["async", "sync", "streaming"]
+
+    gpu_type: Literal[
+        "nvidia-tesla-t4",
+        "nvidia-ampere-a10",
+        "nvidia-ampere-a100",
+        "nvidia-ampere-a100e",
+        "nvidia-hopper-h100",
+        "nvidia-hopper-h100-1g20gb",
+        "nvidia-hopper-h100-3g40gb",
+    ]
+
+    gpus: int
+
+    high_priority: bool
+
+    labels: Dict[str, str]
+
+    max_workers: int
+
+    memory: str
+
+    min_workers: int
+
+    per_worker: int
+
+    public_inference: bool
+
+    storage: str
+
+
+class LaunchModelCreateRequestVendorConfiguration(TypedDict, total=False):
+    model_image: Required[LaunchModelCreateRequestVendorConfigurationModelImage]
+
+    model_infra: Required[LaunchModelCreateRequestVendorConfigurationModelInfra]
+
+
+class LlmEngineModelCreateRequest(TypedDict, total=False):
+    name: Required[str]
+    """Unique name to reference your model"""
+
+    vendor_configuration: Required[LlmEngineModelCreateRequestVendorConfiguration]
+
+    model_metadata: Dict[str, object]
+
+    model_type: Literal["chat_completion"]
+
+    model_vendor: Literal["llmengine"]
+
+    on_conflict: Literal["error", "update"]
+
+
+class LlmEngineModelCreateRequestVendorConfigurationTyped(TypedDict, total=False):
+    model: Required[str]
+
+    checkpoint_path: str
+
+    cpus: int
+
+    default_callback_url: str
+
+    endpoint_type: str
+
+    gpu_type: str
+
+    gpus: int
+
+    high_priority: bool
+
+    inference_framework: str
+
+    inference_framework_image_tag: str
+
+    labels: Dict[str, str]
+
+    max_workers: int
+
+    memory: str
+
+    min_workers: int
+
+    nodes_per_worker: int
+
+    num_shards: int
+
+    per_worker: int
+
+    post_inference_hooks: List[str]
+
+    public_inference: bool
+
+    quantize: str
+
+    source: str
+
+    storage: str
+
+
+LlmEngineModelCreateRequestVendorConfiguration: TypeAlias = Union[
+    LlmEngineModelCreateRequestVendorConfigurationTyped, Dict[str, object]
+]
+
+ModelCreateParams: TypeAlias = Union[LaunchModelCreateRequest, LlmEngineModelCreateRequest]
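
ModelCreateParams is a union of two request shapes, selected by model_vendor: a "launch" request that ships a container image plus infrastructure settings, and an "llmengine" request whose vendor configuration may also be an arbitrary dict. An illustrative "launch" payload follows; every concrete value is a placeholder, not a default shipped with the SDK.

from scale_gp.types.model_create_params import LaunchModelCreateRequest

create_params: LaunchModelCreateRequest = {
    "name": "my-custom-model",                  # required unique model name
    "model_vendor": "launch",
    "model_type": "generic",
    "on_conflict": "error",                     # fail rather than update an existing model
    "vendor_configuration": {
        "model_image": {
            "command": ["python", "serve.py"],  # required container entrypoint
            "registry": "registry.example.com",
            "repository": "acme/my-custom-model",
            "tag": "v1",
        },
        "model_infra": {
            "endpoint_type": "sync",
            "gpu_type": "nvidia-ampere-a10",
            "gpus": 1,
            "min_workers": 0,
            "max_workers": 2,
        },
    },
}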
scale_gp/types/model_delete_response.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["ModelDeleteResponse"]
+
+
+class ModelDeleteResponse(BaseModel):
+    id: str
+
+    deleted: bool
+
+    object: Optional[Literal["model"]] = None
scale_gp/types/model_list_params.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+    ending_before: Optional[str]
+
+    limit: int
+
+    model_vendor: Optional[Literal["openai", "cohere", "vertex_ai", "anthropic", "launch", "llmengine", "model_zoo"]]
+
+    name: Optional[str]
+
+    starting_after: Optional[str]
scale_gp/types/model_update_params.py
@@ -0,0 +1,161 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+    "ModelUpdateParams",
+    "DefaultModelPatchRequest",
+    "ModelConfigurationPatchRequest",
+    "ModelConfigurationPatchRequestVendorConfiguration",
+    "ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfiguration",
+    "ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelImage",
+    "ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelInfra",
+    "ModelConfigurationPatchRequestVendorConfigurationPartialLlmEngineVendorConfiguration",
+    "SwapNamesModelPatchRequest",
+]
+
+
+class DefaultModelPatchRequest(TypedDict, total=False):
+    model_metadata: Dict[str, object]
+
+
+class ModelConfigurationPatchRequest(TypedDict, total=False):
+    vendor_configuration: Required[ModelConfigurationPatchRequestVendorConfiguration]
+
+    model_metadata: Dict[str, object]
+
+
+class ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelImage(
+    TypedDict, total=False
+):
+    command: List[str]
+
+    env_vars: Dict[str, object]
+
+    healthcheck_route: str
+
+    predict_route: str
+
+    readiness_delay: int
+
+    registry: str
+
+    repository: str
+
+    request_schema: Dict[str, object]
+
+    response_schema: Dict[str, object]
+
+    streaming_command: List[str]
+
+    streaming_predict_route: str
+
+    tag: str
+
+
+class ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelInfra(
+    TypedDict, total=False
+):
+    cpus: Union[str, int]
+
+    endpoint_type: Literal["async", "sync", "streaming"]
+
+    gpu_type: Literal[
+        "nvidia-tesla-t4",
+        "nvidia-ampere-a10",
+        "nvidia-ampere-a100",
+        "nvidia-ampere-a100e",
+        "nvidia-hopper-h100",
+        "nvidia-hopper-h100-1g20gb",
+        "nvidia-hopper-h100-3g40gb",
+    ]
+
+    gpus: int
+
+    high_priority: bool
+
+    labels: Dict[str, str]
+
+    max_workers: int
+
+    memory: str
+
+    min_workers: int
+
+    per_worker: int
+
+    public_inference: bool
+
+    storage: str
+
+
+class ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfiguration(TypedDict, total=False):
+    model_image: ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelImage
+
+    model_infra: ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelInfra
+
+
+class ModelConfigurationPatchRequestVendorConfigurationPartialLlmEngineVendorConfiguration(TypedDict, total=False):
+    checkpoint_path: str
+
+    cpus: int
+
+    default_callback_url: str
+
+    endpoint_type: str
+
+    gpu_type: str
+
+    gpus: int
+
+    high_priority: bool
+
+    inference_framework: str
+
+    inference_framework_image_tag: str
+
+    labels: Dict[str, str]
+
+    max_workers: int
+
+    memory: str
+
+    min_workers: int
+
+    model: str
+
+    nodes_per_worker: int
+
+    num_shards: int
+
+    per_worker: int
+
+    post_inference_hooks: List[str]
+
+    public_inference: bool
+
+    quantize: str
+
+    source: str
+
+    storage: str
+
+
+ModelConfigurationPatchRequestVendorConfiguration: TypeAlias = Union[
+    ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfiguration,
+    ModelConfigurationPatchRequestVendorConfigurationPartialLlmEngineVendorConfiguration,
+]
+
+
+class SwapNamesModelPatchRequest(TypedDict, total=False):
+    name: Required[str]
+
+    on_conflict: Literal["error", "swap"]
+
+
+ModelUpdateParams: TypeAlias = Union[
+    DefaultModelPatchRequest, ModelConfigurationPatchRequest, SwapNamesModelPatchRequest
+]
+ ]