scale-gp-beta 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scale_gp/__init__.py +96 -0
- scale_gp/_base_client.py +2058 -0
- scale_gp/_client.py +544 -0
- scale_gp/_compat.py +219 -0
- scale_gp/_constants.py +14 -0
- scale_gp/_exceptions.py +108 -0
- scale_gp/_files.py +123 -0
- scale_gp/_models.py +801 -0
- scale_gp/_qs.py +150 -0
- scale_gp/_resource.py +43 -0
- scale_gp/_response.py +830 -0
- scale_gp/_streaming.py +333 -0
- scale_gp/_types.py +217 -0
- scale_gp/_utils/__init__.py +57 -0
- scale_gp/_utils/_logs.py +25 -0
- scale_gp/_utils/_proxy.py +62 -0
- scale_gp/_utils/_reflection.py +42 -0
- scale_gp/_utils/_streams.py +12 -0
- scale_gp/_utils/_sync.py +86 -0
- scale_gp/_utils/_transform.py +402 -0
- scale_gp/_utils/_typing.py +149 -0
- scale_gp/_utils/_utils.py +414 -0
- scale_gp/_version.py +4 -0
- scale_gp/lib/.keep +4 -0
- scale_gp/pagination.py +83 -0
- scale_gp/py.typed +0 -0
- scale_gp/resources/__init__.py +103 -0
- scale_gp/resources/chat/__init__.py +33 -0
- scale_gp/resources/chat/chat.py +102 -0
- scale_gp/resources/chat/completions.py +1054 -0
- scale_gp/resources/completions.py +765 -0
- scale_gp/resources/files/__init__.py +33 -0
- scale_gp/resources/files/content.py +162 -0
- scale_gp/resources/files/files.py +558 -0
- scale_gp/resources/inference.py +210 -0
- scale_gp/resources/models.py +834 -0
- scale_gp/resources/question_sets.py +680 -0
- scale_gp/resources/questions.py +396 -0
- scale_gp/types/__init__.py +33 -0
- scale_gp/types/chat/__init__.py +8 -0
- scale_gp/types/chat/chat_completion.py +257 -0
- scale_gp/types/chat/chat_completion_chunk.py +240 -0
- scale_gp/types/chat/completion_create_params.py +156 -0
- scale_gp/types/chat/completion_create_response.py +11 -0
- scale_gp/types/completion.py +116 -0
- scale_gp/types/completion_create_params.py +108 -0
- scale_gp/types/file.py +30 -0
- scale_gp/types/file_create_params.py +13 -0
- scale_gp/types/file_delete_response.py +16 -0
- scale_gp/types/file_list.py +27 -0
- scale_gp/types/file_list_params.py +16 -0
- scale_gp/types/file_update_params.py +12 -0
- scale_gp/types/files/__init__.py +3 -0
- scale_gp/types/inference_create_params.py +25 -0
- scale_gp/types/inference_create_response.py +11 -0
- scale_gp/types/inference_model.py +167 -0
- scale_gp/types/inference_model_list.py +27 -0
- scale_gp/types/inference_response.py +14 -0
- scale_gp/types/inference_response_chunk.py +14 -0
- scale_gp/types/model_create_params.py +165 -0
- scale_gp/types/model_delete_response.py +16 -0
- scale_gp/types/model_list_params.py +20 -0
- scale_gp/types/model_update_params.py +161 -0
- scale_gp/types/question.py +68 -0
- scale_gp/types/question_create_params.py +59 -0
- scale_gp/types/question_list.py +27 -0
- scale_gp/types/question_list_params.py +16 -0
- scale_gp/types/question_set.py +106 -0
- scale_gp/types/question_set_create_params.py +115 -0
- scale_gp/types/question_set_delete_response.py +16 -0
- scale_gp/types/question_set_list.py +27 -0
- scale_gp/types/question_set_list_params.py +20 -0
- scale_gp/types/question_set_retrieve_params.py +12 -0
- scale_gp/types/question_set_update_params.py +23 -0
- scale_gp_beta-0.1.0a2.dist-info/METADATA +440 -0
- scale_gp_beta-0.1.0a2.dist-info/RECORD +78 -0
- scale_gp_beta-0.1.0a2.dist-info/WHEEL +4 -0
- scale_gp_beta-0.1.0a2.dist-info/licenses/LICENSE +201 -0
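The wheel bundles a generated client (scale_gp/_client.py) alongside typed request params (TypedDicts) and response models (pydantic models) under scale_gp/types/. A minimal orientation sketch follows; the exported client name and its constructor argument are assumptions not shown in this diff, so check scale_gp/__init__.py and scale_gp/_client.py for the real names.

# Orientation sketch only: `SGPClient` and `api_key=` are assumptions.
from scale_gp import SGPClient                      # assumed export
from scale_gp.types import FileListParams           # request shapes are TypedDicts
from scale_gp.types.file_list import FileList       # response shapes are pydantic models

client = SGPClient(api_key="...")                   # assumed keyword argument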
scale_gp/types/file_list.py
@@ -0,0 +1,27 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import List, Optional
from typing_extensions import Literal

from .file import File
from .._models import BaseModel

__all__ = ["FileList"]


class FileList(BaseModel):
    has_more: bool
    """Whether there are more items left to be fetched."""

    items: List[File]

    total: int
    """The total of items that match the query.

    This is greater than or equal to the number of items returned.
    """

    limit: Optional[int] = None
    """The maximum number of items to return."""

    object: Optional[Literal["list"]] = None
scale_gp/types/file_list_params.py
@@ -0,0 +1,16 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Optional
from typing_extensions import TypedDict

__all__ = ["FileListParams"]


class FileListParams(TypedDict, total=False):
    ending_before: Optional[str]

    limit: int

    starting_after: Optional[str]
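FileListParams mirrors a cursor-paginated list endpoint: `limit` caps the page size and `starting_after` / `ending_before` take item IDs to page forward or backward, while the FileList response above reports `has_more` and `total`. A hedged forward-pagination sketch, assuming the generated client exposes a `client.files.list(...)` method returning a FileList (the method name, return shape, and `File.id` field are assumptions inferred from scale_gp/resources/files/files.py):

# Hedged sketch: method name, return shape, and `File.id` are assumptions.
cursor = None
while True:
    page = client.files.list(limit=100, starting_after=cursor)
    for f in page.items:
        print(f.id)
    if not page.has_more:
        break
    cursor = page.items[-1].id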
scale_gp/types/file_update_params.py
@@ -0,0 +1,12 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict
from typing_extensions import TypedDict

__all__ = ["FileUpdateParams"]


class FileUpdateParams(TypedDict, total=False):
    tags: Dict[str, object]
scale_gp/types/inference_create_params.py
@@ -0,0 +1,25 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict
from typing_extensions import Required, TypedDict

__all__ = ["InferenceCreateParams", "InferenceConfiguration"]


class InferenceCreateParams(TypedDict, total=False):
    model: Required[str]
    """model specified as `vendor/name` (ex. openai/gpt-5)"""

    args: Dict[str, object]
    """Arguments passed into model"""

    inference_configuration: InferenceConfiguration
    """Vendor specific configuration"""


class InferenceConfiguration(TypedDict, total=False):
    num_retries: int

    timeout_seconds: int
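InferenceCreateParams is the request body for the generic inference endpoint: only `model` is required, `args` carries the free-form payload forwarded to the model, and `inference_configuration` tunes retries and timeouts. A hedged usage sketch, assuming the client exposes `client.inference.create(...)` (the resource exists in scale_gp/resources/inference.py, but the method name and argument style are assumptions):

# Hedged sketch: method name and argument style are assumptions.
result = client.inference.create(
    model="openai/gpt-5",  # "vendor/name" form, per the docstring above
    args={"prompt": "Hello"},  # free-form payload forwarded to the model
    inference_configuration={"num_retries": 2, "timeout_seconds": 30},
)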
scale_gp/types/inference_create_response.py
@@ -0,0 +1,11 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import Union
from typing_extensions import TypeAlias

from .inference_response import InferenceResponse
from .inference_response_chunk import InferenceResponseChunk

__all__ = ["InferenceCreateResponse"]

InferenceCreateResponse: TypeAlias = Union[InferenceResponse, InferenceResponseChunk]
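Because InferenceCreateResponse is a union, callers can tell the variants apart with isinstance checks or via the `object` tag ("generic_inference" vs "generic_inference.chunk", defined further below). A small hedged sketch; the re-exports from scale_gp.types are assumed:

# Hedged sketch: the re-exports from scale_gp.types are assumed.
from scale_gp.types import InferenceResponse, InferenceResponseChunk

def handle(resp):
    # Both variants wrap an opaque `response` payload; only the wrapper differs.
    if isinstance(resp, InferenceResponseChunk):
        pass  # one piece of a streamed result
    elif isinstance(resp, InferenceResponse):
        pass  # a complete result
    return resp.response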
scale_gp/types/inference_model.py
@@ -0,0 +1,167 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import TYPE_CHECKING, Dict, List, Union, Optional
from datetime import datetime
from typing_extensions import Literal, TypeAlias

from pydantic import Field as FieldInfo

from .._models import BaseModel

__all__ = [
    "InferenceModel",
    "VendorConfiguration",
    "VendorConfigurationLaunchVendorConfiguration",
    "VendorConfigurationLaunchVendorConfigurationModelImage",
    "VendorConfigurationLaunchVendorConfigurationModelInfra",
    "VendorConfigurationLlmEngineVendorConfiguration",
]


class VendorConfigurationLaunchVendorConfigurationModelImage(BaseModel):
    command: List[str]

    registry: str

    repository: str

    tag: str

    env_vars: Optional[Dict[str, object]] = None

    healthcheck_route: Optional[str] = None

    predict_route: Optional[str] = None

    readiness_delay: Optional[int] = None

    request_schema: Optional[Dict[str, object]] = None

    response_schema: Optional[Dict[str, object]] = None

    streaming_command: Optional[List[str]] = None

    streaming_predict_route: Optional[str] = None


class VendorConfigurationLaunchVendorConfigurationModelInfra(BaseModel):
    cpus: Union[str, int, None] = None

    endpoint_type: Optional[Literal["async", "sync", "streaming"]] = None

    gpu_type: Optional[
        Literal[
            "nvidia-tesla-t4",
            "nvidia-ampere-a10",
            "nvidia-ampere-a100",
            "nvidia-ampere-a100e",
            "nvidia-hopper-h100",
            "nvidia-hopper-h100-1g20gb",
            "nvidia-hopper-h100-3g40gb",
        ]
    ] = None

    gpus: Optional[int] = None

    high_priority: Optional[bool] = None

    labels: Optional[Dict[str, str]] = None

    max_workers: Optional[int] = None

    memory: Optional[str] = None

    min_workers: Optional[int] = None

    per_worker: Optional[int] = None

    public_inference: Optional[bool] = None

    storage: Optional[str] = None


class VendorConfigurationLaunchVendorConfiguration(BaseModel):
    api_model_image: VendorConfigurationLaunchVendorConfigurationModelImage = FieldInfo(alias="model_image")

    api_model_infra: VendorConfigurationLaunchVendorConfigurationModelInfra = FieldInfo(alias="model_infra")


class VendorConfigurationLlmEngineVendorConfiguration(BaseModel):
    model: str

    checkpoint_path: Optional[str] = None

    cpus: Optional[int] = None

    default_callback_url: Optional[str] = None

    endpoint_type: Optional[str] = None

    gpu_type: Optional[str] = None

    gpus: Optional[int] = None

    high_priority: Optional[bool] = None

    inference_framework: Optional[str] = None

    inference_framework_image_tag: Optional[str] = None

    labels: Optional[Dict[str, str]] = None

    max_workers: Optional[int] = None

    memory: Optional[str] = None

    min_workers: Optional[int] = None

    nodes_per_worker: Optional[int] = None

    num_shards: Optional[int] = None

    per_worker: Optional[int] = None

    post_inference_hooks: Optional[List[str]] = None

    public_inference: Optional[bool] = None

    quantize: Optional[str] = None

    source: Optional[str] = None

    storage: Optional[str] = None

    if TYPE_CHECKING:
        # Stub to indicate that arbitrary properties are accepted.
        # To access properties that are not valid identifiers you can use `getattr`, e.g.
        # `getattr(obj, '$type')`
        def __getattr__(self, attr: str) -> object: ...


VendorConfiguration: TypeAlias = Union[
    VendorConfigurationLaunchVendorConfiguration, VendorConfigurationLlmEngineVendorConfiguration
]


class InferenceModel(BaseModel):
    id: str

    created_at: datetime

    created_by_user_id: str

    api_model_type: Literal["generic", "completion", "chat_completion"] = FieldInfo(alias="model_type")

    api_model_vendor: Literal["openai", "cohere", "vertex_ai", "anthropic", "launch", "llmengine", "model_zoo"] = (
        FieldInfo(alias="model_vendor")
    )

    name: str

    status: Literal["failed", "ready", "deploying"]

    vendor_configuration: VendorConfiguration

    api_model_metadata: Optional[Dict[str, object]] = FieldInfo(alias="model_metadata", default=None)

    object: Optional[Literal["model"]] = None
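Note the FieldInfo aliases: on the wire the API uses `model_type`, `model_vendor`, `model_metadata`, `model_image`, and `model_infra`, but the generated attributes are prefixed with `api_` to avoid pydantic's reserved `model_` namespace. A hedged sketch of reading one of these objects; only the attribute names come from the definitions above, and how an InferenceModel instance is obtained (e.g. an assumed `client.models.retrieve`) is not shown in this diff:

# Hedged sketch: the re-export of InferenceModel from scale_gp.types is assumed.
from scale_gp.types import InferenceModel

def describe(m: InferenceModel) -> str:
    vendor = m.api_model_vendor  # serialized on the wire as "model_vendor"
    cfg = m.vendor_configuration
    # Distinguish the union members by their fields: the Launch variant carries
    # api_model_image / api_model_infra, the LLM Engine variant carries `model`.
    kind = "launch" if hasattr(cfg, "api_model_image") else "llmengine"
    return f"{m.name}: vendor={vendor}, config={kind}, status={m.status}"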
scale_gp/types/inference_model_list.py
@@ -0,0 +1,27 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import List, Optional
from typing_extensions import Literal

from .._models import BaseModel
from .inference_model import InferenceModel

__all__ = ["InferenceModelList"]


class InferenceModelList(BaseModel):
    has_more: bool
    """Whether there are more items left to be fetched."""

    items: List[InferenceModel]

    total: int
    """The total of items that match the query.

    This is greater than or equal to the number of items returned.
    """

    limit: Optional[int] = None
    """The maximum number of items to return."""

    object: Optional[Literal["list"]] = None
scale_gp/types/inference_response.py
@@ -0,0 +1,14 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import Optional
from typing_extensions import Literal

from .._models import BaseModel

__all__ = ["InferenceResponse"]


class InferenceResponse(BaseModel):
    response: object

    object: Optional[Literal["generic_inference"]] = None
scale_gp/types/inference_response_chunk.py
@@ -0,0 +1,14 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import Optional
from typing_extensions import Literal

from .._models import BaseModel

__all__ = ["InferenceResponseChunk"]


class InferenceResponseChunk(BaseModel):
    response: object

    object: Optional[Literal["generic_inference.chunk"]] = None
scale_gp/types/model_create_params.py
@@ -0,0 +1,165 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, List, Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict

__all__ = [
    "ModelCreateParams",
    "LaunchModelCreateRequest",
    "LaunchModelCreateRequestVendorConfiguration",
    "LaunchModelCreateRequestVendorConfigurationModelImage",
    "LaunchModelCreateRequestVendorConfigurationModelInfra",
    "LlmEngineModelCreateRequest",
    "LlmEngineModelCreateRequestVendorConfiguration",
]


class LaunchModelCreateRequest(TypedDict, total=False):
    name: Required[str]
    """Unique name to reference your model"""

    vendor_configuration: Required[LaunchModelCreateRequestVendorConfiguration]

    model_metadata: Dict[str, object]

    model_type: Literal["generic"]

    model_vendor: Literal["launch"]

    on_conflict: Literal["error", "update"]


class LaunchModelCreateRequestVendorConfigurationModelImage(TypedDict, total=False):
    command: Required[List[str]]

    registry: Required[str]

    repository: Required[str]

    tag: Required[str]

    env_vars: Dict[str, object]

    healthcheck_route: str

    predict_route: str

    readiness_delay: int

    request_schema: Dict[str, object]

    response_schema: Dict[str, object]

    streaming_command: List[str]

    streaming_predict_route: str


class LaunchModelCreateRequestVendorConfigurationModelInfra(TypedDict, total=False):
    cpus: Union[str, int]

    endpoint_type: Literal["async", "sync", "streaming"]

    gpu_type: Literal[
        "nvidia-tesla-t4",
        "nvidia-ampere-a10",
        "nvidia-ampere-a100",
        "nvidia-ampere-a100e",
        "nvidia-hopper-h100",
        "nvidia-hopper-h100-1g20gb",
        "nvidia-hopper-h100-3g40gb",
    ]

    gpus: int

    high_priority: bool

    labels: Dict[str, str]

    max_workers: int

    memory: str

    min_workers: int

    per_worker: int

    public_inference: bool

    storage: str


class LaunchModelCreateRequestVendorConfiguration(TypedDict, total=False):
    model_image: Required[LaunchModelCreateRequestVendorConfigurationModelImage]

    model_infra: Required[LaunchModelCreateRequestVendorConfigurationModelInfra]


class LlmEngineModelCreateRequest(TypedDict, total=False):
    name: Required[str]
    """Unique name to reference your model"""

    vendor_configuration: Required[LlmEngineModelCreateRequestVendorConfiguration]

    model_metadata: Dict[str, object]

    model_type: Literal["chat_completion"]

    model_vendor: Literal["llmengine"]

    on_conflict: Literal["error", "update"]


class LlmEngineModelCreateRequestVendorConfigurationTyped(TypedDict, total=False):
    model: Required[str]

    checkpoint_path: str

    cpus: int

    default_callback_url: str

    endpoint_type: str

    gpu_type: str

    gpus: int

    high_priority: bool

    inference_framework: str

    inference_framework_image_tag: str

    labels: Dict[str, str]

    max_workers: int

    memory: str

    min_workers: int

    nodes_per_worker: int

    num_shards: int

    per_worker: int

    post_inference_hooks: List[str]

    public_inference: bool

    quantize: str

    source: str

    storage: str


LlmEngineModelCreateRequestVendorConfiguration: TypeAlias = Union[
    LlmEngineModelCreateRequestVendorConfigurationTyped, Dict[str, object]
]

ModelCreateParams: TypeAlias = Union[LaunchModelCreateRequest, LlmEngineModelCreateRequest]
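ModelCreateParams accepts either request shape, keyed by `model_vendor`: the Launch variant requires both `model_image` and `model_infra`, while the LLM Engine variant takes a (mostly free-form) configuration keyed on `model`. A hedged sketch, assuming the client exposes `client.models.create(...)` (the resource exists in scale_gp/resources/models.py, but the method name is an assumption and all names and values below are placeholders):

# Hedged sketch: method name is assumed; model names, registry, and values are placeholders.
created = client.models.create(
    name="my-chat-model",
    model_vendor="llmengine",
    model_type="chat_completion",
    vendor_configuration={"model": "example-base-model", "num_shards": 1},
    on_conflict="update",
)

created = client.models.create(
    name="my-generic-model",
    model_vendor="launch",
    model_type="generic",
    vendor_configuration={
        "model_image": {
            "command": ["python", "serve.py"],
            "registry": "registry.example.com",
            "repository": "my-team/my-model",
            "tag": "latest",
        },
        "model_infra": {"endpoint_type": "sync", "cpus": 2, "memory": "8Gi"},
    },
)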
scale_gp/types/model_delete_response.py
@@ -0,0 +1,16 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import Optional
from typing_extensions import Literal

from .._models import BaseModel

__all__ = ["ModelDeleteResponse"]


class ModelDeleteResponse(BaseModel):
    id: str

    deleted: bool

    object: Optional[Literal["model"]] = None
scale_gp/types/model_list_params.py
@@ -0,0 +1,20 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Optional
from typing_extensions import Literal, TypedDict

__all__ = ["ModelListParams"]


class ModelListParams(TypedDict, total=False):
    ending_before: Optional[str]

    limit: int

    model_vendor: Optional[Literal["openai", "cohere", "vertex_ai", "anthropic", "launch", "llmengine", "model_zoo"]]

    name: Optional[str]

    starting_after: Optional[str]
scale_gp/types/model_update_params.py
@@ -0,0 +1,161 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, List, Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict

__all__ = [
    "ModelUpdateParams",
    "DefaultModelPatchRequest",
    "ModelConfigurationPatchRequest",
    "ModelConfigurationPatchRequestVendorConfiguration",
    "ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfiguration",
    "ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelImage",
    "ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelInfra",
    "ModelConfigurationPatchRequestVendorConfigurationPartialLlmEngineVendorConfiguration",
    "SwapNamesModelPatchRequest",
]


class DefaultModelPatchRequest(TypedDict, total=False):
    model_metadata: Dict[str, object]


class ModelConfigurationPatchRequest(TypedDict, total=False):
    vendor_configuration: Required[ModelConfigurationPatchRequestVendorConfiguration]

    model_metadata: Dict[str, object]


class ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelImage(
    TypedDict, total=False
):
    command: List[str]

    env_vars: Dict[str, object]

    healthcheck_route: str

    predict_route: str

    readiness_delay: int

    registry: str

    repository: str

    request_schema: Dict[str, object]

    response_schema: Dict[str, object]

    streaming_command: List[str]

    streaming_predict_route: str

    tag: str


class ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelInfra(
    TypedDict, total=False
):
    cpus: Union[str, int]

    endpoint_type: Literal["async", "sync", "streaming"]

    gpu_type: Literal[
        "nvidia-tesla-t4",
        "nvidia-ampere-a10",
        "nvidia-ampere-a100",
        "nvidia-ampere-a100e",
        "nvidia-hopper-h100",
        "nvidia-hopper-h100-1g20gb",
        "nvidia-hopper-h100-3g40gb",
    ]

    gpus: int

    high_priority: bool

    labels: Dict[str, str]

    max_workers: int

    memory: str

    min_workers: int

    per_worker: int

    public_inference: bool

    storage: str


class ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfiguration(TypedDict, total=False):
    model_image: ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelImage

    model_infra: ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfigurationModelInfra


class ModelConfigurationPatchRequestVendorConfigurationPartialLlmEngineVendorConfiguration(TypedDict, total=False):
    checkpoint_path: str

    cpus: int

    default_callback_url: str

    endpoint_type: str

    gpu_type: str

    gpus: int

    high_priority: bool

    inference_framework: str

    inference_framework_image_tag: str

    labels: Dict[str, str]

    max_workers: int

    memory: str

    min_workers: int

    model: str

    nodes_per_worker: int

    num_shards: int

    per_worker: int

    post_inference_hooks: List[str]

    public_inference: bool

    quantize: str

    source: str

    storage: str


ModelConfigurationPatchRequestVendorConfiguration: TypeAlias = Union[
    ModelConfigurationPatchRequestVendorConfigurationPartialLaunchVendorConfiguration,
    ModelConfigurationPatchRequestVendorConfigurationPartialLlmEngineVendorConfiguration,
]


class SwapNamesModelPatchRequest(TypedDict, total=False):
    name: Required[str]

    on_conflict: Literal["error", "swap"]


ModelUpdateParams: TypeAlias = Union[
    DefaultModelPatchRequest, ModelConfigurationPatchRequest, SwapNamesModelPatchRequest
]
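ModelUpdateParams is a union of three patch shapes: a metadata-only patch, a partial vendor-configuration patch, and a name swap. A hedged sketch, assuming the client exposes `client.models.update(model_id, ...)` (method name and the positional model-id argument are assumptions; IDs and names below are placeholders):

# Hedged sketch: method name, signature, IDs, and names are placeholders/assumptions.
client.models.update("model_abc123", model_metadata={"team": "search"})  # metadata-only patch

client.models.update(  # partial vendor-configuration patch; every nested key is optional
    "model_abc123",
    vendor_configuration={"model_infra": {"min_workers": 1, "max_workers": 4}},
)

client.models.update("model_abc123", name="prod-model", on_conflict="swap")  # rename / swap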