gmicloud 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gmicloud/__init__.py +2 -2
- gmicloud/_internal/_client/_artifact_client.py +40 -7
- gmicloud/_internal/_client/_file_upload_client.py +10 -7
- gmicloud/_internal/_config.py +9 -3
- gmicloud/_internal/_manager/_artifact_manager.py +116 -18
- gmicloud/_internal/_manager/_task_manager.py +32 -27
- gmicloud/_internal/_manager/serve_command_utils.py +121 -0
- gmicloud/_internal/_models.py +135 -31
- gmicloud/tests/test_artifacts.py +6 -22
- {gmicloud-0.1.6.dist-info → gmicloud-0.1.7.dist-info}/METADATA +97 -7
- {gmicloud-0.1.6.dist-info → gmicloud-0.1.7.dist-info}/RECORD +13 -12
- {gmicloud-0.1.6.dist-info → gmicloud-0.1.7.dist-info}/WHEEL +1 -1
- {gmicloud-0.1.6.dist-info → gmicloud-0.1.7.dist-info}/top_level.txt +0 -0
gmicloud/__init__.py
CHANGED
@@ -15,7 +15,7 @@ from ._internal._models import (
|
|
15
15
|
OneOffScheduling,
|
16
16
|
DailyScheduling,
|
17
17
|
DailyTrigger,
|
18
|
-
|
18
|
+
Template,
|
19
19
|
)
|
20
20
|
from ._internal._enums import (
|
21
21
|
BuildStatus,
|
@@ -39,7 +39,7 @@ __all__ = [
|
|
39
39
|
"OneOffScheduling",
|
40
40
|
"DailyScheduling",
|
41
41
|
"DailyTrigger",
|
42
|
-
"
|
42
|
+
"Template",
|
43
43
|
"BuildStatus",
|
44
44
|
"TaskEndpointStatus",
|
45
45
|
]
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from typing import List
|
2
2
|
import logging
|
3
3
|
from requests.exceptions import RequestException
|
4
|
-
|
4
|
+
import json
|
5
5
|
from ._http_client import HTTPClient
|
6
6
|
from ._iam_client import IAMClient
|
7
7
|
from ._decorator import handle_refresh_token
|
@@ -120,6 +120,39 @@ class ArtifactClient:
|
|
120
120
|
logger.error(f"Failed to rebuild artifact {artifact_id}: {e}")
|
121
121
|
return None
|
122
122
|
|
123
|
+
@handle_refresh_token
|
124
|
+
def add_env_parameters_to_artifact(self, artifact_id: str, env_parameters: dict[str, str]) -> None:
|
125
|
+
"""
|
126
|
+
Adds environment parameters to an existing artifact, identified by its ID.
|
127
|
+
|
128
|
+
:param artifact_id: The ID of the artifact to update.
|
129
|
+
:param env_parameters: Environment variables (name -> value) to append to the artifact's existing environment parameters.
|
130
|
+
"""
|
131
|
+
try:
|
132
|
+
old_artifact = self.get_artifact(artifact_id)
|
133
|
+
if not old_artifact:
|
134
|
+
logger.error(f"Artifact {artifact_id} not found")
|
135
|
+
return
|
136
|
+
request = UpdateArtifactRequestBody(
|
137
|
+
artifact_description=old_artifact.artifact_metadata.artifact_description,
|
138
|
+
artifact_name=old_artifact.artifact_metadata.artifact_name,
|
139
|
+
artifact_tags=old_artifact.artifact_metadata.artifact_tags,
|
140
|
+
env_parameters=old_artifact.artifact_parameters.env_parameters,
|
141
|
+
model_parameters=old_artifact.artifact_parameters.model_parameters
|
142
|
+
)
|
143
|
+
new_env_parameters = [EnvParameter(key=k, value=v) for k, v in env_parameters.items()]
|
144
|
+
if not request.env_parameters:
|
145
|
+
request.env_parameters = []
|
146
|
+
request.env_parameters.extend(new_env_parameters)
|
147
|
+
response = self.client.put(
|
148
|
+
f"/update_artifact?artifact_id={artifact_id}",
|
149
|
+
self.iam_client.get_custom_headers(),
|
150
|
+
request.model_dump()
|
151
|
+
)
|
152
|
+
except (RequestException, ValueError) as e:
|
153
|
+
logger.error(f"Failed to add env parameters to artifact {artifact_id}: {e}")
|
154
|
+
return
|
155
|
+
|
123
156
|
@handle_refresh_token
|
124
157
|
def delete_artifact(self, artifact_id: str) -> Optional[DeleteArtifactResponse]:
|
125
158
|
"""
|
@@ -140,7 +173,7 @@ class ArtifactClient:
|
|
140
173
|
return None
|
141
174
|
|
142
175
|
@handle_refresh_token
|
143
|
-
def get_bigfile_upload_url(self, request:
|
176
|
+
def get_bigfile_upload_url(self, request: ResumableUploadLinkRequest) -> Optional[ResumableUploadLinkResponse]:
|
144
177
|
"""
|
145
178
|
Generates a pre-signed URL for uploading a large file.
|
146
179
|
|
@@ -156,7 +189,7 @@ class ArtifactClient:
|
|
156
189
|
logger.error("Empty response from /get_bigfile_upload_url")
|
157
190
|
return None
|
158
191
|
|
159
|
-
return
|
192
|
+
return ResumableUploadLinkResponse.model_validate(response)
|
160
193
|
|
161
194
|
except (RequestException, ValueError) as e:
|
162
195
|
logger.error(f"Failed to generate upload URL: {e}")
|
@@ -186,12 +219,12 @@ class ArtifactClient:
|
|
186
219
|
return None
|
187
220
|
|
188
221
|
@handle_refresh_token
|
189
|
-
def get_public_templates(self) -> List[
|
222
|
+
def get_public_templates(self) -> List[Template]:
|
190
223
|
"""
|
191
224
|
Fetches all artifact templates.
|
192
225
|
|
193
|
-
:return: A list of
|
194
|
-
:rtype: List[
|
226
|
+
:return: A list of Template objects.
|
227
|
+
:rtype: List[Template]
|
195
228
|
"""
|
196
229
|
try:
|
197
230
|
response = self.client.get("/get_public_templates", self.iam_client.get_custom_headers())
|
@@ -201,7 +234,7 @@ class ArtifactClient:
|
|
201
234
|
return []
|
202
235
|
|
203
236
|
try:
|
204
|
-
result =
|
237
|
+
result = GetTemplatesResponse.model_validate(response)
|
205
238
|
return result.artifact_templates
|
206
239
|
except ValueError as ve:
|
207
240
|
logger.error(f"Failed to validate response data: {ve}")
|
@@ -1,8 +1,10 @@
|
|
1
1
|
import os
|
2
2
|
import requests
|
3
|
+
import logging
|
3
4
|
|
4
5
|
from .._exceptions import UploadFileError
|
5
6
|
|
7
|
+
logger = logging.getLogger()
|
6
8
|
|
7
9
|
class FileUploadClient:
|
8
10
|
CHUNK_SIZE = 10 * 1024 * 1024 # 10MB Default Chunk Size
|
@@ -45,13 +47,13 @@ class FileUploadClient:
|
|
45
47
|
"""
|
46
48
|
try:
|
47
49
|
file_size = os.path.getsize(file_path)
|
48
|
-
|
50
|
+
logger.info(f"File {file_path} size: {file_size} bytes")
|
49
51
|
|
50
52
|
start_byte = 0
|
51
53
|
uploaded_range = FileUploadClient._check_file_status(upload_url, file_size)
|
52
54
|
if uploaded_range:
|
53
55
|
start_byte = int(uploaded_range.split("-")[1]) + 1
|
54
|
-
|
56
|
+
logger.info(f"Resuming uploading {file_path} from {start_byte} bytes")
|
55
57
|
|
56
58
|
with open(file_path, "rb") as file:
|
57
59
|
while start_byte < file_size:
|
@@ -74,14 +76,15 @@ class FileUploadClient:
|
|
74
76
|
# Ensure upload is successful for this chunk
|
75
77
|
if resp.status_code not in (200, 201, 308):
|
76
78
|
raise UploadFileError(
|
77
|
-
f"Failed to upload file, code:{resp.status_code} ,message: {resp.text}")
|
79
|
+
f"Failed to upload file {file_path}, code:{resp.status_code} ,message: {resp.text}")
|
78
80
|
|
79
81
|
start_byte = end_byte + 1
|
80
|
-
|
82
|
+
percentage = (start_byte / file_size) * 100
|
83
|
+
logger.info(f"File {file_path} uploaded {end_byte + 1:,}/{file_size:,} bytes ({percentage:.2f}%)")
|
81
84
|
|
82
|
-
|
85
|
+
logger.info(f"File {file_path} uploaded successfully.")
|
83
86
|
except Exception as e:
|
84
|
-
raise UploadFileError(f"Failed to upload file: {str(e)}")
|
87
|
+
raise UploadFileError(f"Failed to upload file {file_path}, got error: {str(e)}")
|
85
88
|
|
86
89
|
@staticmethod
|
87
90
|
def _check_file_status(upload_url: str, file_size: int) -> str:
|
@@ -104,7 +107,7 @@ class FileUploadClient:
|
|
104
107
|
if resp.status_code == 308:
|
105
108
|
range_header = resp.headers.get("Range")
|
106
109
|
if range_header:
|
107
|
-
|
110
|
+
logger.info(f"Server reports partial upload range: {range_header}")
|
108
111
|
return range_header
|
109
112
|
|
110
113
|
if resp.status_code in (200, 201):
|
gmicloud/_internal/_config.py
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
# Dev environment
|
2
|
+
# ARTIFACT_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/artifact"
|
3
|
+
# TASK_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/task"
|
4
|
+
# IAM_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1"
|
5
|
+
|
6
|
+
# Prod environment
|
7
|
+
ARTIFACT_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1/ie/artifact"
|
8
|
+
TASK_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1/ie/task"
|
9
|
+
IAM_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1"
|
@@ -2,11 +2,16 @@ import os
|
|
2
2
|
import time
|
3
3
|
from typing import List
|
4
4
|
import mimetypes
|
5
|
+
import concurrent.futures
|
6
|
+
import re
|
7
|
+
from tqdm import tqdm
|
8
|
+
from tqdm.contrib.logging import logging_redirect_tqdm
|
5
9
|
|
6
10
|
from .._client._iam_client import IAMClient
|
7
11
|
from .._client._artifact_client import ArtifactClient
|
8
12
|
from .._client._file_upload_client import FileUploadClient
|
9
13
|
from .._models import *
|
14
|
+
from .._manager.serve_command_utils import parse_server_command, extract_gpu_num_from_serve_command
|
10
15
|
|
11
16
|
import logging
|
12
17
|
|
@@ -53,7 +58,12 @@ class ArtifactManager:
|
|
53
58
|
self,
|
54
59
|
artifact_name: str,
|
55
60
|
description: Optional[str] = "",
|
56
|
-
tags: Optional[List[str]] = None
|
61
|
+
tags: Optional[List[str]] = None,
|
62
|
+
deployment_type: Optional[str] = "",
|
63
|
+
template_id: Optional[str] = "",
|
64
|
+
env_parameters: Optional[List["EnvParameter"]] = None,
|
65
|
+
model_description: Optional[str] = "",
|
66
|
+
model_parameters: Optional[List["ModelParameter"]] = None,
|
57
67
|
) -> CreateArtifactResponse:
|
58
68
|
"""
|
59
69
|
Create a new artifact for a user.
|
@@ -69,11 +79,16 @@ class ArtifactManager:
|
|
69
79
|
|
70
80
|
req = CreateArtifactRequest(artifact_name=artifact_name,
|
71
81
|
artifact_description=description,
|
72
|
-
artifact_tags=tags,
|
82
|
+
artifact_tags=tags,
|
83
|
+
deployment_type=deployment_type,
|
84
|
+
template_id=template_id,
|
85
|
+
env_parameters=env_parameters,
|
86
|
+
model_description=model_description,
|
87
|
+
model_parameters=model_parameters)
|
73
88
|
|
74
89
|
return self.artifact_client.create_artifact(req)
|
75
90
|
|
76
|
-
def create_artifact_from_template(self, artifact_template_id: str) -> str:
|
91
|
+
def create_artifact_from_template(self, artifact_template_id: str, env_parameters: Optional[dict[str, str]] = None) -> str:
|
77
92
|
"""
|
78
93
|
Create a new artifact for a user using a template.
|
79
94
|
|
@@ -85,11 +100,16 @@ class ArtifactManager:
|
|
85
100
|
if not artifact_template_id or not artifact_template_id.strip():
|
86
101
|
raise ValueError("Artifact template ID is required and cannot be empty.")
|
87
102
|
|
103
|
+
|
88
104
|
resp = self.artifact_client.create_artifact_from_template(artifact_template_id)
|
89
105
|
if not resp or not resp.artifact_id:
|
90
106
|
raise ValueError("Failed to create artifact from template.")
|
91
107
|
|
108
|
+
if env_parameters:
|
109
|
+
self.artifact_client.add_env_parameters_to_artifact(resp.artifact_id, env_parameters)
|
110
|
+
|
92
111
|
return resp.artifact_id
|
112
|
+
|
93
113
|
|
94
114
|
def create_artifact_from_template_name(self, artifact_template_name: str) -> tuple[str, ReplicaResource]:
|
95
115
|
"""
|
@@ -125,6 +145,56 @@ class ArtifactManager:
|
|
125
145
|
except Exception as e:
|
126
146
|
logger.error(f"Failed to create artifact from template, Error: {e}")
|
127
147
|
raise e
|
148
|
+
|
149
|
+
def create_artifact_for_serve_command_and_custom_model(self, template_name: str, artifact_name: str, serve_command: str, gpu_type: str, artifact_description: str = "") -> tuple[str, ReplicaResource]:
|
150
|
+
"""
|
151
|
+
Create an artifact from a template and support custom model.
|
152
|
+
:param template_name: The name of the template to use.
|
153
|
+
:return: A tuple containing the artifact ID and the recommended replica resources.
|
154
|
+
:rtype: tuple[str, ReplicaResource]
|
155
|
+
"""
|
156
|
+
|
157
|
+
recommended_replica_resources = None
|
158
|
+
picked_template = None
|
159
|
+
try:
|
160
|
+
templates = self.get_public_templates()
|
161
|
+
except Exception as e:
|
162
|
+
logger.error(f"Failed to get artifact templates, Error: {e}")
|
163
|
+
for template in templates:
|
164
|
+
if template.template_data and template.template_data.name == template_name:
|
165
|
+
picked_template = template
|
166
|
+
break
|
167
|
+
if not picked_template:
|
168
|
+
raise ValueError(f"Template with name {template_name} not found.")
|
169
|
+
|
170
|
+
try:
|
171
|
+
if gpu_type not in ["H100", "H200"]:
|
172
|
+
raise ValueError("Only support A100 and H100 for now")
|
173
|
+
|
174
|
+
type, env_vars, serve_args_dict = parse_server_command(serve_command)
|
175
|
+
if type.lower() not in template_name.lower():
|
176
|
+
raise ValueError(f"Template {template_name} does not support inference with {type}.")
|
177
|
+
num_gpus = extract_gpu_num_from_serve_command(serve_args_dict)
|
178
|
+
recommended_replica_resources = ReplicaResource(
|
179
|
+
cpu=num_gpus * 16,
|
180
|
+
ram_gb=num_gpus * 100,
|
181
|
+
gpu=num_gpus,
|
182
|
+
gpu_name=gpu_type,
|
183
|
+
)
|
184
|
+
except Exception as e:
|
185
|
+
raise ValueError(f"Failed to parse serve command, Error: {e}")
|
186
|
+
|
187
|
+
try:
|
188
|
+
env_vars = [
|
189
|
+
EnvParameter(key="SERVE_COMMAND", value=serve_command),
|
190
|
+
EnvParameter(key="GPU_TYPE", value=gpu_type),
|
191
|
+
]
|
192
|
+
resp = self.create_artifact(artifact_name, artifact_description, deployment_type="template", template_id=picked_template.template_id, env_parameters=env_vars)
|
193
|
+
# Assume Artifact is already with BuildStatus.SUCCESS status
|
194
|
+
return resp.artifact_id, recommended_replica_resources
|
195
|
+
except Exception as e:
|
196
|
+
logger.error(f"Failed to create artifact from template, Error: {e}")
|
197
|
+
raise e
|
128
198
|
|
129
199
|
def rebuild_artifact(self, artifact_id: str) -> RebuildArtifactResponse:
|
130
200
|
"""
|
@@ -211,7 +281,7 @@ class ArtifactManager:
|
|
211
281
|
model_file_name = os.path.basename(model_file_path)
|
212
282
|
model_file_type = mimetypes.guess_type(model_file_path)[0]
|
213
283
|
|
214
|
-
req =
|
284
|
+
req = ResumableUploadLinkRequest(artifact_id=artifact_id, file_name=model_file_name, file_type=model_file_type)
|
215
285
|
|
216
286
|
resp = self.artifact_client.get_bigfile_upload_url(req)
|
217
287
|
if not resp or not resp.upload_link:
|
@@ -250,36 +320,64 @@ class ArtifactManager:
|
|
250
320
|
|
251
321
|
FileUploadClient.upload_large_file(upload_link, file_path)
|
252
322
|
|
323
|
+
|
324
|
+
def upload_model_files_to_artifact(self, artifact_id: str, model_directory: str) -> None:
|
325
|
+
"""
|
326
|
+
Upload model files to an existing artifact.
|
327
|
+
|
328
|
+
:param artifact_id: The ID of the artifact to upload the model files to.
|
329
|
+
:param model_directory: The path to the model directory.
|
330
|
+
"""
|
331
|
+
|
332
|
+
# List all files in the model directory recursively
|
333
|
+
model_file_paths = []
|
334
|
+
for root, _, files in os.walk(model_directory):
|
335
|
+
for file in files:
|
336
|
+
model_file_paths.append(os.path.join(root, file))
|
337
|
+
|
338
|
+
def upload_file(model_file_path):
|
339
|
+
self._validate_file_path(model_file_path)
|
340
|
+
bigfile_upload_url_resp = self.artifact_client.get_bigfile_upload_url(
|
341
|
+
ResumableUploadLinkRequest(artifact_id=artifact_id, file_name=os.path.basename(model_file_path))
|
342
|
+
)
|
343
|
+
FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
|
344
|
+
|
345
|
+
# Upload files in parallel with progress bar
|
346
|
+
with tqdm(total=len(model_file_paths), desc="Uploading model files") as progress_bar:
|
347
|
+
with logging_redirect_tqdm():
|
348
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
349
|
+
futures = {executor.submit(upload_file, path): path for path in model_file_paths}
|
350
|
+
for future in concurrent.futures.as_completed(futures):
|
351
|
+
try:
|
352
|
+
future.result()
|
353
|
+
except Exception as e:
|
354
|
+
logger.error(f"Failed to upload file {futures[future]}, Error: {e}")
|
355
|
+
progress_bar.update(1)
|
356
|
+
|
253
357
|
def create_artifact_with_model_files(
|
254
358
|
self,
|
255
359
|
artifact_name: str,
|
256
360
|
artifact_file_path: str,
|
257
|
-
|
361
|
+
model_directory: str,
|
258
362
|
description: Optional[str] = "",
|
259
363
|
tags: Optional[str] = None
|
260
364
|
) -> str:
|
261
365
|
"""
|
262
366
|
Create a new artifact for a user and upload model files associated with the artifact.
|
263
|
-
|
264
367
|
:param artifact_name: The name of the artifact.
|
265
368
|
:param artifact_file_path: The path to the artifact file(Dockerfile+serve.py).
|
266
|
-
:param
|
369
|
+
:param model_directory: The path to the model directory.
|
267
370
|
:param description: An optional description for the artifact.
|
268
371
|
:param tags: Optional tags associated with the artifact, as a comma-separated string.
|
269
372
|
:return: The `artifact_id` of the created artifact.
|
270
|
-
:raises FileNotFoundError: If the provided `file_path` does not exist.
|
271
373
|
"""
|
272
374
|
artifact_id = self.create_artifact_with_file(artifact_name, artifact_file_path, description, tags)
|
375
|
+
logger.info(f"Artifact created: {artifact_id}")
|
273
376
|
|
274
|
-
|
275
|
-
self._validate_file_path(model_file_path)
|
276
|
-
bigfile_upload_url_resp = self.artifact_client.get_bigfile_upload_url(
|
277
|
-
GetBigFileUploadUrlRequest(artifact_id=artifact_id, model_file_path=model_file_path)
|
278
|
-
)
|
279
|
-
FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
|
377
|
+
self.upload_model_files_to_artifact(artifact_id, model_directory)
|
280
378
|
|
281
379
|
return artifact_id
|
282
|
-
|
380
|
+
|
283
381
|
|
284
382
|
def wait_for_artifact_ready(self, artifact_id: str, timeout_s: int = 900) -> None:
|
285
383
|
"""
|
@@ -304,12 +402,12 @@ class ArtifactManager:
|
|
304
402
|
time.sleep(10)
|
305
403
|
|
306
404
|
|
307
|
-
def get_public_templates(self) -> List[
|
405
|
+
def get_public_templates(self) -> List[Template]:
|
308
406
|
"""
|
309
407
|
Fetch all artifact templates.
|
310
408
|
|
311
|
-
:return: A list of
|
312
|
-
:rtype: List[
|
409
|
+
:return: A list of Template objects.
|
410
|
+
:rtype: List[Template]
|
313
411
|
"""
|
314
412
|
return self.artifact_client.get_public_templates()
|
315
413
|
|
@@ -41,7 +41,7 @@ class TaskManager:
|
|
41
41
|
|
42
42
|
:return: A list of `Task` objects.
|
43
43
|
"""
|
44
|
-
resp = self.task_client.get_all_tasks(
|
44
|
+
resp = self.task_client.get_all_tasks()
|
45
45
|
if not resp or not resp.tasks:
|
46
46
|
return []
|
47
47
|
|
@@ -63,6 +63,7 @@ class TaskManager:
|
|
63
63
|
if not resp or not resp.task:
|
64
64
|
raise ValueError("Failed to create task.")
|
65
65
|
|
66
|
+
logger.info(f"Task created: {resp.task.task_id}")
|
66
67
|
return resp.task
|
67
68
|
|
68
69
|
def create_task_from_file(self, artifact_id: str, config_file_path: str, trigger_timestamp: int = None) -> Task:
|
@@ -138,48 +139,54 @@ class TaskManager:
|
|
138
139
|
return self.task_client.start_task(task_id)
|
139
140
|
|
140
141
|
|
141
|
-
def
|
142
|
+
def wait_for_task(self, task_id: str, timeout_s: int = 900) -> Task:
|
142
143
|
"""
|
143
|
-
|
144
|
+
Wait for a task to reach the RUNNING state or raise an exception if it fails.
|
144
145
|
|
145
|
-
:param task_id: The ID of the task to
|
146
|
+
:param task_id: The ID of the task to wait for.
|
146
147
|
:param timeout_s: The timeout in seconds.
|
147
148
|
:return: The task object.
|
148
149
|
:rtype: Task
|
149
150
|
"""
|
150
|
-
# trigger start task
|
151
|
-
try:
|
152
|
-
self.start_task(task_id)
|
153
|
-
logger.info(f"Started task ID: {task_id}")
|
154
|
-
except Exception as e:
|
155
|
-
logger.error(f"Failed to start task, Error: {e}")
|
156
|
-
raise e
|
157
|
-
|
158
151
|
start_time = time.time()
|
159
152
|
while True:
|
160
153
|
try:
|
161
154
|
task = self.get_task(task_id)
|
162
155
|
if task.task_status == TaskStatus.RUNNING:
|
163
|
-
|
164
|
-
elif task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
|
165
|
-
raise Exception(f"Unexpected task status after starting: {task.task_status}")
|
166
|
-
# Also check endpoint status.
|
167
|
-
elif task.task_status == TaskStatus.RUNNING:
|
168
|
-
if task.endpoint_info and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
|
156
|
+
if task.endpoint_info is not None and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
|
169
157
|
return task
|
170
|
-
elif task.endpoint_info and task.endpoint_info.endpoint_status in [TaskEndpointStatus.UNKNOWN, TaskEndpointStatus.ARCHIVED]:
|
171
|
-
raise Exception(f"Unexpected endpoint status after starting: {task.endpoint_info.endpoint_status}")
|
172
158
|
else:
|
173
|
-
|
159
|
+
if task.cluster_endpoints:
|
160
|
+
for ce in task.cluster_endpoints:
|
161
|
+
if ce.endpoint_status == TaskEndpointStatus.RUNNING:
|
162
|
+
return task
|
163
|
+
if task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
|
164
|
+
raise Exception(f"Unexpected task status after starting: {task.task_status}")
|
174
165
|
else:
|
175
166
|
logger.info(f"Pending task starting. Task status: {task.task_status}")
|
176
|
-
|
177
167
|
except Exception as e:
|
178
168
|
logger.error(f"Failed to get task, Error: {e}")
|
179
169
|
if time.time() - start_time > timeout_s:
|
180
170
|
raise Exception(f"Task creation takes more than {timeout_s // 60} minutes. Testing aborted.")
|
181
171
|
time.sleep(10)
|
182
172
|
|
173
|
+
def start_task_and_wait(self, task_id: str, timeout_s: int = 3600) -> Task:
|
174
|
+
"""
|
175
|
+
Start a task and wait for it to be ready.
|
176
|
+
|
177
|
+
:param task_id: The ID of the task to start.
|
178
|
+
:param timeout_s: The timeout in seconds.
|
179
|
+
:return: The task object.
|
180
|
+
:rtype: Task
|
181
|
+
"""
|
182
|
+
try:
|
183
|
+
self.start_task(task_id)
|
184
|
+
logger.info(f"Started task ID: {task_id}")
|
185
|
+
except Exception as e:
|
186
|
+
logger.error(f"Failed to start task, Error: {e}")
|
187
|
+
raise e
|
188
|
+
|
189
|
+
return self.wait_for_task(task_id, timeout_s)
|
183
190
|
|
184
191
|
def stop_task(self, task_id: str) -> bool:
|
185
192
|
"""
|
@@ -190,16 +197,15 @@ class TaskManager:
|
|
190
197
|
:raises ValueError: If `task_id` is invalid (None or empty string).
|
191
198
|
"""
|
192
199
|
self._validate_not_empty(task_id, "Task ID")
|
200
|
+
return self.task_client.stop_task(task_id)
|
193
201
|
|
194
202
|
|
195
|
-
def stop_task_and_wait(self, task_id: str, timeout_s: int =
|
196
|
-
task_manager = self.task_manager
|
203
|
+
def stop_task_and_wait(self, task_id: str, timeout_s: int = 3600):
|
197
204
|
try:
|
198
|
-
self.
|
205
|
+
self.stop_task(task_id)
|
199
206
|
logger.info(f"Stopping task ID: {task_id}")
|
200
207
|
except Exception as e:
|
201
208
|
logger.error(f"Failed to stop task, Error: {e}")
|
202
|
-
task_manager = self.task_manager
|
203
209
|
start_time = time.time()
|
204
210
|
while True:
|
205
211
|
try:
|
@@ -212,7 +218,6 @@ class TaskManager:
|
|
212
218
|
raise Exception(f"Task stopping takes more than {timeout_s // 60} minutes. Testing aborted.")
|
213
219
|
time.sleep(10)
|
214
220
|
|
215
|
-
return self.task_client.stop_task(task_id)
|
216
221
|
|
217
222
|
def get_usage_data(self, start_timestamp: str, end_timestamp: str) -> GetUsageDataResponse:
|
218
223
|
"""
|
@@ -0,0 +1,121 @@
|
|
1
|
+
import shlex
|
2
|
+
import os
|
3
|
+
import logging
|
4
|
+
|
5
|
+
logger = logging.getLogger(__name__)
|
6
|
+
|
7
|
+
def parse_server_command(cmd_str: str) -> tuple[str, dict, dict]:
|
8
|
+
"""
|
9
|
+
parse server command
|
10
|
+
Maybe there are more than two types of server command
|
11
|
+
if not found, we can add more parse functions
|
12
|
+
"""
|
13
|
+
if "vllm serve" in cmd_str:
|
14
|
+
return ("vllm", *parse_server_vllm_command(cmd_str))
|
15
|
+
elif "sglang.launch_server" in cmd_str:
|
16
|
+
return ("sglang", *parse_server_sglang_command(cmd_str))
|
17
|
+
else:
|
18
|
+
raise ValueError(f"Unknown serve command: {cmd_str}")
|
19
|
+
|
20
|
+
def extract_env_and_args(tokens: list) -> tuple[dict, list]:
|
21
|
+
"""
|
22
|
+
Extract environment variables from the tokens list.
|
23
|
+
The extracted variables are also written to os.environ.
|
24
|
+
"""
|
25
|
+
env_vars = {}
|
26
|
+
while tokens and '=' in tokens[0] and not tokens[0].startswith('--'):
|
27
|
+
key, value = tokens.pop(0).split('=', 1)
|
28
|
+
env_vars[key] = value
|
29
|
+
for k, v in env_vars.items():
|
30
|
+
os.environ[k] = v
|
31
|
+
return env_vars, tokens
|
32
|
+
|
33
|
+
def parse_flags_and_args(tokens: list) -> dict:
|
34
|
+
"""
|
35
|
+
parse flags and args
|
36
|
+
Handles three forms: --flag=value, --flag value, and --flag
|
37
|
+
"""
|
38
|
+
result = {}
|
39
|
+
i = 0
|
40
|
+
while i < len(tokens):
|
41
|
+
token = tokens[i]
|
42
|
+
if token.startswith('--'):
|
43
|
+
if '=' in token:
|
44
|
+
key, value = token[2:].split('=', 1)
|
45
|
+
result[key] = value.strip("'\"")
|
46
|
+
elif i + 1 < len(tokens) and not tokens[i + 1].startswith('--'):
|
47
|
+
result[token[2:]] = tokens[i + 1].strip("'\"")
|
48
|
+
i += 1
|
49
|
+
elif i + 1 < len(tokens) and not tokens[i + 1].startswith('-'):
|
50
|
+
result[token[1:]] = tokens[i + 1].strip("'\"")
|
51
|
+
i += 1
|
52
|
+
else:
|
53
|
+
result[token[2:]] = True
|
54
|
+
else:
|
55
|
+
logger.warning(f"Ignoring unknown token: {token}")
|
56
|
+
i += 1
|
57
|
+
return result
|
58
|
+
|
59
|
+
def parse_server_vllm_command(cmd_str: str) -> tuple[dict, dict]:
|
60
|
+
""" parse vllm command"""
|
61
|
+
tokens = shlex.split(cmd_str)
|
62
|
+
result = {}
|
63
|
+
|
64
|
+
# Extract environment variables
|
65
|
+
env_vars, tokens = extract_env_and_args(tokens)
|
66
|
+
if env_vars:
|
67
|
+
result["env_vars"] = env_vars
|
68
|
+
|
69
|
+
# vllm serve + model
|
70
|
+
if tokens[:2] != ['vllm', 'serve']:
|
71
|
+
raise ValueError("Invalid vllm serve command format. Example: vllm serve <model path>")
|
72
|
+
|
73
|
+
if len(tokens) < 3:
|
74
|
+
raise ValueError("Missing model path in vllm serve command. Example: vllm serve <model path>")
|
75
|
+
|
76
|
+
model_path = tokens[2]
|
77
|
+
result["model-path"] = model_path
|
78
|
+
|
79
|
+
flags = parse_flags_and_args(tokens[3:])
|
80
|
+
result.update(flags)
|
81
|
+
return (env_vars, result)
|
82
|
+
|
83
|
+
def parse_server_sglang_command(cmd_str: str) -> tuple[dict, dict]:
|
84
|
+
""" parse sglang command"""
|
85
|
+
tokens = shlex.split(cmd_str)
|
86
|
+
result = {}
|
87
|
+
|
88
|
+
# Extract environment variables
|
89
|
+
env_vars, tokens = extract_env_and_args(tokens)
|
90
|
+
if env_vars:
|
91
|
+
result["env_vars"] = env_vars
|
92
|
+
# python3 -m sglang.launch_server
|
93
|
+
if tokens[:3] != ['python3', '-m', 'sglang.launch_server'] and tokens[:3] != ['python', '-m', 'sglang.launch_server']:
|
94
|
+
raise ValueError("Invalid sglang command format. Example: python3 -m sglang.launch_server")
|
95
|
+
|
96
|
+
flags = parse_flags_and_args(tokens[3:])
|
97
|
+
result.update(flags)
|
98
|
+
return (env_vars, result)
|
99
|
+
|
100
|
+
def extract_gpu_num_from_serve_command(serve_args_dict: dict) -> int:
|
101
|
+
""" extract gpu num from serve command """
|
102
|
+
cmd_tp_size = 1
|
103
|
+
cmd_dp_size = 1
|
104
|
+
if "tensor-parallel-size" in serve_args_dict:
|
105
|
+
cmd_tp_size = int(serve_args_dict["tensor-parallel-size"])
|
106
|
+
elif "tp" in serve_args_dict:
|
107
|
+
cmd_tp_size = int(serve_args_dict["tp"])
|
108
|
+
elif "tp-size" in serve_args_dict:
|
109
|
+
cmd_tp_size = int(serve_args_dict["tp-size"])
|
110
|
+
if "data-parallel-size" in serve_args_dict:
|
111
|
+
cmd_dp_size = int(serve_args_dict["data-parallel-size"])
|
112
|
+
elif "dp" in serve_args_dict:
|
113
|
+
cmd_dp_size = int(serve_args_dict["dp"])
|
114
|
+
elif "dp-size" in serve_args_dict:
|
115
|
+
cmd_dp_size = int(serve_args_dict["dp-size"])
|
116
|
+
if "pipeline_parallel_size" in serve_args_dict or "pp" in serve_args_dict:
|
117
|
+
raise ValueError("Pipeline parallel size is not supported.")
|
118
|
+
cmd_gpu_num = cmd_tp_size * cmd_dp_size
|
119
|
+
if cmd_gpu_num > 8:
|
120
|
+
raise ValueError("Only support up to 8 GPUs for single task replica.")
|
121
|
+
return cmd_gpu_num
|
gmicloud/_internal/_models.py
CHANGED
@@ -22,9 +22,13 @@ class ArtifactMetadata(BaseModel):
|
|
22
22
|
user_id: Optional[str] = "" # The user ID associated with this artifact.
|
23
23
|
artifact_name: Optional[str] = "" # Name of the artifact.
|
24
24
|
artifact_description: Optional[str] = "" # Description of the artifact.
|
25
|
-
artifact_tags: Optional[List[str]] =
|
25
|
+
artifact_tags: Optional[List[str]] = None # Changed from List[str] with default to Optional[List[str]]
|
26
26
|
artifact_volume_path: Optional[str] = "" # Path to the volume where the artifact is stored.
|
27
27
|
artifact_template_id: Optional[str] = "" # The template ID used to create this artifact.
|
28
|
+
artifact_icon_link: Optional[str] = "" # Link to the icon for the artifact.
|
29
|
+
is_public: Optional[bool] = False # Indicates if the artifact is public.
|
30
|
+
org_id: Optional[str] = "" # Organization ID associated with this artifact.
|
31
|
+
update_by: Optional[str] = "" # User ID who last updated the artifact.
|
28
32
|
|
29
33
|
|
30
34
|
class ArtifactData(BaseModel):
|
@@ -43,6 +47,29 @@ class ArtifactData(BaseModel):
|
|
43
47
|
update_at: Optional[datetime] # Timestamp when the artifact was last updated.
|
44
48
|
|
45
49
|
|
50
|
+
class EnvParameter(BaseModel):
|
51
|
+
"""
|
52
|
+
Environment parameter for an artifact.
|
53
|
+
"""
|
54
|
+
key: str # Key for the environment parameter.
|
55
|
+
value: str # Value for the environment parameter.
|
56
|
+
|
57
|
+
|
58
|
+
class ArtifactDetails(BaseModel):
|
59
|
+
"""
|
60
|
+
Additional details for an artifact.
|
61
|
+
"""
|
62
|
+
model_description: Optional[str] = "" # Description of the model.
|
63
|
+
|
64
|
+
|
65
|
+
class ArtifactParameters(BaseModel):
|
66
|
+
"""
|
67
|
+
Parameters for an artifact.
|
68
|
+
"""
|
69
|
+
env_parameters: Optional[List[EnvParameter]] = None # Environment parameters.
|
70
|
+
model_parameters: Optional[List["ModelParameter"]] = None # Model parameters.
|
71
|
+
|
72
|
+
|
46
73
|
class Artifact(BaseModel):
|
47
74
|
"""
|
48
75
|
Representation of an artifact, including its data and metadata.
|
@@ -53,6 +80,7 @@ class Artifact(BaseModel):
|
|
53
80
|
build_status: Optional[BuildStatus] = None # Status of the artifact build (e.g., in progress, succeeded, failed).
|
54
81
|
artifact_data: Optional[ArtifactData] = None # Data associated with the artifact.
|
55
82
|
artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
|
83
|
+
artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
|
56
84
|
big_files_metadata: Optional[List[BigFileMetadata]] = None # Metadata for large files associated with the artifact.
|
57
85
|
|
58
86
|
|
@@ -69,7 +97,11 @@ class CreateArtifactRequest(BaseModel):
|
|
69
97
|
"""
|
70
98
|
artifact_name: str # The name of the artifact to create.
|
71
99
|
artifact_description: Optional[str] = "" # Description of the artifact.
|
72
|
-
artifact_tags: Optional[List[str]] = None # Tags for the artifact
|
100
|
+
artifact_tags: Optional[List[str]] = None # Tags for the artifact.
|
101
|
+
deployment_type: Optional[str] = "" # Deployment type
|
102
|
+
template_id: Optional[str] = "" # Template ID
|
103
|
+
env_parameters: Optional[List["EnvParameter"]] = None # Environment parameters.
|
104
|
+
model_description: Optional[str] = "" # Description of the model.
|
73
105
|
model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact.
|
74
106
|
|
75
107
|
|
@@ -79,9 +111,10 @@ class CreateArtifactResponse(BaseModel):
|
|
79
111
|
"""
|
80
112
|
artifact_id: str # ID of the newly created artifact.
|
81
113
|
upload_link: str # URL to upload the artifact data.
|
114
|
+
artifact_icon_link: Optional[str] = "" # Link to the icon for the artifact.
|
82
115
|
|
83
116
|
|
84
|
-
class
|
117
|
+
class ResumableUploadLinkRequest(BaseModel):
|
85
118
|
"""
|
86
119
|
Request to generate a pre-signed URL for uploading large files.
|
87
120
|
"""
|
@@ -90,7 +123,7 @@ class GetBigFileUploadUrlRequest(BaseModel):
|
|
90
123
|
file_type: Optional[str] = "" # MIME type of the file.
|
91
124
|
|
92
125
|
|
93
|
-
class
|
126
|
+
class ResumableUploadLinkResponse(BaseModel):
|
94
127
|
"""
|
95
128
|
Response containing a pre-signed upload URL for large files.
|
96
129
|
"""
|
@@ -98,6 +131,13 @@ class GetBigFileUploadUrlResponse(BaseModel):
|
|
98
131
|
upload_link: str # Pre-signed upload URL for the file.
|
99
132
|
|
100
133
|
|
134
|
+
class RebuildArtifactRequest(BaseModel):
|
135
|
+
"""
|
136
|
+
Request object for rebuilding an artifact.
|
137
|
+
"""
|
138
|
+
artifact_id: str # ID of the artifact to rebuild.
|
139
|
+
|
140
|
+
|
101
141
|
class RebuildArtifactResponse(BaseModel):
|
102
142
|
"""
|
103
143
|
Response object after rebuilding an artifact.
|
@@ -106,6 +146,91 @@ class RebuildArtifactResponse(BaseModel):
|
|
106
146
|
build_status: BuildStatus # Status of the artifact build (e.g., in progress, succeeded, failed).
|
107
147
|
|
108
148
|
|
149
|
+
class EndpointInfo(BaseModel):
|
150
|
+
"""
|
151
|
+
Additional information about the task endpoint.
|
152
|
+
"""
|
153
|
+
endpoint_status: Optional[TaskEndpointStatus] = None # Current status of the task (e.g., running, stopped).
|
154
|
+
endpoint_url: Optional[str] = "" # URL for accessing the task endpoint.
|
155
|
+
|
156
|
+
|
157
|
+
class GetAllArtifactsWithEndpointsResponse(BaseModel):
|
158
|
+
"""
|
159
|
+
Response containing a list of all artifacts with their endpoints.
|
160
|
+
"""
|
161
|
+
artifact_id: str # Unique identifier for the artifact.
|
162
|
+
artifact_data: Optional[ArtifactData] = None # Data associated with the artifact.
|
163
|
+
artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
|
164
|
+
artifact_details: Optional[ArtifactDetails] = None # Additional details about the artifact.
|
165
|
+
artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
|
166
|
+
big_files_metadata: Optional[List[BigFileMetadata]] = None # Metadata for large files.
|
167
|
+
endpoints: Optional[List[EndpointInfo]] = None # Endpoints associated with the artifact.
|
168
|
+
|
169
|
+
|
170
|
+
class GetArtifactResponse(BaseModel):
|
171
|
+
"""
|
172
|
+
Response containing the details of an artifact.
|
173
|
+
"""
|
174
|
+
artifact_id: str # Unique identifier for the artifact.
|
175
|
+
artifact_link: Optional[str] = "" # Link to access the artifact.
|
176
|
+
artifact_resource: Optional[str] = "" # Resource associated with the artifact.
|
177
|
+
build_file_name: Optional[str] = "" # Name of the file used for the build.
|
178
|
+
build_status: Optional[str] = "" # Status of the artifact build.
|
179
|
+
artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
|
180
|
+
artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
|
181
|
+
big_files_metadata: Optional[List[BigFileMetadata]] = None # Metadata for large files.
|
182
|
+
|
183
|
+
|
184
|
+
class GetPublicArtifactsResponse(BaseModel):
|
185
|
+
"""
|
186
|
+
Response containing public artifact details.
|
187
|
+
"""
|
188
|
+
artifact_id: str # Unique identifier for the artifact.
|
189
|
+
artifact_data: Optional[ArtifactData] = None # Data associated with the artifact.
|
190
|
+
artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
|
191
|
+
artifact_details: Optional[ArtifactDetails] = None # Additional details about the artifact.
|
192
|
+
artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
|
193
|
+
endpoints: Optional[List[EndpointInfo]] = None # Endpoints associated with the artifact.
|
194
|
+
|
195
|
+
|
196
|
+
class UpdateArtifactRequestBody(BaseModel):
|
197
|
+
"""
|
198
|
+
Request object for updating an artifact.
|
199
|
+
"""
|
200
|
+
artifact_name: Optional[str] = "" # The name of the artifact.
|
201
|
+
artifact_description: Optional[str] = "" # Description of the artifact.
|
202
|
+
artifact_tags: Optional[List[str]] = None # Tags for the artifact.
|
203
|
+
env_parameters: Optional[List[EnvParameter]] = None # Environment parameters.
|
204
|
+
model_description: Optional[str] = "" # Description of the model.
|
205
|
+
model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact.
|
206
|
+
need_update_icon: Optional[bool] = False # Whether to update the artifact icon.
|
207
|
+
|
208
|
+
|
209
|
+
class UpdateArtifactResponse(BaseModel):
|
210
|
+
"""
|
211
|
+
Response object after updating an artifact.
|
212
|
+
"""
|
213
|
+
artifact_id: str # ID of the updated artifact.
|
214
|
+
status: str # Status of the update operation.
|
215
|
+
artifact_icon_link: Optional[str] = "" # Link to the icon for the artifact.
|
216
|
+
|
217
|
+
|
218
|
+
class GetTemplatesResponse(BaseModel):
|
219
|
+
"""
|
220
|
+
Response containing a list of artifact templates.
|
221
|
+
"""
|
222
|
+
artifact_templates: list["Template"] # List of artifact templates.
|
223
|
+
|
224
|
+
|
225
|
+
class Template(BaseModel):
|
226
|
+
"""
|
227
|
+
Template for creating an artifact.
|
228
|
+
"""
|
229
|
+
template_id: str # Unique identifier for the artifact template.
|
230
|
+
template_data: Optional["TemplateData"] = None # Data for the artifact template.
|
231
|
+
template_metadata: Optional["TemplateMetadata"] = None # Metadata for the artifact template.
|
232
|
+
|
233
|
+
|
109
234
|
class DeleteArtifactResponse(BaseModel):
|
110
235
|
"""
|
111
236
|
Response object after deleting an artifact.
|
@@ -132,22 +257,6 @@ class DeleteBigfileResponse(BaseModel):
|
|
132
257
|
status: Optional[str] = "" # Status of the deletion process.
|
133
258
|
|
134
259
|
|
135
|
-
class GetPublicTemplatesResponse(BaseModel):
|
136
|
-
"""
|
137
|
-
Response containing a list of artifact templates.
|
138
|
-
"""
|
139
|
-
artifact_templates: list["ArtifactTemplate"] # List of artifact templates.
|
140
|
-
|
141
|
-
|
142
|
-
class ArtifactTemplate(BaseModel):
|
143
|
-
"""
|
144
|
-
Template for creating an artifact.
|
145
|
-
"""
|
146
|
-
template_id: str # Unique identifier for the artifact template.
|
147
|
-
template_data: Optional["TemplateData"] = None # Data for the artifact template.
|
148
|
-
template_metadata: Optional["TemplateMetadata"] = None # Metadata for the artifact template.
|
149
|
-
|
150
|
-
|
151
260
|
class TemplateMetadata(BaseModel):
|
152
261
|
"""
|
153
262
|
Metadata for an artifact template.
|
@@ -158,6 +267,7 @@ class TemplateMetadata(BaseModel):
|
|
158
267
|
is_public: Optional[bool] = False # Indicates if the template is public.
|
159
268
|
update_at: Optional[str] = None # Timestamp when the template was last updated.
|
160
269
|
update_by: Optional[str] = "" # ID of the user who last updated the template.
|
270
|
+
status: Optional[str] = "" # Status of the template.
|
161
271
|
|
162
272
|
|
163
273
|
class TemplateData(BaseModel):
|
@@ -173,6 +283,7 @@ class TemplateData(BaseModel):
|
|
173
283
|
resources: Optional["ResourcesTemplate"] = None # Resource allocation template.
|
174
284
|
tags: Optional[List[str]] = None # Tags associated with the artifact template.
|
175
285
|
volume_path: Optional[str] = "" # Path to the volume where the artifact is stored.
|
286
|
+
env_parameters: Optional[List["EnvParameter"]] = None # Added missing field
|
176
287
|
|
177
288
|
|
178
289
|
class ModelParameter(BaseModel):
|
@@ -204,8 +315,9 @@ class CreateArtifactFromTemplateRequest(BaseModel):
|
|
204
315
|
"""
|
205
316
|
Request object to create a new artifact from a template.
|
206
317
|
"""
|
207
|
-
user_id: str # The user ID creating the artifact.
|
318
|
+
# user_id: str # The user ID creating the artifact.
|
208
319
|
artifact_template_id: str # The ID of the artifact template to use.
|
320
|
+
env_parameters: Optional[List["EnvParameter"]] = None # Environment parameters.
|
209
321
|
|
210
322
|
|
211
323
|
class CreateArtifactFromTemplateResponse(BaseModel):
|
@@ -304,14 +416,6 @@ class TaskConfig(BaseModel):
|
|
304
416
|
last_update_timestamp: Optional[int] = 0 # Timestamp when the task was last updated.
|
305
417
|
|
306
418
|
|
307
|
-
class EndpointInfo(BaseModel):
|
308
|
-
"""
|
309
|
-
Additional information about the task endpoint.
|
310
|
-
"""
|
311
|
-
endpoint_status: Optional[TaskEndpointStatus] = None # Current status of the task (e.g., running, stopped).
|
312
|
-
endpoint_url: Optional[str] = "" # URL for accessing the task endpoint.
|
313
|
-
|
314
|
-
|
315
419
|
class UserPreference(BaseModel):
|
316
420
|
"""
|
317
421
|
User preference for a task.
|
@@ -329,8 +433,8 @@ class Task(BaseModel):
|
|
329
433
|
config: Optional[TaskConfig] = None # Configuration data for the task.
|
330
434
|
endpoint_info: Optional[EndpointInfo] = None # Additional information about the task endpoint.
|
331
435
|
cluster_endpoints: Optional[List[EndpointInfo]] = None # Endpoints for the task cluster.
|
332
|
-
task_status: Optional[TaskStatus] =
|
333
|
-
readiness_status: Optional[str] =
|
436
|
+
task_status: Optional[TaskStatus] = None # Status of the task.
|
437
|
+
readiness_status: Optional[str] = None # Readiness status of the task.
|
334
438
|
user_preference: Optional[UserPreference] = None # User preference for the task.
|
335
439
|
|
336
440
|
|
gmicloud/tests/test_artifacts.py
CHANGED
@@ -131,34 +131,18 @@ class TestArtifactManager(unittest.TestCase):
|
|
131
131
|
upload_link = "http://upload-link"
|
132
132
|
bigfile_upload_link = "http://bigfile-upload-link"
|
133
133
|
artifact_file_path = "./testdata/test.zip"
|
134
|
-
|
134
|
+
model_directory= "./testdata"
|
135
135
|
|
136
136
|
mock_create_artifact.return_value = CreateArtifactResponse(artifact_id="1", upload_link=upload_link)
|
137
|
-
mock_get_bigfile_upload_url.return_value =
|
137
|
+
mock_get_bigfile_upload_url.return_value = ResumableUploadLinkResponse(artifact_id="1",
|
138
138
|
upload_link=bigfile_upload_link)
|
139
139
|
|
140
140
|
artifact_id = self.artifact_manager.create_artifact_with_model_files(artifact_name="artifact_name",
|
141
141
|
artifact_file_path=artifact_file_path,
|
142
|
-
|
142
|
+
model_directory=model_directory)
|
143
143
|
self.assertEqual(artifact_id, "1")
|
144
144
|
mock_upload_small_file.assert_called_once_with(upload_link, artifact_file_path, "application/zip")
|
145
|
-
mock_upload_large_file.
|
146
|
-
|
147
|
-
@patch('gmicloud._internal._client._artifact_client.ArtifactClient.create_artifact')
|
148
|
-
@patch('gmicloud._internal._client._file_upload_client.FileUploadClient.upload_small_file')
|
149
|
-
def test_create_artifact_with_model_files_raises_file_not_found_error_for_model_file(self, mock_create_artifact,
|
150
|
-
mock_upload_small_file):
|
151
|
-
upload_link = "http://upload-link"
|
152
|
-
artifact_file_path = "./testdata/test.zip"
|
153
|
-
model_file_path = "./testdata/nonexistent.zip"
|
154
|
-
|
155
|
-
mock_create_artifact.return_value = CreateArtifactResponse(artifact_id="1", upload_link=upload_link)
|
156
|
-
|
157
|
-
with self.assertRaises(FileNotFoundError) as context:
|
158
|
-
self.artifact_manager.create_artifact_with_model_files(artifact_name="artifact_name",
|
159
|
-
artifact_file_path=artifact_file_path,
|
160
|
-
model_file_paths=[model_file_path])
|
161
|
-
self.assertTrue(f"File not found: {model_file_path}" in str(context.exception))
|
145
|
+
self.assertEqual(mock_upload_large_file.call_count, 6) # 6 files in testdata directory
|
162
146
|
|
163
147
|
@patch('gmicloud._internal._client._artifact_client.ArtifactClient.rebuild_artifact')
|
164
148
|
def test_rebuild_artifact_rebuilds_successfully(self, mock_rebuild_artifact):
|
@@ -203,7 +187,7 @@ class TestArtifactManager(unittest.TestCase):
|
|
203
187
|
upload_link = "http://upload-link"
|
204
188
|
model_file_path = "./testdata/model.zip"
|
205
189
|
|
206
|
-
mock_get_bigfile_upload_url.return_value =
|
190
|
+
mock_get_bigfile_upload_url.return_value = ResumableUploadLinkResponse(artifact_id="1", upload_link=upload_link)
|
207
191
|
upload_link = self.artifact_manager.get_bigfile_upload_url("1", model_file_path)
|
208
192
|
self.assertEqual(upload_link, upload_link)
|
209
193
|
|
@@ -253,7 +237,7 @@ class TestArtifactManager(unittest.TestCase):
|
|
253
237
|
|
254
238
|
@patch('gmicloud._internal._client._artifact_client.ArtifactClient.get_public_templates')
|
255
239
|
def test_get_artifact_templates_returns_templates(self, mock_get_public_templates):
|
256
|
-
mock_get_public_templates.return_value = [
|
240
|
+
mock_get_public_templates.return_value = [Template(template_id="1", template_data=TemplateData(name="Template1"))]
|
257
241
|
templates = self.artifact_manager.get_public_templates()
|
258
242
|
self.assertEqual(len(templates), 1)
|
259
243
|
self.assertEqual(templates[0].template_id, "1")
|
@@ -1,8 +1,8 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: gmicloud
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.7
|
4
4
|
Summary: GMI Cloud Python SDK
|
5
|
-
Author-email: GMI <
|
5
|
+
Author-email: GMI <gmi@gmitec.net>
|
6
6
|
License: MIT
|
7
7
|
Classifier: Programming Language :: Python :: 3
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -10,7 +10,7 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.6
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
|
13
|
-
# GMICloud SDK
|
13
|
+
# GMICloud SDK
|
14
14
|
|
15
15
|
## Overview
|
16
16
|
Before you start: Our service and GPU resources are currently invite-only, so please contact our team (getstarted@gmicloud.ai) to request an invitation if you don't have one yet.
|
@@ -45,7 +45,7 @@ There are two ways to configure the SDK:
|
|
45
45
|
Set the following environment variables:
|
46
46
|
|
47
47
|
```shell
|
48
|
-
export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
|
48
|
+
export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID> # Pick whatever ID you need.
|
49
49
|
export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
|
50
50
|
export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
|
51
51
|
```
|
@@ -73,7 +73,7 @@ pip install -r requirements.txt
|
|
73
73
|
python -m examples.create_task_from_artifact_template
|
74
74
|
```
|
75
75
|
|
76
|
-
### 2.
|
76
|
+
### 2. Example of creating an inference task from an artifact template
|
77
77
|
|
78
78
|
This is the simplest example to deploy an inference task using an existing artifact template:
|
79
79
|
|
@@ -119,6 +119,97 @@ print(call_chat_completion(cli, task.task_id))
|
|
119
119
|
|
120
120
|
```
|
121
121
|
|
122
|
+
### 3. Example of creating an inference task based on a custom model with a local vLLM / SGLang serve command
|
123
|
+
* Full example is available at [examples/inference_task_with_custom_model.py](https://github.com/GMISWE/python-sdk/blob/main/examples/inference_task_with_custom_model.py)
|
124
|
+
|
125
|
+
1. Prepare a custom model checkpoint (using a model downloaded from Hugging Face as an example)
|
126
|
+
|
127
|
+
```python
|
128
|
+
# Download model from huggingface
|
129
|
+
from huggingface_hub import snapshot_download
|
130
|
+
|
131
|
+
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
132
|
+
model_checkpoint_save_dir = "files/model_garden"
|
133
|
+
snapshot_download(repo_id=model_name, local_dir=model_checkpoint_save_dir)
|
134
|
+
```
|
135
|
+
|
136
|
+
2. Find a template for a specific SGLang version
|
137
|
+
|
138
|
+
```python
|
139
|
+
# export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
|
140
|
+
# export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
|
141
|
+
# export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
|
142
|
+
cli = Client()
|
143
|
+
|
144
|
+
# List templates offered by GMI cloud
|
145
|
+
templates = cli.artifact_manager.list_public_template_names()
|
146
|
+
print(f"Found {len(templates)} templates: {templates}")
|
147
|
+
```
|
148
|
+
|
149
|
+
3. Pick a template (e.g. SGLang 0.4.5) and prepare a local serve command
|
150
|
+
|
151
|
+
```python
|
152
|
+
# Example for vllm server
|
153
|
+
picked_template_name = "gmi_vllm_0.8.4"
|
154
|
+
serve_command = "vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --gpu-memory-utilization 0.8"
|
155
|
+
|
156
|
+
# Example for sglang server
|
157
|
+
picked_template_name = "gmi_sglang_0.4.5.post1"
|
158
|
+
serve_command = "python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --mem-fraction-static 0.8 --tp 2"
|
159
|
+
```
|
160
|
+
|
161
|
+
4. Create an artifact and upload the custom model. The artifact can be reused to create inference tasks later. The artifact also suggests recommended resources for each inference server replica.
|
162
|
+
|
163
|
+
```python
|
164
|
+
artifact_id, recommended_replica_resources = cli.artifact_manager.create_artifact_from_template_name(
|
165
|
+
artifact_template_name=picked_template_name,
|
166
|
+
env_parameters={
|
167
|
+
"SERVER_COMMAND": serve_command,
|
168
|
+
"GPU_TYPE": "H100",
|
169
|
+
}
|
170
|
+
)
|
171
|
+
print(f"Created artifact {artifact_id} with recommended resources: {recommended_replica_resources}")
|
172
|
+
|
173
|
+
# Upload model files to artifact
|
174
|
+
cli.artifact_manager.upload_model_files_to_artifact(artifact_id, model_checkpoint_save_dir)
|
175
|
+
```
|
176
|
+
|
177
|
+
5. Create an inference task (defining the min/max number of inference replicas), then start it and wait for it to be ready
|
178
|
+
|
179
|
+
```python
|
180
|
+
new_task = Task(
|
181
|
+
config=TaskConfig(
|
182
|
+
ray_task_config=RayTaskConfig(
|
183
|
+
artifact_id=artifact_id,
|
184
|
+
file_path="serve",
|
185
|
+
deployment_name="app",
|
186
|
+
replica_resource=recommended_replica_resources,
|
187
|
+
),
|
188
|
+
task_scheduling = TaskScheduling(
|
189
|
+
scheduling_oneoff=OneOffScheduling(
|
190
|
+
trigger_timestamp=int(datetime.now().timestamp()),
|
191
|
+
min_replicas=1,
|
192
|
+
max_replicas=4,
|
193
|
+
)
|
194
|
+
),
|
195
|
+
),
|
196
|
+
)
|
197
|
+
task = cli.task_manager.create_task(new_task)
|
198
|
+
task_id = task.task_id
|
199
|
+
task = cli.task_manager.get_task(task_id)
|
200
|
+
print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
|
201
|
+
|
202
|
+
# Start Task and wait for it to be ready
|
203
|
+
cli.task_manager.start_task_and_wait(task_id)
|
204
|
+
```
|
205
|
+
|
206
|
+
6. Test with sample chat completion request
|
207
|
+
|
208
|
+
```python
|
209
|
+
print(call_chat_completion(cli, task_id))
|
210
|
+
```
|
211
|
+
|
212
|
+
|
122
213
|
## API Reference
|
123
214
|
|
124
215
|
### Client
|
@@ -144,4 +235,3 @@ password: Optional[str] = ""
|
|
144
235
|
* get_task(task_id: str): Retrieve the status and details of a specific task.
|
145
236
|
|
146
237
|
## Notes & Troubleshooting
|
147
|
-
k
|
@@ -1,27 +1,28 @@
|
|
1
|
-
gmicloud/__init__.py,sha256=
|
1
|
+
gmicloud/__init__.py,sha256=xSzrAxiby5Te20yhy1ZylGHmQKVV_w1QjFe6D99VZxw,968
|
2
2
|
gmicloud/client.py,sha256=G0tD0xQnpqDKS-3l-AAU-K3FAHOsqsTzsAq2NVxiamY,10539
|
3
3
|
gmicloud/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
gmicloud/_internal/_config.py,sha256=
|
4
|
+
gmicloud/_internal/_config.py,sha256=BenHiCnedpHA5phz49UWBXa1mg_q9W8zYs7A8esqGcU,494
|
5
5
|
gmicloud/_internal/_constants.py,sha256=Y085dwFlqdFkCf39iBfxz39QiiB7lX59ayNJjB86_m4,378
|
6
6
|
gmicloud/_internal/_enums.py,sha256=5d6Z8TFJYCmhNI1TDbPpBbG1tNe96StIEH4tEw20RZk,789
|
7
7
|
gmicloud/_internal/_exceptions.py,sha256=hScBq7n2fOit4_umlkabZJchY8zVbWSRfWM2Y0rLCbw,306
|
8
|
-
gmicloud/_internal/_models.py,sha256=
|
8
|
+
gmicloud/_internal/_models.py,sha256=2l65aZdQxyXlY0Dj23P6NFf59_zopgf9OoUMLAz5T2U,22685
|
9
9
|
gmicloud/_internal/_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
gmicloud/_internal/_client/_artifact_client.py,sha256
|
10
|
+
gmicloud/_internal/_client/_artifact_client.py,sha256=0lyHAdUybN8A1mEwZ7p1yK2yQEyoDG2vTB4Qe5RI2ik,9974
|
11
11
|
gmicloud/_internal/_client/_decorator.py,sha256=sy4gxzsUB6ORXHw5pqmMf7TTlK41Nmu1fhIhK2AIsbY,670
|
12
|
-
gmicloud/_internal/_client/_file_upload_client.py,sha256=
|
12
|
+
gmicloud/_internal/_client/_file_upload_client.py,sha256=r29iXG_0DOi-uTLu9plpfZMWGqOck_AdDHJZprcf8uI,4918
|
13
13
|
gmicloud/_internal/_client/_http_client.py,sha256=j--3emTjJ_l9CTdnkTbcpf7gYcUEl341pv2O5cU67l0,5741
|
14
14
|
gmicloud/_internal/_client/_iam_client.py,sha256=pgOXIqp9aJvcIUCEVkYPEyMUyxBftecojHAbs8Gbl94,7013
|
15
15
|
gmicloud/_internal/_client/_task_client.py,sha256=69OqZC_kwSDkTSVVyi51Tn_OyUV6R0nin4z4gLfZ-Lg,6141
|
16
16
|
gmicloud/_internal/_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
gmicloud/_internal/_manager/_artifact_manager.py,sha256=
|
17
|
+
gmicloud/_internal/_manager/_artifact_manager.py,sha256=yK4veVwCY1cipy3rdnGvhnKNvkBx3SYYpHKqzjtXJn0,20731
|
18
18
|
gmicloud/_internal/_manager/_iam_manager.py,sha256=nAqPCaUfSXTnx2MEQa8e0YUOBFYWDRiETgK1PImdf4o,1167
|
19
|
-
gmicloud/_internal/_manager/_task_manager.py,sha256=
|
19
|
+
gmicloud/_internal/_manager/_task_manager.py,sha256=zBW_TkYhbSvAc_p7Q3z6Vgl2Cayv8zIkawTT6OcB4x4,11291
|
20
|
+
gmicloud/_internal/_manager/serve_command_utils.py,sha256=xjB6B9CNAmohou41H755iCCgkLNrjvdnu9NcJApTm1k,4373
|
20
21
|
gmicloud/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
gmicloud/tests/test_artifacts.py,sha256=
|
22
|
+
gmicloud/tests/test_artifacts.py,sha256=w0T0EpATIGLrSUPaBfTZ2ZC_X2XeaTlFEi3DZ4evIcE,15825
|
22
23
|
gmicloud/tests/test_tasks.py,sha256=yL-aFf80ShgTyxEONTWh-xbWDf5XnUNtIeA5hYvhKM0,10963
|
23
24
|
gmicloud/utils/uninstall_packages.py,sha256=zzuuaJPf39oTXWZ_7tUAGseoxocuCbbkoglJSD5yDrE,1127
|
24
|
-
gmicloud-0.1.
|
25
|
-
gmicloud-0.1.
|
26
|
-
gmicloud-0.1.
|
27
|
-
gmicloud-0.1.
|
25
|
+
gmicloud-0.1.7.dist-info/METADATA,sha256=LFLXvJeQ9ocyJQ8hFbTaNZAWJ7NvsO7FCN4tyaN5YY8,7927
|
26
|
+
gmicloud-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
27
|
+
gmicloud-0.1.7.dist-info/top_level.txt,sha256=AZimLw3y0WPpLiSiOidZ1gD0dxALh-jQNk4fxC05hYE,9
|
28
|
+
gmicloud-0.1.7.dist-info/RECORD,,
|
File without changes
|