gmicloud 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gmicloud/__init__.py CHANGED
@@ -15,7 +15,7 @@ from ._internal._models import (
15
15
  OneOffScheduling,
16
16
  DailyScheduling,
17
17
  DailyTrigger,
18
- ArtifactTemplate,
18
+ Template,
19
19
  )
20
20
  from ._internal._enums import (
21
21
  BuildStatus,
@@ -39,7 +39,7 @@ __all__ = [
39
39
  "OneOffScheduling",
40
40
  "DailyScheduling",
41
41
  "DailyTrigger",
42
- "ArtifactTemplate",
42
+ "Template",
43
43
  "BuildStatus",
44
44
  "TaskEndpointStatus",
45
45
  ]
@@ -1,7 +1,7 @@
1
1
  from typing import List
2
2
  import logging
3
3
  from requests.exceptions import RequestException
4
-
4
+ import json
5
5
  from ._http_client import HTTPClient
6
6
  from ._iam_client import IAMClient
7
7
  from ._decorator import handle_refresh_token
@@ -120,6 +120,39 @@ class ArtifactClient:
120
120
  logger.error(f"Failed to rebuild artifact {artifact_id}: {e}")
121
121
  return None
122
122
 
123
+ @handle_refresh_token
124
+ def add_env_parameters_to_artifact(self, artifact_id: str, env_parameters: dict[str, str]) -> None:
125
+ """
126
+ Adds environment parameters to an artifact identified by its ID.
127
+
128
+ :param artifact_id: The ID of the artifact to update.
129
+ :param env_parameters: A dict mapping environment parameter keys to values to add to the artifact.
130
+ """
131
+ try:
132
+ old_artifact = self.get_artifact(artifact_id)
133
+ if not old_artifact:
134
+ logger.error(f"Artifact {artifact_id} not found")
135
+ return
136
+ request = UpdateArtifactRequestBody(
137
+ artifact_description=old_artifact.artifact_metadata.artifact_description,
138
+ artifact_name=old_artifact.artifact_metadata.artifact_name,
139
+ artifact_tags=old_artifact.artifact_metadata.artifact_tags,
140
+ env_parameters=old_artifact.artifact_parameters.env_parameters,
141
+ model_parameters=old_artifact.artifact_parameters.model_parameters
142
+ )
143
+ new_env_parameters = [EnvParameter(key=k, value=v) for k, v in env_parameters.items()]
144
+ if not request.env_parameters:
145
+ request.env_parameters = []
146
+ request.env_parameters.extend(new_env_parameters)
147
+ response = self.client.put(
148
+ f"/update_artifact?artifact_id={artifact_id}",
149
+ self.iam_client.get_custom_headers(),
150
+ request.model_dump()
151
+ )
152
+ except (RequestException, ValueError) as e:
153
+ logger.error(f"Failed to add env parameters to artifact {artifact_id}: {e}")
154
+ return
155
+
123
156
  @handle_refresh_token
124
157
  def delete_artifact(self, artifact_id: str) -> Optional[DeleteArtifactResponse]:
125
158
  """
@@ -140,7 +173,7 @@ class ArtifactClient:
140
173
  return None
141
174
 
142
175
  @handle_refresh_token
143
- def get_bigfile_upload_url(self, request: GetBigFileUploadUrlRequest) -> Optional[GetBigFileUploadUrlResponse]:
176
+ def get_bigfile_upload_url(self, request: ResumableUploadLinkRequest) -> Optional[ResumableUploadLinkResponse]:
144
177
  """
145
178
  Generates a pre-signed URL for uploading a large file.
146
179
 
@@ -156,7 +189,7 @@ class ArtifactClient:
156
189
  logger.error("Empty response from /get_bigfile_upload_url")
157
190
  return None
158
191
 
159
- return GetBigFileUploadUrlResponse.model_validate(response)
192
+ return ResumableUploadLinkResponse.model_validate(response)
160
193
 
161
194
  except (RequestException, ValueError) as e:
162
195
  logger.error(f"Failed to generate upload URL: {e}")
@@ -186,12 +219,12 @@ class ArtifactClient:
186
219
  return None
187
220
 
188
221
  @handle_refresh_token
189
- def get_public_templates(self) -> List[ArtifactTemplate]:
222
+ def get_public_templates(self) -> List[Template]:
190
223
  """
191
224
  Fetches all artifact templates.
192
225
 
193
- :return: A list of ArtifactTemplate objects.
194
- :rtype: List[ArtifactTemplate]
226
+ :return: A list of Template objects.
227
+ :rtype: List[Template]
195
228
  """
196
229
  try:
197
230
  response = self.client.get("/get_public_templates", self.iam_client.get_custom_headers())
@@ -201,7 +234,7 @@ class ArtifactClient:
201
234
  return []
202
235
 
203
236
  try:
204
- result = GetPublicTemplatesResponse.model_validate(response)
237
+ result = GetTemplatesResponse.model_validate(response)
205
238
  return result.artifact_templates
206
239
  except ValueError as ve:
207
240
  logger.error(f"Failed to validate response data: {ve}")
@@ -1,8 +1,10 @@
1
1
  import os
2
2
  import requests
3
+ import logging
3
4
 
4
5
  from .._exceptions import UploadFileError
5
6
 
7
+ logger = logging.getLogger()
6
8
 
7
9
  class FileUploadClient:
8
10
  CHUNK_SIZE = 10 * 1024 * 1024 # 10MB Default Chunk Size
@@ -45,13 +47,13 @@ class FileUploadClient:
45
47
  """
46
48
  try:
47
49
  file_size = os.path.getsize(file_path)
48
- print(f"File Size: {file_size} bytes")
50
+ logger.info(f"File {file_path} size: {file_size} bytes")
49
51
 
50
52
  start_byte = 0
51
53
  uploaded_range = FileUploadClient._check_file_status(upload_url, file_size)
52
54
  if uploaded_range:
53
55
  start_byte = int(uploaded_range.split("-")[1]) + 1
54
- print(f"Resuming upload from {start_byte} bytes")
56
+ logger.info(f"Resuming uploading {file_path} from {start_byte} bytes")
55
57
 
56
58
  with open(file_path, "rb") as file:
57
59
  while start_byte < file_size:
@@ -74,14 +76,15 @@ class FileUploadClient:
74
76
  # Ensure upload is successful for this chunk
75
77
  if resp.status_code not in (200, 201, 308):
76
78
  raise UploadFileError(
77
- f"Failed to upload file, code:{resp.status_code} ,message: {resp.text}")
79
+ f"Failed to upload file {file_path}, code:{resp.status_code} ,message: {resp.text}")
78
80
 
79
81
  start_byte = end_byte + 1
80
- print(f"Uploaded {end_byte + 1}/{file_size} bytes")
82
+ percentage = (start_byte / file_size) * 100
83
+ logger.info(f"File {file_path} uploaded {end_byte + 1:,}/{file_size:,} bytes ({percentage:.2f}%)")
81
84
 
82
- print("Upload completed successfully.")
85
+ logger.info(f"File {file_path} uploaded successfully.")
83
86
  except Exception as e:
84
- raise UploadFileError(f"Failed to upload file: {str(e)}")
87
+ raise UploadFileError(f"Failed to upload file {file_path}, got error: {str(e)}")
85
88
 
86
89
  @staticmethod
87
90
  def _check_file_status(upload_url: str, file_size: int) -> str:
@@ -104,7 +107,7 @@ class FileUploadClient:
104
107
  if resp.status_code == 308:
105
108
  range_header = resp.headers.get("Range")
106
109
  if range_header:
107
- print(f"Server reports partial upload range: {range_header}")
110
+ logger.info(f"Server reports partial upload range: {range_header}")
108
111
  return range_header
109
112
 
110
113
  if resp.status_code in (200, 201):
@@ -1,3 +1,9 @@
1
- ARTIFACT_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/artifact"
2
- TASK_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/task"
3
- IAM_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1"
1
+ # Dev environment
2
+ # ARTIFACT_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/artifact"
3
+ # TASK_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/task"
4
+ # IAM_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1"
5
+
6
+ # Prod environment
7
+ ARTIFACT_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1/ie/artifact"
8
+ TASK_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1/ie/task"
9
+ IAM_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1"
@@ -2,11 +2,16 @@ import os
2
2
  import time
3
3
  from typing import List
4
4
  import mimetypes
5
+ import concurrent.futures
6
+ import re
7
+ from tqdm import tqdm
8
+ from tqdm.contrib.logging import logging_redirect_tqdm
5
9
 
6
10
  from .._client._iam_client import IAMClient
7
11
  from .._client._artifact_client import ArtifactClient
8
12
  from .._client._file_upload_client import FileUploadClient
9
13
  from .._models import *
14
+ from .._manager.serve_command_utils import parse_server_command, extract_gpu_num_from_serve_command
10
15
 
11
16
  import logging
12
17
 
@@ -53,7 +58,12 @@ class ArtifactManager:
53
58
  self,
54
59
  artifact_name: str,
55
60
  description: Optional[str] = "",
56
- tags: Optional[List[str]] = None
61
+ tags: Optional[List[str]] = None,
62
+ deployment_type: Optional[str] = "",
63
+ template_id: Optional[str] = "",
64
+ env_parameters: Optional[List["EnvParameter"]] = None,
65
+ model_description: Optional[str] = "",
66
+ model_parameters: Optional[List["ModelParameter"]] = None,
57
67
  ) -> CreateArtifactResponse:
58
68
  """
59
69
  Create a new artifact for a user.
@@ -69,11 +79,16 @@ class ArtifactManager:
69
79
 
70
80
  req = CreateArtifactRequest(artifact_name=artifact_name,
71
81
  artifact_description=description,
72
- artifact_tags=tags, )
82
+ artifact_tags=tags,
83
+ deployment_type=deployment_type,
84
+ template_id=template_id,
85
+ env_parameters=env_parameters,
86
+ model_description=model_description,
87
+ model_parameters=model_parameters)
73
88
 
74
89
  return self.artifact_client.create_artifact(req)
75
90
 
76
- def create_artifact_from_template(self, artifact_template_id: str) -> str:
91
+ def create_artifact_from_template(self, artifact_template_id: str, env_parameters: Optional[dict[str, str]] = None) -> str:
77
92
  """
78
93
  Create a new artifact for a user using a template.
79
94
 
@@ -85,11 +100,16 @@ class ArtifactManager:
85
100
  if not artifact_template_id or not artifact_template_id.strip():
86
101
  raise ValueError("Artifact template ID is required and cannot be empty.")
87
102
 
103
+
88
104
  resp = self.artifact_client.create_artifact_from_template(artifact_template_id)
89
105
  if not resp or not resp.artifact_id:
90
106
  raise ValueError("Failed to create artifact from template.")
91
107
 
108
+ if env_parameters:
109
+ self.artifact_client.add_env_parameters_to_artifact(resp.artifact_id, env_parameters)
110
+
92
111
  return resp.artifact_id
112
+
93
113
 
94
114
  def create_artifact_from_template_name(self, artifact_template_name: str) -> tuple[str, ReplicaResource]:
95
115
  """
@@ -125,6 +145,56 @@ class ArtifactManager:
125
145
  except Exception as e:
126
146
  logger.error(f"Failed to create artifact from template, Error: {e}")
127
147
  raise e
148
+
149
+ def create_artifact_for_serve_command_and_custom_model(self, template_name: str, artifact_name: str, serve_command: str, gpu_type: str, artifact_description: str = "") -> tuple[str, ReplicaResource]:
150
+ """
151
+ Create an artifact from a template and support custom model.
152
+ :param template_name: The name of the template to use.
153
+ :return: A tuple containing the artifact ID and the recommended replica resources.
154
+ :rtype: tuple[str, ReplicaResource]
155
+ """
156
+
157
+ recommended_replica_resources = None
158
+ picked_template = None
159
+ try:
160
+ templates = self.get_public_templates()
161
+ except Exception as e:
162
+ logger.error(f"Failed to get artifact templates, Error: {e}")
163
+ for template in templates:
164
+ if template.template_data and template.template_data.name == template_name:
165
+ picked_template = template
166
+ break
167
+ if not picked_template:
168
+ raise ValueError(f"Template with name {template_name} not found.")
169
+
170
+ try:
171
+ if gpu_type not in ["H100", "H200"]:
172
+ raise ValueError("Only support H100 and H200 for now")
173
+
174
+ type, env_vars, serve_args_dict = parse_server_command(serve_command)
175
+ if type.lower() not in template_name.lower():
176
+ raise ValueError(f"Template {template_name} does not support inference with {type}.")
177
+ num_gpus = extract_gpu_num_from_serve_command(serve_args_dict)
178
+ recommended_replica_resources = ReplicaResource(
179
+ cpu=num_gpus * 16,
180
+ ram_gb=num_gpus * 100,
181
+ gpu=num_gpus,
182
+ gpu_name=gpu_type,
183
+ )
184
+ except Exception as e:
185
+ raise ValueError(f"Failed to parse serve command, Error: {e}")
186
+
187
+ try:
188
+ env_vars = [
189
+ EnvParameter(key="SERVE_COMMAND", value=serve_command),
190
+ EnvParameter(key="GPU_TYPE", value=gpu_type),
191
+ ]
192
+ resp = self.create_artifact(artifact_name, artifact_description, deployment_type="template", template_id=picked_template.template_id, env_parameters=env_vars)
193
+ # Assume Artifact is already with BuildStatus.SUCCESS status
194
+ return resp.artifact_id, recommended_replica_resources
195
+ except Exception as e:
196
+ logger.error(f"Failed to create artifact from template, Error: {e}")
197
+ raise e
128
198
 
129
199
  def rebuild_artifact(self, artifact_id: str) -> RebuildArtifactResponse:
130
200
  """
@@ -211,7 +281,7 @@ class ArtifactManager:
211
281
  model_file_name = os.path.basename(model_file_path)
212
282
  model_file_type = mimetypes.guess_type(model_file_path)[0]
213
283
 
214
- req = GetBigFileUploadUrlRequest(artifact_id=artifact_id, file_name=model_file_name, file_type=model_file_type)
284
+ req = ResumableUploadLinkRequest(artifact_id=artifact_id, file_name=model_file_name, file_type=model_file_type)
215
285
 
216
286
  resp = self.artifact_client.get_bigfile_upload_url(req)
217
287
  if not resp or not resp.upload_link:
@@ -250,36 +320,64 @@ class ArtifactManager:
250
320
 
251
321
  FileUploadClient.upload_large_file(upload_link, file_path)
252
322
 
323
+
324
+ def upload_model_files_to_artifact(self, artifact_id: str, model_directory: str) -> None:
325
+ """
326
+ Upload model files to an existing artifact.
327
+
328
+ :param artifact_id: The ID of the artifact to upload the model files to.
329
+ :param model_directory: The path to the model directory.
330
+ """
331
+
332
+ # List all files in the model directory recursively
333
+ model_file_paths = []
334
+ for root, _, files in os.walk(model_directory):
335
+ for file in files:
336
+ model_file_paths.append(os.path.join(root, file))
337
+
338
+ def upload_file(model_file_path):
339
+ self._validate_file_path(model_file_path)
340
+ bigfile_upload_url_resp = self.artifact_client.get_bigfile_upload_url(
341
+ ResumableUploadLinkRequest(artifact_id=artifact_id, file_name=os.path.basename(model_file_path))
342
+ )
343
+ FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
344
+
345
+ # Upload files in parallel with progress bar
346
+ with tqdm(total=len(model_file_paths), desc="Uploading model files") as progress_bar:
347
+ with logging_redirect_tqdm():
348
+ with concurrent.futures.ThreadPoolExecutor() as executor:
349
+ futures = {executor.submit(upload_file, path): path for path in model_file_paths}
350
+ for future in concurrent.futures.as_completed(futures):
351
+ try:
352
+ future.result()
353
+ except Exception as e:
354
+ logger.error(f"Failed to upload file {futures[future]}, Error: {e}")
355
+ progress_bar.update(1)
356
+
253
357
  def create_artifact_with_model_files(
254
358
  self,
255
359
  artifact_name: str,
256
360
  artifact_file_path: str,
257
- model_file_paths: List[str],
361
+ model_directory: str,
258
362
  description: Optional[str] = "",
259
363
  tags: Optional[str] = None
260
364
  ) -> str:
261
365
  """
262
366
  Create a new artifact for a user and upload model files associated with the artifact.
263
-
264
367
  :param artifact_name: The name of the artifact.
265
368
  :param artifact_file_path: The path to the artifact file(Dockerfile+serve.py).
266
- :param model_file_paths: The paths to the model files.
369
+ :param model_directory: The path to the model directory.
267
370
  :param description: An optional description for the artifact.
268
371
  :param tags: Optional tags associated with the artifact, as a comma-separated string.
269
372
  :return: The `artifact_id` of the created artifact.
270
- :raises FileNotFoundError: If the provided `file_path` does not exist.
271
373
  """
272
374
  artifact_id = self.create_artifact_with_file(artifact_name, artifact_file_path, description, tags)
375
+ logger.info(f"Artifact created: {artifact_id}")
273
376
 
274
- for model_file_path in model_file_paths:
275
- self._validate_file_path(model_file_path)
276
- bigfile_upload_url_resp = self.artifact_client.get_bigfile_upload_url(
277
- GetBigFileUploadUrlRequest(artifact_id=artifact_id, model_file_path=model_file_path)
278
- )
279
- FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
377
+ self.upload_model_files_to_artifact(artifact_id, model_directory)
280
378
 
281
379
  return artifact_id
282
-
380
+
283
381
 
284
382
  def wait_for_artifact_ready(self, artifact_id: str, timeout_s: int = 900) -> None:
285
383
  """
@@ -304,12 +402,12 @@ class ArtifactManager:
304
402
  time.sleep(10)
305
403
 
306
404
 
307
- def get_public_templates(self) -> List[ArtifactTemplate]:
405
+ def get_public_templates(self) -> List[Template]:
308
406
  """
309
407
  Fetch all artifact templates.
310
408
 
311
- :return: A list of ArtifactTemplate objects.
312
- :rtype: List[ArtifactTemplate]
409
+ :return: A list of Template objects.
410
+ :rtype: List[Template]
313
411
  """
314
412
  return self.artifact_client.get_public_templates()
315
413
 
@@ -41,7 +41,7 @@ class TaskManager:
41
41
 
42
42
  :return: A list of `Task` objects.
43
43
  """
44
- resp = self.task_client.get_all_tasks(self.iam_client.get_user_id())
44
+ resp = self.task_client.get_all_tasks()
45
45
  if not resp or not resp.tasks:
46
46
  return []
47
47
 
@@ -63,6 +63,7 @@ class TaskManager:
63
63
  if not resp or not resp.task:
64
64
  raise ValueError("Failed to create task.")
65
65
 
66
+ logger.info(f"Task created: {resp.task.task_id}")
66
67
  return resp.task
67
68
 
68
69
  def create_task_from_file(self, artifact_id: str, config_file_path: str, trigger_timestamp: int = None) -> Task:
@@ -138,48 +139,54 @@ class TaskManager:
138
139
  return self.task_client.start_task(task_id)
139
140
 
140
141
 
141
- def start_task_and_wait(self, task_id: str, timeout_s: int = 900) -> Task:
142
+ def wait_for_task(self, task_id: str, timeout_s: int = 900) -> Task:
142
143
  """
143
- Start a task and wait for it to be ready.
144
+ Wait for a task to reach the RUNNING state or raise an exception if it fails.
144
145
 
145
- :param task_id: The ID of the task to start.
146
+ :param task_id: The ID of the task to wait for.
146
147
  :param timeout_s: The timeout in seconds.
147
148
  :return: The task object.
148
149
  :rtype: Task
149
150
  """
150
- # trigger start task
151
- try:
152
- self.start_task(task_id)
153
- logger.info(f"Started task ID: {task_id}")
154
- except Exception as e:
155
- logger.error(f"Failed to start task, Error: {e}")
156
- raise e
157
-
158
151
  start_time = time.time()
159
152
  while True:
160
153
  try:
161
154
  task = self.get_task(task_id)
162
155
  if task.task_status == TaskStatus.RUNNING:
163
- return task
164
- elif task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
165
- raise Exception(f"Unexpected task status after starting: {task.task_status}")
166
- # Also check endpoint status.
167
- elif task.task_status == TaskStatus.RUNNING:
168
- if task.endpoint_info and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
156
+ if task.endpoint_info is not None and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
169
157
  return task
170
- elif task.endpoint_info and task.endpoint_info.endpoint_status in [TaskEndpointStatus.UNKNOWN, TaskEndpointStatus.ARCHIVED]:
171
- raise Exception(f"Unexpected endpoint status after starting: {task.endpoint_info.endpoint_status}")
172
158
  else:
173
- logger.info(f"Pending endpoint starting. endpoint status: {task.endpoint_info.endpoint_status}")
159
+ if task.cluster_endpoints:
160
+ for ce in task.cluster_endpoints:
161
+ if ce.endpoint_status == TaskEndpointStatus.RUNNING:
162
+ return task
163
+ if task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
164
+ raise Exception(f"Unexpected task status after starting: {task.task_status}")
174
165
  else:
175
166
  logger.info(f"Pending task starting. Task status: {task.task_status}")
176
-
177
167
  except Exception as e:
178
168
  logger.error(f"Failed to get task, Error: {e}")
179
169
  if time.time() - start_time > timeout_s:
180
170
  raise Exception(f"Task creation takes more than {timeout_s // 60} minutes. Testing aborted.")
181
171
  time.sleep(10)
182
172
 
173
+ def start_task_and_wait(self, task_id: str, timeout_s: int = 3600) -> Task:
174
+ """
175
+ Start a task and wait for it to be ready.
176
+
177
+ :param task_id: The ID of the task to start.
178
+ :param timeout_s: The timeout in seconds.
179
+ :return: The task object.
180
+ :rtype: Task
181
+ """
182
+ try:
183
+ self.start_task(task_id)
184
+ logger.info(f"Started task ID: {task_id}")
185
+ except Exception as e:
186
+ logger.error(f"Failed to start task, Error: {e}")
187
+ raise e
188
+
189
+ return self.wait_for_task(task_id, timeout_s)
183
190
 
184
191
  def stop_task(self, task_id: str) -> bool:
185
192
  """
@@ -190,16 +197,15 @@ class TaskManager:
190
197
  :raises ValueError: If `task_id` is invalid (None or empty string).
191
198
  """
192
199
  self._validate_not_empty(task_id, "Task ID")
200
+ return self.task_client.stop_task(task_id)
193
201
 
194
202
 
195
- def stop_task_and_wait(self, task_id: str, timeout_s: int = 900):
196
- task_manager = self.task_manager
203
+ def stop_task_and_wait(self, task_id: str, timeout_s: int = 3600):
197
204
  try:
198
- self.task_manager.stop_task(task_id)
205
+ self.stop_task(task_id)
199
206
  logger.info(f"Stopping task ID: {task_id}")
200
207
  except Exception as e:
201
208
  logger.error(f"Failed to stop task, Error: {e}")
202
- task_manager = self.task_manager
203
209
  start_time = time.time()
204
210
  while True:
205
211
  try:
@@ -212,7 +218,6 @@ class TaskManager:
212
218
  raise Exception(f"Task stopping takes more than {timeout_s // 60} minutes. Testing aborted.")
213
219
  time.sleep(10)
214
220
 
215
- return self.task_client.stop_task(task_id)
216
221
 
217
222
  def get_usage_data(self, start_timestamp: str, end_timestamp: str) -> GetUsageDataResponse:
218
223
  """
@@ -0,0 +1,121 @@
1
+ import shlex
2
+ import os
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ def parse_server_command(cmd_str: str) -> tuple[str, dict, dict]:
8
+ """
9
+ parse server command
10
+ Maybe there are more than two types of server commands;
11
+ if an unknown one is found, we can add another parse function
12
+ """
13
+ if "vllm serve" in cmd_str:
14
+ return ("vllm", *parse_server_vllm_command(cmd_str))
15
+ elif "sglang.launch_server" in cmd_str:
16
+ return ("sglang", *parse_server_sglang_command(cmd_str))
17
+ else:
18
+ raise ValueError(f"Unknown serve command: {cmd_str}")
19
+
20
+ def extract_env_and_args(tokens: list) -> tuple[dict, list]:
21
+ """
22
+ Extract environment variables from the tokens list.
23
+ and add the params or flags to environment variables
24
+ """
25
+ env_vars = {}
26
+ while tokens and '=' in tokens[0] and not tokens[0].startswith('--'):
27
+ key, value = tokens.pop(0).split('=', 1)
28
+ env_vars[key] = value
29
+ for k, v in env_vars.items():
30
+ os.environ[k] = v
31
+ return env_vars, tokens
32
+
33
+ def parse_flags_and_args(tokens: list) -> dict:
34
+ """
35
+ parse flags and args
36
+ include three types: --flag=value, --flag value, and --flag
37
+ """
38
+ result = {}
39
+ i = 0
40
+ while i < len(tokens):
41
+ token = tokens[i]
42
+ if token.startswith('--'):
43
+ if '=' in token:
44
+ key, value = token[2:].split('=', 1)
45
+ result[key] = value.strip("'\"")
46
+ elif i + 1 < len(tokens) and not tokens[i + 1].startswith('--'):
47
+ result[token[2:]] = tokens[i + 1].strip("'\"")
48
+ i += 1
49
+ elif i + 1 < len(tokens) and not tokens[i + 1].startswith('-'):
50
+ result[token[1:]] = tokens[i + 1].strip("'\"")
51
+ i += 1
52
+ else:
53
+ result[token[2:]] = True
54
+ else:
55
+ logger.warning(f"Ignoring unknown token: {token}")
56
+ i += 1
57
+ return result
58
+
59
+ def parse_server_vllm_command(cmd_str: str) -> tuple[dict, dict]:
60
+ """ parse vllm command"""
61
+ tokens = shlex.split(cmd_str)
62
+ result = {}
63
+
64
+ # Extract environment variables
65
+ env_vars, tokens = extract_env_and_args(tokens)
66
+ if env_vars:
67
+ result["env_vars"] = env_vars
68
+
69
+ # vllm serve + model
70
+ if tokens[:2] != ['vllm', 'serve']:
71
+ raise ValueError("Invalid vllm serve command format. Example: vllm serve <model path>")
72
+
73
+ if len(tokens) < 3:
74
+ raise ValueError("Missing model path in vllm serve command. Example: vllm serve <model path>")
75
+
76
+ model_path = tokens[2]
77
+ result["model-path"] = model_path
78
+
79
+ flags = parse_flags_and_args(tokens[3:])
80
+ result.update(flags)
81
+ return (env_vars, result)
82
+
83
+ def parse_server_sglang_command(cmd_str: str) -> tuple[dict, dict]:
84
+ """ parse sglang command"""
85
+ tokens = shlex.split(cmd_str)
86
+ result = {}
87
+
88
+ # Extract environment variables
89
+ env_vars, tokens = extract_env_and_args(tokens)
90
+ if env_vars:
91
+ result["env_vars"] = env_vars
92
+ # python3 -m sglang.launch_server
93
+ if tokens[:3] != ['python3', '-m', 'sglang.launch_server'] and tokens[:3] != ['python', '-m', 'sglang.launch_server']:
94
+ raise ValueError("Invalid sglang command format. Example: python3 -m sglang.launch_server")
95
+
96
+ flags = parse_flags_and_args(tokens[3:])
97
+ result.update(flags)
98
+ return (env_vars, result)
99
+
100
+ def extract_gpu_num_from_serve_command(serve_args_dict: dict) -> int:
101
+ """ extract gpu num from serve command """
102
+ cmd_tp_size = 1
103
+ cmd_dp_size = 1
104
+ if "tensor-parallel-size" in serve_args_dict:
105
+ cmd_tp_size = int(serve_args_dict["tensor-parallel-size"])
106
+ elif "tp" in serve_args_dict:
107
+ cmd_tp_size = int(serve_args_dict["tp"])
108
+ elif "tp-size" in serve_args_dict:
109
+ cmd_tp_size = int(serve_args_dict["tp-size"])
110
+ if "data-parallel-size" in serve_args_dict:
111
+ cmd_dp_size = int(serve_args_dict["data-parallel-size"])
112
+ elif "dp" in serve_args_dict:
113
+ cmd_dp_size = int(serve_args_dict["dp"])
114
+ elif "dp-size" in serve_args_dict:
115
+ cmd_dp_size = int(serve_args_dict["dp-size"])
116
+ if "pipeline_parallel_size" in serve_args_dict or "pp" in serve_args_dict:
117
+ raise ValueError("Pipeline parallel size is not supported.")
118
+ cmd_gpu_num = cmd_tp_size * cmd_dp_size
119
+ if cmd_gpu_num > 8:
120
+ raise ValueError("Only support up to 8 GPUs for single task replica.")
121
+ return cmd_gpu_num
@@ -22,9 +22,13 @@ class ArtifactMetadata(BaseModel):
22
22
  user_id: Optional[str] = "" # The user ID associated with this artifact.
23
23
  artifact_name: Optional[str] = "" # Name of the artifact.
24
24
  artifact_description: Optional[str] = "" # Description of the artifact.
25
- artifact_tags: Optional[List[str]] = "" # Comma-separated tags for categorizing the artifact.
25
+ artifact_tags: Optional[List[str]] = None # Changed from List[str] with default to Optional[List[str]]
26
26
  artifact_volume_path: Optional[str] = "" # Path to the volume where the artifact is stored.
27
27
  artifact_template_id: Optional[str] = "" # The template ID used to create this artifact.
28
+ artifact_icon_link: Optional[str] = "" # Link to the icon for the artifact.
29
+ is_public: Optional[bool] = False # Indicates if the artifact is public.
30
+ org_id: Optional[str] = "" # Organization ID associated with this artifact.
31
+ update_by: Optional[str] = "" # User ID who last updated the artifact.
28
32
 
29
33
 
30
34
  class ArtifactData(BaseModel):
@@ -43,6 +47,29 @@ class ArtifactData(BaseModel):
43
47
  update_at: Optional[datetime] # Timestamp when the artifact was last updated.
44
48
 
45
49
 
50
+ class EnvParameter(BaseModel):
51
+ """
52
+ Environment parameter for an artifact.
53
+ """
54
+ key: str # Key for the environment parameter.
55
+ value: str # Value for the environment parameter.
56
+
57
+
58
+ class ArtifactDetails(BaseModel):
59
+ """
60
+ Additional details for an artifact.
61
+ """
62
+ model_description: Optional[str] = "" # Description of the model.
63
+
64
+
65
+ class ArtifactParameters(BaseModel):
66
+ """
67
+ Parameters for an artifact.
68
+ """
69
+ env_parameters: Optional[List[EnvParameter]] = None # Environment parameters.
70
+ model_parameters: Optional[List["ModelParameter"]] = None # Model parameters.
71
+
72
+
46
73
  class Artifact(BaseModel):
47
74
  """
48
75
  Representation of an artifact, including its data and metadata.
@@ -53,6 +80,7 @@ class Artifact(BaseModel):
53
80
  build_status: Optional[BuildStatus] = None # Status of the artifact build (e.g., in progress, succeeded, failed).
54
81
  artifact_data: Optional[ArtifactData] = None # Data associated with the artifact.
55
82
  artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
83
+ artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
56
84
  big_files_metadata: Optional[List[BigFileMetadata]] = None # Metadata for large files associated with the artifact.
57
85
 
58
86
 
@@ -69,7 +97,11 @@ class CreateArtifactRequest(BaseModel):
69
97
  """
70
98
  artifact_name: str # The name of the artifact to create.
71
99
  artifact_description: Optional[str] = "" # Description of the artifact.
72
- artifact_tags: Optional[List[str]] = None # Tags for the artifact, separated by commas.
100
+ artifact_tags: Optional[List[str]] = None # Tags for the artifact.
101
+ deployment_type: Optional[str] = "" # Deployment type
102
+ template_id: Optional[str] = "" # Template ID
103
+ env_parameters: Optional[List["EnvParameter"]] = None # Environment parameters.
104
+ model_description: Optional[str] = "" # Description of the model.
73
105
  model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact.
74
106
 
75
107
 
@@ -79,9 +111,10 @@ class CreateArtifactResponse(BaseModel):
79
111
  """
80
112
  artifact_id: str # ID of the newly created artifact.
81
113
  upload_link: str # URL to upload the artifact data.
114
+ artifact_icon_link: Optional[str] = "" # Link to the icon for the artifact.
82
115
 
83
116
 
84
- class GetBigFileUploadUrlRequest(BaseModel):
117
+ class ResumableUploadLinkRequest(BaseModel):
85
118
  """
86
119
  Request to generate a pre-signed URL for uploading large files.
87
120
  """
@@ -90,7 +123,7 @@ class GetBigFileUploadUrlRequest(BaseModel):
90
123
  file_type: Optional[str] = "" # MIME type of the file.
91
124
 
92
125
 
93
- class GetBigFileUploadUrlResponse(BaseModel):
126
+ class ResumableUploadLinkResponse(BaseModel):
94
127
  """
95
128
  Response containing a pre-signed upload URL for large files.
96
129
  """
@@ -98,6 +131,13 @@ class GetBigFileUploadUrlResponse(BaseModel):
98
131
  upload_link: str # Pre-signed upload URL for the file.
99
132
 
100
133
 
134
+ class RebuildArtifactRequest(BaseModel):
135
+ """
136
+ Request object for rebuilding an artifact.
137
+ """
138
+ artifact_id: str # ID of the artifact to rebuild.
139
+
140
+
101
141
  class RebuildArtifactResponse(BaseModel):
102
142
  """
103
143
  Response object after rebuilding an artifact.
@@ -106,6 +146,91 @@ class RebuildArtifactResponse(BaseModel):
106
146
  build_status: BuildStatus # Status of the artifact build (e.g., in progress, succeeded, failed).
107
147
 
108
148
 
149
+ class EndpointInfo(BaseModel):
150
+ """
151
+ Additional information about the task endpoint.
152
+ """
153
+ endpoint_status: Optional[TaskEndpointStatus] = None # Current status of the task (e.g., running, stopped).
154
+ endpoint_url: Optional[str] = "" # URL for accessing the task endpoint.
155
+
156
+
157
+ class GetAllArtifactsWithEndpointsResponse(BaseModel):
158
+ """
159
+ Response containing a list of all artifacts with their endpoints.
160
+ """
161
+ artifact_id: str # Unique identifier for the artifact.
162
+ artifact_data: Optional[ArtifactData] = None # Data associated with the artifact.
163
+ artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
164
+ artifact_details: Optional[ArtifactDetails] = None # Additional details about the artifact.
165
+ artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
166
+ big_files_metadata: Optional[List[BigFileMetadata]] = None # Metadata for large files.
167
+ endpoints: Optional[List[EndpointInfo]] = None # Endpoints associated with the artifact.
168
+
169
+
170
+ class GetArtifactResponse(BaseModel):
171
+ """
172
+ Response containing the details of an artifact.
173
+ """
174
+ artifact_id: str # Unique identifier for the artifact.
175
+ artifact_link: Optional[str] = "" # Link to access the artifact.
176
+ artifact_resource: Optional[str] = "" # Resource associated with the artifact.
177
+ build_file_name: Optional[str] = "" # Name of the file used for the build.
178
+ build_status: Optional[str] = "" # Status of the artifact build.
179
+ artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
180
+ artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
181
+ big_files_metadata: Optional[List[BigFileMetadata]] = None # Metadata for large files.
182
+
183
+
184
+ class GetPublicArtifactsResponse(BaseModel):
185
+ """
186
+ Response containing public artifact details.
187
+ """
188
+ artifact_id: str # Unique identifier for the artifact.
189
+ artifact_data: Optional[ArtifactData] = None # Data associated with the artifact.
190
+ artifact_metadata: Optional[ArtifactMetadata] = None # Metadata describing the artifact.
191
+ artifact_details: Optional[ArtifactDetails] = None # Additional details about the artifact.
192
+ artifact_parameters: Optional[ArtifactParameters] = None # Parameters for the artifact.
193
+ endpoints: Optional[List[EndpointInfo]] = None # Endpoints associated with the artifact.
194
+
195
+
196
+ class UpdateArtifactRequestBody(BaseModel):
197
+ """
198
+ Request object for updating an artifact.
199
+ """
200
+ artifact_name: Optional[str] = "" # The name of the artifact.
201
+ artifact_description: Optional[str] = "" # Description of the artifact.
202
+ artifact_tags: Optional[List[str]] = None # Tags for the artifact.
203
+ env_parameters: Optional[List[EnvParameter]] = None # Environment parameters.
204
+ model_description: Optional[str] = "" # Description of the model.
205
+ model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact.
206
+ need_update_icon: Optional[bool] = False # Whether to update the artifact icon.
207
+
208
+
209
+ class UpdateArtifactResponse(BaseModel):
210
+ """
211
+ Response object after updating an artifact.
212
+ """
213
+ artifact_id: str # ID of the updated artifact.
214
+ status: str # Status of the update operation.
215
+ artifact_icon_link: Optional[str] = "" # Link to the icon for the artifact.
216
+
217
+
218
+ class GetTemplatesResponse(BaseModel):
219
+ """
220
+ Response containing a list of artifact templates.
221
+ """
222
+ artifact_templates: list["Template"] # List of artifact templates.
223
+
224
+
225
+ class Template(BaseModel):
226
+ """
227
+ Template for creating an artifact.
228
+ """
229
+ template_id: str # Unique identifier for the artifact template.
230
+ template_data: Optional["TemplateData"] = None # Data for the artifact template.
231
+ template_metadata: Optional["TemplateMetadata"] = None # Metadata for the artifact template.
232
+
233
+
109
234
  class DeleteArtifactResponse(BaseModel):
110
235
  """
111
236
  Response object after deleting an artifact.
@@ -132,22 +257,6 @@ class DeleteBigfileResponse(BaseModel):
132
257
  status: Optional[str] = "" # Status of the deletion process.
133
258
 
134
259
 
135
- class GetPublicTemplatesResponse(BaseModel):
136
- """
137
- Response containing a list of artifact templates.
138
- """
139
- artifact_templates: list["ArtifactTemplate"] # List of artifact templates.
140
-
141
-
142
- class ArtifactTemplate(BaseModel):
143
- """
144
- Template for creating an artifact.
145
- """
146
- template_id: str # Unique identifier for the artifact template.
147
- template_data: Optional["TemplateData"] = None # Data for the artifact template.
148
- template_metadata: Optional["TemplateMetadata"] = None # Metadata for the artifact template.
149
-
150
-
151
260
  class TemplateMetadata(BaseModel):
152
261
  """
153
262
  Metadata for an artifact template.
@@ -158,6 +267,7 @@ class TemplateMetadata(BaseModel):
158
267
  is_public: Optional[bool] = False # Indicates if the template is public.
159
268
  update_at: Optional[str] = None # Timestamp when the template was last updated.
160
269
  update_by: Optional[str] = "" # ID of the user who last updated the template.
270
+ status: Optional[str] = "" # Status of the template.
161
271
 
162
272
 
163
273
  class TemplateData(BaseModel):
@@ -173,6 +283,7 @@ class TemplateData(BaseModel):
173
283
  resources: Optional["ResourcesTemplate"] = None # Resource allocation template.
174
284
  tags: Optional[List[str]] = None # Tags associated with the artifact template.
175
285
  volume_path: Optional[str] = "" # Path to the volume where the artifact is stored.
286
+ env_parameters: Optional[List["EnvParameter"]] = None # Added missing field
176
287
 
177
288
 
178
289
  class ModelParameter(BaseModel):
@@ -204,8 +315,9 @@ class CreateArtifactFromTemplateRequest(BaseModel):
204
315
  """
205
316
  Request object to create a new artifact from a template.
206
317
  """
207
- user_id: str # The user ID creating the artifact.
318
+ # user_id: str # The user ID creating the artifact.
208
319
  artifact_template_id: str # The ID of the artifact template to use.
320
+ env_parameters: Optional[List["EnvParameter"]] = None # Environment parameters.
209
321
 
210
322
 
211
323
  class CreateArtifactFromTemplateResponse(BaseModel):
@@ -304,14 +416,6 @@ class TaskConfig(BaseModel):
304
416
  last_update_timestamp: Optional[int] = 0 # Timestamp when the task was last updated.
305
417
 
306
418
 
307
- class EndpointInfo(BaseModel):
308
- """
309
- Additional information about the task endpoint.
310
- """
311
- endpoint_status: Optional[TaskEndpointStatus] = None # Current status of the task (e.g., running, stopped).
312
- endpoint_url: Optional[str] = "" # URL for accessing the task endpoint.
313
-
314
-
315
419
  class UserPreference(BaseModel):
316
420
  """
317
421
  User preference for a task.
@@ -329,8 +433,8 @@ class Task(BaseModel):
329
433
  config: Optional[TaskConfig] = None # Configuration data for the task.
330
434
  endpoint_info: Optional[EndpointInfo] = None # Additional information about the task endpoint.
331
435
  cluster_endpoints: Optional[List[EndpointInfo]] = None # Endpoints for the task cluster.
332
- task_status: Optional[TaskStatus] = "" # Status of the task.
333
- readiness_status: Optional[str] = "" # Readiness status of the task.
436
+ task_status: Optional[TaskStatus] = None # Status of the task.
437
+ readiness_status: Optional[str] = None # Readiness status of the task.
334
438
  user_preference: Optional[UserPreference] = None # User preference for the task.
335
439
 
336
440
 
@@ -131,34 +131,18 @@ class TestArtifactManager(unittest.TestCase):
131
131
  upload_link = "http://upload-link"
132
132
  bigfile_upload_link = "http://bigfile-upload-link"
133
133
  artifact_file_path = "./testdata/test.zip"
134
- model_file_path = "./testdata/model.zip"
134
+ model_directory= "./testdata"
135
135
 
136
136
  mock_create_artifact.return_value = CreateArtifactResponse(artifact_id="1", upload_link=upload_link)
137
- mock_get_bigfile_upload_url.return_value = GetBigFileUploadUrlResponse(artifact_id="1",
137
+ mock_get_bigfile_upload_url.return_value = ResumableUploadLinkResponse(artifact_id="1",
138
138
  upload_link=bigfile_upload_link)
139
139
 
140
140
  artifact_id = self.artifact_manager.create_artifact_with_model_files(artifact_name="artifact_name",
141
141
  artifact_file_path=artifact_file_path,
142
- model_file_paths=[model_file_path])
142
+ model_directory=model_directory)
143
143
  self.assertEqual(artifact_id, "1")
144
144
  mock_upload_small_file.assert_called_once_with(upload_link, artifact_file_path, "application/zip")
145
- mock_upload_large_file.assert_called_once_with(bigfile_upload_link, model_file_path)
146
-
147
- @patch('gmicloud._internal._client._artifact_client.ArtifactClient.create_artifact')
148
- @patch('gmicloud._internal._client._file_upload_client.FileUploadClient.upload_small_file')
149
- def test_create_artifact_with_model_files_raises_file_not_found_error_for_model_file(self, mock_create_artifact,
150
- mock_upload_small_file):
151
- upload_link = "http://upload-link"
152
- artifact_file_path = "./testdata/test.zip"
153
- model_file_path = "./testdata/nonexistent.zip"
154
-
155
- mock_create_artifact.return_value = CreateArtifactResponse(artifact_id="1", upload_link=upload_link)
156
-
157
- with self.assertRaises(FileNotFoundError) as context:
158
- self.artifact_manager.create_artifact_with_model_files(artifact_name="artifact_name",
159
- artifact_file_path=artifact_file_path,
160
- model_file_paths=[model_file_path])
161
- self.assertTrue(f"File not found: {model_file_path}" in str(context.exception))
145
+ self.assertEqual(mock_upload_large_file.call_count, 6) # 6 files in testdata directory
162
146
 
163
147
  @patch('gmicloud._internal._client._artifact_client.ArtifactClient.rebuild_artifact')
164
148
  def test_rebuild_artifact_rebuilds_successfully(self, mock_rebuild_artifact):
@@ -203,7 +187,7 @@ class TestArtifactManager(unittest.TestCase):
203
187
  upload_link = "http://upload-link"
204
188
  model_file_path = "./testdata/model.zip"
205
189
 
206
- mock_get_bigfile_upload_url.return_value = GetBigFileUploadUrlResponse(artifact_id="1", upload_link=upload_link)
190
+ mock_get_bigfile_upload_url.return_value = ResumableUploadLinkResponse(artifact_id="1", upload_link=upload_link)
207
191
  upload_link = self.artifact_manager.get_bigfile_upload_url("1", model_file_path)
208
192
  self.assertEqual(upload_link, upload_link)
209
193
 
@@ -253,7 +237,7 @@ class TestArtifactManager(unittest.TestCase):
253
237
 
254
238
  @patch('gmicloud._internal._client._artifact_client.ArtifactClient.get_public_templates')
255
239
  def test_get_artifact_templates_returns_templates(self, mock_get_public_templates):
256
- mock_get_public_templates.return_value = [ArtifactTemplate(template_id="1", template_data=TemplateData(name="Template1"))]
240
+ mock_get_public_templates.return_value = [Template(template_id="1", template_data=TemplateData(name="Template1"))]
257
241
  templates = self.artifact_manager.get_public_templates()
258
242
  self.assertEqual(len(templates), 1)
259
243
  self.assertEqual(templates[0].template_id, "1")
@@ -1,8 +1,8 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: gmicloud
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: GMI Cloud Python SDK
5
- Author-email: GMI <support@gmicloud.ai>
5
+ Author-email: GMI <gmi@gmitec.net>
6
6
  License: MIT
7
7
  Classifier: Programming Language :: Python :: 3
8
8
  Classifier: License :: OSI Approved :: MIT License
@@ -10,7 +10,7 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.6
11
11
  Description-Content-Type: text/markdown
12
12
 
13
- # GMICloud SDK (Beta)
13
+ # GMICloud SDK
14
14
 
15
15
  ## Overview
16
16
  Before you start: Our service and GPU resource is currently invite-only so please contact our team (getstarted@gmicloud.ai) to get invited if you don't have one yet.
@@ -45,7 +45,7 @@ There are two ways to configure the SDK:
45
45
  Set the following environment variables:
46
46
 
47
47
  ```shell
48
- export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
48
 + export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID> # Pick whatever ID you need.
49
49
  export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
50
50
  export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
51
51
  ```
@@ -73,7 +73,7 @@ pip install -r requirements.txt
73
73
  python -m examples.create_task_from_artifact_template.py
74
74
  ```
75
75
 
76
- ### 2. Create an inference task from an artifact template
76
 + ### 2. Example of creating an inference task from an artifact template
77
77
 
78
78
  This is the simplest example to deploy an inference task using an existing artifact template:
79
79
 
@@ -119,6 +119,97 @@ print(call_chat_completion(cli, task.task_id))
119
119
 
120
120
  ```
121
121
 
122
+ ### 3. Example of creating an inference task based on custom model with local vllm / SGLang serve command
123
+ * Full example is available at [examples/inference_task_with_custom_model.py](https://github.com/GMISWE/python-sdk/blob/main/examples/inference_task_with_custom_model.py)
124
+
125
+ 1. Prepare custom model checkpoint (using a model downloaded from HF as an example)
126
+
127
+ ```python
128
+ # Download model from huggingface
129
+ from huggingface_hub import snapshot_download
130
+
131
+ model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
132
+ model_checkpoint_save_dir = "files/model_garden"
133
+ snapshot_download(repo_id=model_name, local_dir=model_checkpoint_save_dir)
134
+ ```
135
+
136
+ 2. Find a template of specific SGLang version
137
+
138
+ ```python
139
+ # export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
140
+ # export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
141
+ # export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
142
+ cli = Client()
143
+
144
+ # List templates offered by GMI cloud
145
+ templates = cli.artifact_manager.list_public_template_names()
146
+ print(f"Found {len(templates)} templates: {templates}")
147
+ ```
148
+
149
+ 3. Pick a template (e.g. SGLang 0.4.5) and prepare a local serve command
150
+
151
+ ```python
152
+ # Example for vllm server
153
+ picked_template_name = "gmi_vllm_0.8.4"
154
+ serve_command = "vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --gpu-memory-utilization 0.8"
155
+
156
+ # Example for sglang server
157
+ picked_template_name = "gmi_sglang_0.4.5.post1"
158
+ serve_command = "python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --mem-fraction-static 0.8 --tp 2"
159
+ ```
160
+
161
+ 4. Create an artifact and upload custom model. The artifact can be reused to create inference tasks later. Artifact also suggests recommended resources for each inference server replica
162
+
163
+ ```python
164
+ artifact_id, recommended_replica_resources = cli.artifact_manager.create_artifact_from_template_name(
165
+ artifact_template_name=picked_template_name,
166
+ env_parameters={
167
+ "SERVER_COMMAND": serve_command,
168
+ "GPU_TYPE": "H100",
169
+ }
170
+ )
171
+ print(f"Created artifact {artifact_id} with recommended resources: {recommended_replica_resources}")
172
+
173
+ # Upload model files to artifact
174
+ cli.artifact_manager.upload_model_files_to_artifact(artifact_id, model_checkpoint_save_dir)
175
+ ```
176
+
177
+ 5. Create Inference task (defining min/max inference replica), start and wait
178
+
179
+ ```python
180
+ new_task = Task(
181
+ config=TaskConfig(
182
+ ray_task_config=RayTaskConfig(
183
+ artifact_id=artifact_id,
184
+ file_path="serve",
185
+ deployment_name="app",
186
+ replica_resource=recommended_replica_resources,
187
+ ),
188
+ task_scheduling = TaskScheduling(
189
+ scheduling_oneoff=OneOffScheduling(
190
+ trigger_timestamp=int(datetime.now().timestamp()),
191
+ min_replicas=1,
192
+ max_replicas=4,
193
+ )
194
+ ),
195
+ ),
196
+ )
197
+ task = cli.task_manager.create_task(new_task)
198
+ task_id = task.task_id
199
+ task = cli.task_manager.get_task(task_id)
200
+ print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
201
+
202
+ # Start Task and wait for it to be ready
203
+ cli.task_manager.start_task_and_wait(task_id)
204
+ ```
205
+
206
+ 6. Test with sample chat completion request
207
+
208
+ ```python
209
+ print(call_chat_completion(cli, task_id))
210
+ ```
211
+
212
+
122
213
  ## API Reference
123
214
 
124
215
  ### Client
@@ -144,4 +235,3 @@ password: Optional[str] = ""
144
235
  * get_task(task_id: str): Retrieve the status and details of a specific task.
145
236
 
146
237
  ## Notes & Troubleshooting
147
- k
@@ -1,27 +1,28 @@
1
- gmicloud/__init__.py,sha256=aIgu4MAw4nExv781-pzSZLG8MscqAMZ5lM5fGyqg7QU,984
1
+ gmicloud/__init__.py,sha256=xSzrAxiby5Te20yhy1ZylGHmQKVV_w1QjFe6D99VZxw,968
2
2
  gmicloud/client.py,sha256=G0tD0xQnpqDKS-3l-AAU-K3FAHOsqsTzsAq2NVxiamY,10539
3
3
  gmicloud/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- gmicloud/_internal/_config.py,sha256=qIH76TSyS3MQWe62LHI46RJhDnklNFisdajY75oUAqE,218
4
+ gmicloud/_internal/_config.py,sha256=BenHiCnedpHA5phz49UWBXa1mg_q9W8zYs7A8esqGcU,494
5
5
  gmicloud/_internal/_constants.py,sha256=Y085dwFlqdFkCf39iBfxz39QiiB7lX59ayNJjB86_m4,378
6
6
  gmicloud/_internal/_enums.py,sha256=5d6Z8TFJYCmhNI1TDbPpBbG1tNe96StIEH4tEw20RZk,789
7
7
  gmicloud/_internal/_exceptions.py,sha256=hScBq7n2fOit4_umlkabZJchY8zVbWSRfWM2Y0rLCbw,306
8
- gmicloud/_internal/_models.py,sha256=eArBzdhiMosLVZVUyoE_mvfxRS8yKPkuqhlDaa57Iog,17863
8
+ gmicloud/_internal/_models.py,sha256=2l65aZdQxyXlY0Dj23P6NFf59_zopgf9OoUMLAz5T2U,22685
9
9
  gmicloud/_internal/_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- gmicloud/_internal/_client/_artifact_client.py,sha256=-CyMdTauVovuv3whs8yUqmv3-WW2e9m2GoEG9D6eNbc,8374
10
+ gmicloud/_internal/_client/_artifact_client.py,sha256=0lyHAdUybN8A1mEwZ7p1yK2yQEyoDG2vTB4Qe5RI2ik,9974
11
11
  gmicloud/_internal/_client/_decorator.py,sha256=sy4gxzsUB6ORXHw5pqmMf7TTlK41Nmu1fhIhK2AIsbY,670
12
- gmicloud/_internal/_client/_file_upload_client.py,sha256=1JRs4X57S3EScPIP9w2DC1Uo6_Wbcjumcw3nVM7uIGM,4667
12
+ gmicloud/_internal/_client/_file_upload_client.py,sha256=r29iXG_0DOi-uTLu9plpfZMWGqOck_AdDHJZprcf8uI,4918
13
13
  gmicloud/_internal/_client/_http_client.py,sha256=j--3emTjJ_l9CTdnkTbcpf7gYcUEl341pv2O5cU67l0,5741
14
14
  gmicloud/_internal/_client/_iam_client.py,sha256=pgOXIqp9aJvcIUCEVkYPEyMUyxBftecojHAbs8Gbl94,7013
15
15
  gmicloud/_internal/_client/_task_client.py,sha256=69OqZC_kwSDkTSVVyi51Tn_OyUV6R0nin4z4gLfZ-Lg,6141
16
16
  gmicloud/_internal/_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- gmicloud/_internal/_manager/_artifact_manager.py,sha256=TBvGps__Kk1Ym7jztY3tNZ3XomKPrDIFPV7XyyLwHuw,15941
17
+ gmicloud/_internal/_manager/_artifact_manager.py,sha256=yK4veVwCY1cipy3rdnGvhnKNvkBx3SYYpHKqzjtXJn0,20731
18
18
  gmicloud/_internal/_manager/_iam_manager.py,sha256=nAqPCaUfSXTnx2MEQa8e0YUOBFYWDRiETgK1PImdf4o,1167
19
- gmicloud/_internal/_manager/_task_manager.py,sha256=YDUcAdRkJhGumA1LLfpXfYs6jmLnev8P27UItPZHUBs,11268
19
+ gmicloud/_internal/_manager/_task_manager.py,sha256=zBW_TkYhbSvAc_p7Q3z6Vgl2Cayv8zIkawTT6OcB4x4,11291
20
+ gmicloud/_internal/_manager/serve_command_utils.py,sha256=xjB6B9CNAmohou41H755iCCgkLNrjvdnu9NcJApTm1k,4373
20
21
  gmicloud/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- gmicloud/tests/test_artifacts.py,sha256=q1jiTk5DN4G3LCLCO_8KbWArdc6RG3sETe1MCEt-vbI,16979
22
+ gmicloud/tests/test_artifacts.py,sha256=w0T0EpATIGLrSUPaBfTZ2ZC_X2XeaTlFEi3DZ4evIcE,15825
22
23
  gmicloud/tests/test_tasks.py,sha256=yL-aFf80ShgTyxEONTWh-xbWDf5XnUNtIeA5hYvhKM0,10963
23
24
  gmicloud/utils/uninstall_packages.py,sha256=zzuuaJPf39oTXWZ_7tUAGseoxocuCbbkoglJSD5yDrE,1127
24
- gmicloud-0.1.6.dist-info/METADATA,sha256=rqwbl1_3RfzhdBpn9eb3u1My3pk10k7T3r23oEiTshY,4675
25
- gmicloud-0.1.6.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
26
- gmicloud-0.1.6.dist-info/top_level.txt,sha256=AZimLw3y0WPpLiSiOidZ1gD0dxALh-jQNk4fxC05hYE,9
27
- gmicloud-0.1.6.dist-info/RECORD,,
25
+ gmicloud-0.1.7.dist-info/METADATA,sha256=LFLXvJeQ9ocyJQ8hFbTaNZAWJ7NvsO7FCN4tyaN5YY8,7927
26
+ gmicloud-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
27
+ gmicloud-0.1.7.dist-info/top_level.txt,sha256=AZimLw3y0WPpLiSiOidZ1gD0dxALh-jQNk4fxC05hYE,9
28
+ gmicloud-0.1.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5