gmicloud 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ import os
2
+ import jwt
3
+ import time
4
+ import json
5
+ import logging
6
+ import threading
7
+ from pathlib import Path
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ CONFIG_FILE_NAME = ".gmicloud.config.json"
12
+
13
+ # create the thread lock object
14
+ lock = threading.Lock()
15
+
16
+ def _read_config_file()->dict|None:
17
+ """Read the config file."""
18
+ base_dir = Path.home()
19
+ config_file_path =os.path.join(base_dir,CONFIG_FILE_NAME)
20
+ if not os.path.exists(config_file_path):
21
+ return None
22
+ with lock:
23
+ # open the config file, read mode with lock
24
+ with open(config_file_path,"r") as fr:
25
+ return json.loads(fr.read())
26
+
27
+
28
+ def _write_config_file(config_file_path:str,config_dic:dict)->None:
29
+ """Write the config file."""
30
+ with lock:
31
+ # open the config file, write mode with lock
32
+ with open(config_file_path,"w") as fw:
33
+ # transform the config dictionary to JSON format and write it to the file
34
+ fw.write(json.dumps(config_dic))
35
+
36
+
37
+ def write_user_refresh_token_to_system_config(email:str,refresh_token:str)->bool:
38
+ """Write the user refresh token to the system config file."""
39
+ base_dir = Path.home()
40
+ config_file_path = os.path.join(base_dir,CONFIG_FILE_NAME)
41
+ try:
42
+ # check whether the config file exists: if not, create it; if yes, update the refresh token
43
+ if not os.path.exists(config_file_path):
44
+ config_dic = { email : {"refresh_token": refresh_token} }
45
+ _write_config_file(config_file_path,config_dic)
46
+ else:
47
+ config_dic = _read_config_file()
48
+ if not config_dic.get(email):
49
+ config_dic[email] = dict()
50
+ config_dic[email] = {"refresh_token": refresh_token}
51
+ _write_config_file(config_file_path,config_dic)
52
+ except Exception as e:
53
+ logger.error("write file wrong :", e)
54
+ return False
55
+ return True
56
+
57
+
58
+ def get_user_refresh_token_from_system_config(email:str)->str|None:
59
+ """Get the user refresh token from the system config file."""
60
+ config_dic = _read_config_file()
61
+ if not config_dic or not config_dic.get(email):
62
+ return None
63
+ return config_dic[email]["refresh_token"]
64
+
65
+
66
+ def _parese_refresh_token(refresh_token:str)->dict:
67
+ """Parse the refresh token."""
68
+ return jwt.decode(refresh_token, options={"verify_signature": False})
69
+
70
+
71
+ def is_refresh_token_expired(refresh_token:str)->bool:
72
+ """Check whether the refresh token is expired. Return True if expired, otherwise False."""
73
+ try:
74
+ refresh_token_time = _parese_refresh_token(refresh_token)['exp']
75
+ except Exception as e:
76
+ logger.error("parse refresh token wrong :", e)
77
+ return True
78
+ return refresh_token_time < time.time()
@@ -6,7 +6,11 @@ from ._http_client import HTTPClient
6
6
  from .._config import IAM_SERVICE_BASE_URL
7
7
  from .._models import *
8
8
  from .._constants import CLIENT_ID_HEADER, AUTHORIZATION_HEADER
9
-
9
+ from ._auth_config import (
10
+ get_user_refresh_token_from_system_config,
11
+ write_user_refresh_token_to_system_config,
12
+ is_refresh_token_expired
13
+ )
10
14
  logger = logging.getLogger(__name__)
11
15
 
12
16
 
@@ -38,42 +42,50 @@ class IAMClient:
38
42
  Returns True if login is successful, otherwise False.
39
43
  """
40
44
  try:
41
- custom_headers = {CLIENT_ID_HEADER: self._client_id}
42
- req = AuthTokenRequest(email=self._email, password=self._password)
43
- auth_tokens_result = self.client.post("/me/auth-tokens", custom_headers, req.model_dump())
44
-
45
- if not auth_tokens_result:
46
- logger.error("Login failed: Received empty response from auth-tokens endpoint")
47
- return False
48
-
49
- auth_tokens_resp = AuthTokenResponse.model_validate(auth_tokens_result)
50
-
51
- # Handle 2FA
52
- if auth_tokens_resp.is2FARequired:
53
- for attempt in range(3):
54
- code = input(f"Attempt {attempt + 1}/3: Please enter the 2FA code: ")
55
- create_session_req = CreateSessionRequest(
56
- type="native", authToken=auth_tokens_resp.authToken, otpCode=code
57
- )
58
- try:
59
- session_result = self.client.post("/me/sessions", custom_headers,
60
- create_session_req.model_dump())
61
- if session_result:
62
- break
63
- except RequestException:
64
- logger.warning("Invalid 2FA code, please try again.")
65
- if attempt == 2:
66
- logger.error("Failed to create session after 3 incorrect 2FA attempts.")
67
- return False
45
+ # Check whether the stored refresh token is available and not expired; if so, use it to refresh the session
46
+ temp_refresh_token = get_user_refresh_token_from_system_config(self._email)
47
+ if temp_refresh_token and not is_refresh_token_expired(temp_refresh_token):
48
+ self._refresh_token = temp_refresh_token
49
+ self.refresh_token()
68
50
  else:
69
- create_session_req = CreateSessionRequest(type="native", authToken=auth_tokens_resp.authToken,
70
- otpCode=None)
71
- session_result = self.client.post("/me/sessions", custom_headers, create_session_req.model_dump())
72
-
73
- create_session_resp = CreateSessionResponse.model_validate(session_result)
74
-
75
- self._access_token = create_session_resp.accessToken
76
- self._refresh_token = create_session_resp.refreshToken
51
+ custom_headers = {CLIENT_ID_HEADER: self._client_id}
52
+ req = AuthTokenRequest(email=self._email, password=self._password)
53
+ auth_tokens_result = self.client.post("/me/auth-tokens", custom_headers, req.model_dump())
54
+
55
+ if not auth_tokens_result:
56
+ logger.error("Login failed: Received empty response from auth-tokens endpoint")
57
+ return False
58
+
59
+ auth_tokens_resp = AuthTokenResponse.model_validate(auth_tokens_result)
60
+
61
+ # Handle 2FA
62
+ if auth_tokens_resp.is2FARequired:
63
+ for attempt in range(3):
64
+ code = input(f"Attempt {attempt + 1}/3: Please enter the 2FA code: ")
65
+ create_session_req = CreateSessionRequest(
66
+ type="native", authToken=auth_tokens_resp.authToken, otpCode=code
67
+ )
68
+ try:
69
+ session_result = self.client.post("/me/sessions", custom_headers,
70
+ create_session_req.model_dump())
71
+ if session_result:
72
+ break
73
+ except RequestException:
74
+ logger.warning("Invalid 2FA code, please try again.")
75
+ if attempt == 2:
76
+ logger.error("Failed to create session after 3 incorrect 2FA attempts.")
77
+ return False
78
+ else:
79
+ create_session_req = CreateSessionRequest(type="native", authToken=auth_tokens_resp.authToken,
80
+ otpCode=None)
81
+ session_result = self.client.post("/me/sessions", custom_headers, create_session_req.model_dump())
82
+
83
+ create_session_resp = CreateSessionResponse.model_validate(session_result)
84
+
85
+ self._access_token = create_session_resp.accessToken
86
+ self._refresh_token = create_session_resp.refreshToken
87
+ # on first login, write the refresh token to the system config
88
+ write_user_refresh_token_to_system_config(self._email,self._refresh_token)
77
89
  self._user_id = self.parse_user_id()
78
90
 
79
91
  # Fetch profile to get organization ID
@@ -96,7 +108,12 @@ class IAMClient:
96
108
  """
97
109
  try:
98
110
  custom_headers = {CLIENT_ID_HEADER: self._client_id}
99
- result = self.client.patch("/me/sessions", custom_headers, {"refreshToken": self._refresh_token})
111
+ try:
112
+ result = self.client.patch("/me/sessions", custom_headers, {"refreshToken": self._refresh_token})
113
+ except Exception as err:
114
+ logger.error(f"{str(err)}, please re-login.")
115
+ write_user_refresh_token_to_system_config(self._email,"")
116
+ return False
100
117
 
101
118
  if not result:
102
119
  logger.error("Failed to refresh token: Empty response received")
@@ -105,7 +122,9 @@ class IAMClient:
105
122
  resp = CreateSessionResponse.model_validate(result)
106
123
  self._access_token = resp.accessToken
107
124
  self._refresh_token = resp.refreshToken
108
-
125
+ # the _refresh_token is updated whenever this function is called,
126
+ # so write it to the system config file to keep the stored token's expiry current
127
+ write_user_refresh_token_to_system_config(self._email,self._refresh_token)
109
128
  return True
110
129
  except (RequestException, ValueError) as e:
111
130
  logger.error(f"Token refresh failed: {e}")
@@ -0,0 +1,111 @@
1
+
2
+ import logging
3
+ from requests.exceptions import RequestException
4
+
5
+ from ._http_client import HTTPClient
6
+ from ._decorator import handle_refresh_token
7
+ from ._iam_client import IAMClient
8
+ from .._config import IAM_SERVICE_BASE_URL
9
+ from .._models import *
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class VideoClient:
16
+ """
17
+ A client for interacting with the video service API.
18
+
19
+ This client provides methods to retrieve, create, update, and stop video tasks
20
+ through HTTP calls to the video service.
21
+ """
22
+
23
+ def __init__(self, iam_client: IAMClient):
24
+ """
25
+ Initializes the VideoClient with the given base URL for the video service.
26
+ """
27
+ self.client = HTTPClient(IAM_SERVICE_BASE_URL+ "/ie/requestqueue")
28
+ self.iam_client = iam_client
29
+
30
+
31
+ @handle_refresh_token
32
+ def get_request_detail(self, request_id: str) -> GetRequestResponse:
33
+ """
34
+ Retrieves detailed information about a specific request by its ID. This endpoint requires authentication with a bearer token and only returns requests belonging to the authenticated organization.
35
+
36
+ :param request_id: The ID of the request to be retrieved.
37
+ :return: Details of the GetRequestResponse successfully retrieved
38
+ """
39
+ try:
40
+ response = self.client.get(f"/requests/{request_id}", self.iam_client.get_custom_headers())
41
+ return GetRequestResponse.model_validate(response) if response else None
42
+ except (RequestException, ValueError) as e:
43
+ logger.error(f"Failed to retrieve request details for {request_id}: {e}")
44
+ return None
45
+
46
+
47
+ @handle_refresh_token
48
+ def get_requests(self, model_id: str) -> List[GetRequestResponse]:
49
+ """
50
+ Retrieves a list of requests submitted by the authenticated user for a specific model. This endpoint requires authentication with a bearer token and filters results by the authenticated organization.
51
+
52
+ :param model_id: The ID of the model to be retrieved.
53
+ :return: List of GetRequestResponse successfully retrieved
54
+ """
55
+ try:
56
+ response = self.client.get("/requests", self.iam_client.get_custom_headers(), {"model_id": model_id})
57
+ requests = response.get('requests', []) if response else []
58
+ return [GetRequestResponse.model_validate(req) for req in requests] if requests else None
59
+ except (RequestException, ValueError) as e:
60
+ logger.error(f"Failed to retrieve requests for model {model_id}: {e}")
61
+ return None
62
+
63
+
64
+ @handle_refresh_token
65
+ def create_request(self, request: SubmitRequestRequest) -> SubmitRequestResponse:
66
+ """
67
+ Submits a new asynchronous request to process a specified model with provided parameters. This endpoint requires authentication with a bearer token.
68
+
69
+ :param request: The request data to be created by SubmitRequestRequest model.
70
+ :return: The created request data as SubmitRequestResponse model.
71
+ """
72
+ try:
73
+ response = self.client.post("/requests", self.iam_client.get_custom_headers(), request.model_dump())
74
+ return SubmitRequestResponse.model_validate(response) if response else None
75
+ except (RequestException, ValueError) as e:
76
+ logger.error(f"Failed to create request: {e}")
77
+ return None
78
+
79
+
80
+ @handle_refresh_token
81
+ def get_model_detail(self, model_id: str) -> GetModelResponse:
82
+ """
83
+ Retrieves detailed information about a specific model by its ID.
84
+
85
+ :param model_id: The ID of the model to be retrieved.
86
+ :return: Details of the GetModelResponse model successfully retrieved.
87
+ """
88
+ try:
89
+ response = self.client.get(f"/models/{model_id}", self.iam_client.get_custom_headers())
90
+ return GetModelResponse.model_validate(response) if response else None
91
+ except (RequestException, ValueError) as e:
92
+ logger.error(f"Failed to retrieve model details for {model_id}: {e}")
93
+ return None
94
+
95
+
96
+ @handle_refresh_token
97
+ def get_models(self) -> List[GetModelResponse]:
98
+ """
99
+ Retrieves a list of available models from the video service.
100
+
101
+ :return: A list of GetModelResponse model successfully retrieved.
102
+ """
103
+ try:
104
+ response = self.client.get("/models", self.iam_client.get_custom_headers())
105
+ models = response.get('models', []) if response else []
106
+ return [GetModelResponse.model_validate(model) for model in models] if models else None
107
+ except (RequestException, ValueError) as e:
108
+ logger.error(f"Failed to retrieve models: {e}")
109
+ return None
110
+
111
+
@@ -24,6 +24,7 @@ class TaskEndpointStatus(str, Enum):
24
24
  UNREADY = "unready"
25
25
  NEW = "new"
26
26
 
27
+
27
28
  class TaskStatus(str, Enum):
28
29
  IDLE = "idle"
29
30
  STARTING = "starting"
@@ -32,7 +33,24 @@ class TaskStatus(str, Enum):
32
33
  NEEDSTOP = "needstop"
33
34
  ARCHIVED = "archived"
34
35
 
36
+
35
37
  class ModelParameterType(str, Enum):
36
38
  NUMERIC = "numeric"
37
39
  TEXT = "text"
38
- BOOLEAN = "boolean"
40
+ BOOLEAN = "boolean"
41
+
42
+
43
+ class RequestStatus(Enum):
44
+ CREATED = "created"
45
+ QUEUED = "queued"
46
+ DISPATCHED = "dispatched"
47
+ PROCESSING = "processing"
48
+ SUCCESS = "success"
49
+ FAILED = "failed"
50
+ CANCELLED = "cancelled"
51
+
52
+
53
+ class HostType(Enum):
54
+ DEFAULT = ""
55
+ INTERNAL = "internal"
56
+ EXTERNAL = "external"
@@ -1,6 +1,6 @@
1
1
  import os
2
2
  import time
3
- from typing import List
3
+ from typing import List, Dict, Any
4
4
  import mimetypes
5
5
  import concurrent.futures
6
6
  import re
@@ -64,6 +64,7 @@ class ArtifactManager:
64
64
  env_parameters: Optional[List["EnvParameter"]] = None,
65
65
  model_description: Optional[str] = "",
66
66
  model_parameters: Optional[List["ModelParameter"]] = None,
67
+ artifact_volume_path: Optional[str] = "",
67
68
  ) -> CreateArtifactResponse:
68
69
  """
69
70
  Create a new artifact for a user.
@@ -84,7 +85,8 @@ class ArtifactManager:
84
85
  template_id=template_id,
85
86
  env_parameters=env_parameters,
86
87
  model_description=model_description,
87
- model_parameters=model_parameters)
88
+ model_parameters=model_parameters,
89
+ artifact_volume_path=artifact_volume_path)
88
90
 
89
91
  return self.artifact_client.create_artifact(req)
90
92
 
@@ -146,7 +148,7 @@ class ArtifactManager:
146
148
  logger.error(f"Failed to create artifact from template, Error: {e}")
147
149
  raise e
148
150
 
149
- def create_artifact_for_serve_command_and_custom_model(self, template_name: str, artifact_name: str, serve_command: str, gpu_type: str, artifact_description: str = "") -> tuple[str, ReplicaResource]:
151
+ def create_artifact_for_serve_command_and_custom_model(self, template_name: str, artifact_name: str, serve_command: str, gpu_type: str, artifact_description: str = "", pre_download_model: str = "", env_parameters: Optional[Dict[str, Any]] = None) -> tuple[str, ReplicaResource]:
150
152
  """
151
153
  Create an artifact from a template and support custom model.
152
154
  :param artifact_template_name: The name of the template to use.
@@ -169,7 +171,7 @@ class ArtifactManager:
169
171
 
170
172
  try:
171
173
  if gpu_type not in ["H100", "H200"]:
172
- raise ValueError("Only support A100 and H100 for now")
174
+ raise ValueError("Only support H100 and H200 for now")
173
175
 
174
176
  type, env_vars, serve_args_dict = parse_server_command(serve_command)
175
177
  if type.lower() not in template_name.lower():
@@ -185,11 +187,25 @@ class ArtifactManager:
185
187
  raise ValueError(f"Failed to parse serve command, Error: {e}")
186
188
 
187
189
  try:
188
- env_vars = [
190
+ env_vars = []
191
+ if picked_template.template_data and picked_template.template_data.env_parameters:
192
+ env_vars = picked_template.template_data.env_parameters
193
+ env_vars_map = {param.key: param for param in env_vars}
194
+ if env_parameters:
195
+ for key, value in env_parameters.items():
196
+ if key in ['GPU_TYPE', 'SERVE_COMMAND']:
197
+ continue
198
+ if key not in env_vars_map:
199
+ new_param = EnvParameter(key=key, value=value)
200
+ env_vars.append(new_param)
201
+ env_vars_map[key] = new_param
202
+ else:
203
+ env_vars_map[key].value = value
204
+ env_vars.extend([
189
205
  EnvParameter(key="SERVE_COMMAND", value=serve_command),
190
206
  EnvParameter(key="GPU_TYPE", value=gpu_type),
191
- ]
192
- resp = self.create_artifact(artifact_name, artifact_description, deployment_type="template", template_id=picked_template.template_id, env_parameters=env_vars)
207
+ ])
208
+ resp = self.create_artifact(artifact_name, artifact_description, deployment_type="template", template_id=picked_template.template_id, env_parameters=env_vars, artifact_volume_path=f"models/{pre_download_model}")
193
209
  # Assume Artifact is already with BuildStatus.SUCCESS status
194
210
  return resp.artifact_id, recommended_replica_resources
195
211
  except Exception as e:
@@ -332,6 +348,9 @@ class ArtifactManager:
332
348
  # List all files in the model directory recursively
333
349
  model_file_paths = []
334
350
  for root, _, files in os.walk(model_directory):
351
+ # Skip .cache folder
352
+ if '.cache' in root.split(os.path.sep):
353
+ continue
335
354
  for file in files:
336
355
  model_file_paths.append(os.path.join(root, file))
337
356
 
@@ -393,7 +412,7 @@ class ArtifactManager:
393
412
  artifact = self.get_artifact(artifact_id)
394
413
  if artifact.build_status == BuildStatus.SUCCESS:
395
414
  return
396
- elif artifact.build_status in [BuildStatus.FAILED, BuildStatus.TIMEOUT, BuildStatus.CANCELLED]:
415
+ elif artifact.build_status in [BuildStatus.FAILURE, BuildStatus.TIMEOUT, BuildStatus.CANCELLED]:
397
416
  raise Exception(f"Artifact build failed, status: {artifact.build_status}")
398
417
  except Exception as e:
399
418
  logger.error(f"Failed to get artifact, Error: {e}")
@@ -65,6 +65,24 @@ class TaskManager:
65
65
 
66
66
  logger.info(f"Task created: {resp.task.task_id}")
67
67
  return resp.task
68
+
69
+ def create_task_from_artifact_id(self, artifact_id: str, replica_resource: ReplicaResource, task_scheduling: TaskScheduling) -> Task:
70
+ """
71
+ Create a new task from an existing artifact ID with the given replica resources and scheduling configuration.
72
+ """
73
+ # Create Task based on Artifact
74
+ new_task = Task(
75
+ config=TaskConfig(
76
+ ray_task_config=RayTaskConfig(
77
+ artifact_id=artifact_id,
78
+ file_path="serve",
79
+ deployment_name="app",
80
+ replica_resource=replica_resource,
81
+ ),
82
+ task_scheduling = task_scheduling,
83
+ ),
84
+ )
85
+ return self.create_task(new_task).task_id
68
86
 
69
87
  def create_task_from_file(self, artifact_id: str, config_file_path: str, trigger_timestamp: int = None) -> Task:
70
88
  """
@@ -218,6 +236,17 @@ class TaskManager:
218
236
  raise Exception(f"Task stopping takes more than {timeout_s // 60} minutes. Testing aborted.")
219
237
  time.sleep(10)
220
238
 
239
+ def get_task_endpoint_url(self, task_id: str) -> str:
240
+ task = self.get_task(task_id)
241
+ if task.endpoint_info is not None and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
242
+ return task.endpoint_info.endpoint_url
243
+ else:
244
+ if task.cluster_endpoints:
245
+ for ce in task.cluster_endpoints:
246
+ if ce.endpoint_status == TaskEndpointStatus.RUNNING:
247
+ return ce.endpoint_url
248
+ return ""
249
+
221
250
 
222
251
  def get_usage_data(self, start_timestamp: str, end_timestamp: str) -> GetUsageDataResponse:
223
252
  """
@@ -0,0 +1,91 @@
1
+ import os
2
+ import logging
3
+
4
+ from .._client._iam_client import IAMClient
5
+ from .._client._video_client import VideoClient
6
+ from .._models import *
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class VideoManager:
12
+ """
13
+ A manager for handling video tasks, providing methods to create, update, and stop tasks.
14
+ """
15
+
16
+ def __init__(self, iam_client: IAMClient):
17
+ """
18
+ Initializes the VideoManager with the given IAM client.
19
+ """
20
+ self.video_client = VideoClient(iam_client)
21
+ self.iam_client = iam_client
22
+
23
+
24
+ def get_request_detail(self, request_id: str) -> GetRequestResponse:
25
+ """
26
+ Retrieves detailed information about a specific request by its ID. This endpoint requires authentication with a bearer token and only returns requests belonging to the authenticated organization.
27
+
28
+ :param request_id: The ID of the request to be retrieved.
29
+ :return: Details of the request successfully retrieved
30
+ """
31
+ self._validate_not_empty(request_id, "request_id")
32
+ return self.video_client.get_request_detail(request_id)
33
+
34
+
35
+ def get_requests(self, model_id: str) -> List[GetRequestResponse]:
36
+ """
37
+ Retrieves a list of requests submitted by the authenticated user for a specific model. This endpoint requires authentication with a bearer token and filters results by the authenticated organization.
38
+
39
+ :param model_id: The ID of the model to be retrieved.
40
+ :return: List of user's requests successfully retrieved
41
+ """
42
+ self._validate_not_empty(model_id, "model_id")
43
+ return self.video_client.get_requests(model_id)
44
+
45
+
46
+ def create_request(self, request: SubmitRequestRequest) -> SubmitRequestResponse:
47
+ """
48
+ Submits a new asynchronous request to process a specified model with provided parameters. This endpoint requires authentication with a bearer token.
49
+
50
+ :param request: The request data to be created.
51
+ :return: The created request data.
52
+ """
53
+ if not request:
54
+ raise ValueError("Request data cannot be None.")
55
+ if not request.model:
56
+ raise ValueError("Model ID is required in the request data.")
57
+ if not request.payload:
58
+ raise ValueError("Payload is required in the request data.")
59
+ return self.video_client.create_request(request)
60
+
61
+
62
+ def get_model_detail(self, model_id: str) -> GetModelResponse:
63
+ """
64
+ Retrieves detailed information about a specific model by its ID.
65
+
66
+ :param model_id: The ID of the model to be retrieved.
67
+ :return: Details of the specified model.
68
+ """
69
+ self._validate_not_empty(model_id, "model_id")
70
+ return self.video_client.get_model_detail(model_id)
71
+
72
+
73
+ def get_models(self) -> List[GetModelResponse]:
74
+ """
75
+ Retrieves a list of available models for video processing.
76
+
77
+ :return: A list of available models.
78
+ """
79
+ return self.video_client.get_models()
80
+
81
+
82
+ @staticmethod
83
+ def _validate_not_empty(value: str, name: str):
84
+ """
85
+ Validate a string is neither None nor empty.
86
+
87
+ :param value: The string to validate.
88
+ :param name: The name of the value for error reporting.
89
+ """
90
+ if not value or not value.strip():
91
+ raise ValueError(f"{name} is required and cannot be empty.")
@@ -39,18 +39,21 @@ def parse_flags_and_args(tokens: list) -> dict:
39
39
  i = 0
40
40
  while i < len(tokens):
41
41
  token = tokens[i]
42
- if token.startswith('--'):
42
+ if token.startswith('--') or token.startswith('-'):
43
43
  if '=' in token:
44
44
  key, value = token[2:].split('=', 1)
45
45
  result[key] = value.strip("'\"")
46
46
  elif i + 1 < len(tokens) and not tokens[i + 1].startswith('--'):
47
- result[token[2:]] = tokens[i + 1].strip("'\"")
48
- i += 1
49
- elif i + 1 < len(tokens) and not tokens[i + 1].startswith('-'):
50
- result[token[1:]] = tokens[i + 1].strip("'\"")
47
+ if token.startswith('--'):
48
+ result[token[2:]] = tokens[i + 1].strip("'\"")
49
+ else:
50
+ result[token[1:]] = tokens[i + 1].strip("'\"")
51
51
  i += 1
52
52
  else:
53
- result[token[2:]] = True
53
+ if token.startswith('--'):
54
+ result[token[2:]] = True
55
+ else:
56
+ result[token[1:]] = True
54
57
  else:
55
58
  logger.warning(f"Ignoring unknown token: {token}")
56
59
  i += 1
@@ -118,4 +121,5 @@ def extract_gpu_num_from_serve_command(serve_args_dict: dict) -> int:
118
121
  cmd_gpu_num = cmd_tp_size * cmd_dp_size
119
122
  if cmd_gpu_num > 8:
120
123
  raise ValueError("Only support up to 8 GPUs for single task replica.")
124
+ print(f'cmd_tp_size: {cmd_tp_size}, cmd_dp_size: {cmd_dp_size}, cmd_gpu_num: {cmd_gpu_num}')
121
125
  return cmd_gpu_num
@@ -2,7 +2,7 @@ from typing import Optional, List, Union
2
2
  from datetime import datetime
3
3
 
4
4
  from pydantic import BaseModel
5
- from gmicloud._internal._enums import BuildStatus, TaskStatus, TaskEndpointStatus, ModelParameterType
5
+ from gmicloud._internal._enums import *
6
6
 
7
7
 
8
8
  class BigFileMetadata(BaseModel):
@@ -103,6 +103,7 @@ class CreateArtifactRequest(BaseModel):
103
103
  env_parameters: Optional[List["EnvParameter"]] = None # Environment parameters.
104
104
  model_description: Optional[str] = "" # Description of the model.
105
105
  model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact.
106
+ artifact_volume_path: Optional[str] = "" # Path to the volume where the artifact is stored.
106
107
 
107
108
 
108
109
  class CreateArtifactResponse(BaseModel):
@@ -586,3 +587,85 @@ class GetSelfAPIKeyResponse(BaseModel):
586
587
  """
587
588
  key: APIKey # The API key of the current user.
588
589
  organization: Optional[Organization] = None # Organization information.
590
+
591
+
592
+
593
+ # ----------------- video models -----------------
594
+
595
+ class SubmitRequestRequest(BaseModel):
596
+ """
597
+ The request body for submits a new asynchronous request
598
+ """
599
+ model: str
600
+ payload: dict
601
+
602
+
603
+ class SubmitRequestResponse(BaseModel):
604
+ """
605
+ Represents the response body for a submitted request.
606
+ """
607
+ created_at: Optional[int] = 0
608
+ model: Optional[str] = ""
609
+ queued_at: Optional[int] = 0
610
+ request_id: Optional[str] = ""
611
+ status: Optional[RequestStatus] = None
612
+ updated_at: Optional[int] = 0
613
+
614
+
615
+ class GetRequestResponse(BaseModel):
616
+ """
617
+ Response object for getting a specific request.
618
+ """
619
+ created_at: Optional[int] = 0
620
+ is_public: Optional[bool] = False
621
+ model: Optional[str] = ""
622
+ org_id: Optional[str] = ""
623
+ outcome: Optional[dict] = {}
624
+ payload: Optional[dict] = {}
625
+ queued_at: Optional[int] = 0
626
+ qworker_id: Optional[str] = ""
627
+ request_id: Optional[str] = ""
628
+ status: Optional[RequestStatus] = None
629
+ updated_at: Optional[int] = 0
630
+
631
+
632
+ class ListUserRequestsResponse(BaseModel):
633
+ """
634
+ Represents the response body for listing user requests.
635
+ """
636
+ requests: List[GetRequestResponse]
637
+ total: Optional[int] = 0 # Total number of requests available for the user.
638
+
639
+
640
+ class PriceInfo(BaseModel):
641
+ """
642
+ Represents pricing information for a model.
643
+ """
644
+ price: Optional[int] = 0
645
+ pricing_type: Optional[str] = ""
646
+ unit: Optional[str] = ""
647
+
648
+
649
+ class GetModelResponse(BaseModel):
650
+ """
651
+ Represents the response body for a specific model.
652
+ """
653
+ background_image_url: Optional[str] = ""
654
+ brief_description: Optional[str] = ""
655
+ created_at: Optional[int] = 0
656
+ detailed_description: Optional[str] = ""
657
+ external_api_endpoint: Optional[str] = ""
658
+ external_api_url: Optional[str] = ""
659
+ external_provider: Optional[str] = ""
660
+ host_type: Optional[HostType] = HostType.DEFAULT
661
+ icon_link: Optional[str] = ""
662
+ internal_parameters: Optional[dict] = {}
663
+ modalities: Optional[dict] = {}
664
+ model: Optional[str] = ""
665
+ model_type: Optional[str] = ""
666
+ org_id: Optional[str] = ""
667
+ parameters: Optional[list] = []
668
+ price_info: Optional[PriceInfo] = None
669
+ qworkers: Optional[int] = 0
670
+ tags: Optional[list[str]] = []
671
+ updated_at: Optional[int] = 0
gmicloud/client.py CHANGED
@@ -8,6 +8,7 @@ from ._internal._client._iam_client import IAMClient
8
8
  from ._internal._manager._artifact_manager import ArtifactManager
9
9
  from ._internal._manager._task_manager import TaskManager
10
10
  from ._internal._manager._iam_manager import IAMManager
11
+ from ._internal._manager._video_manager import VideoManager
11
12
  from ._internal._enums import BuildStatus, TaskStatus, TaskEndpointStatus
12
13
  from ._internal._models import Task, TaskConfig, RayTaskConfig, TaskScheduling, ReplicaResource
13
14
 
@@ -37,6 +38,7 @@ class Client:
37
38
  self._artifact_manager = None
38
39
  self._task_manager = None
39
40
  self._iam_manager = None
41
+ self._video_manager = None
40
42
 
41
43
  @property
42
44
  def artifact_manager(self):
@@ -58,6 +60,16 @@ class Client:
58
60
  self._task_manager = TaskManager(self.iam_client)
59
61
  return self._task_manager
60
62
 
63
+ @property
64
+ def video_manager(self):
65
+ """
66
+ Lazy initialization for VideoManager.
67
+ Ensures the Client instance controls its lifecycle.
68
+ """
69
+ if self._video_manager is None:
70
+ self._video_manager = VideoManager(self.iam_client)
71
+ return self._video_manager
72
+
61
73
  @property
62
74
  def iam_manager(self):
63
75
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gmicloud
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: GMI Cloud Python SDK
5
5
  Author-email: GMI <gmi@gmitec.net>
6
6
  License: MIT
@@ -133,7 +133,22 @@ model_checkpoint_save_dir = "files/model_garden"
133
133
  snapshot_download(repo_id=model_name, local_dir=model_checkpoint_save_dir)
134
134
  ```
135
135
 
136
- 2. Find a template of specific SGLang version
136
+ #### Pre-downloaded models
137
+ ```
138
+ "deepseek-ai/DeepSeek-R1"
139
+ "deepseek-ai/DeepSeek-V3-0324"
140
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
141
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
142
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
143
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
144
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
145
+ "meta-llama/Llama-3.3-70B-Instruct"
146
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
147
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct"
148
+ "Qwen/QwQ-32B"
149
+ ```
150
+
151
+ 2. Find a template of a specific vLLM or SGLang version
137
152
 
138
153
  ```python
139
154
  # export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
@@ -158,55 +173,67 @@ picked_template_name = "gmi_sglang_0.4.5.post1"
158
173
  serve_command = "python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --mem-fraction-static 0.8 --tp 2"
159
174
  ```
160
175
 
161
- 4. Create an artifact and upload custom model. The artifact can be reused to create inference tasks later. Artifact also suggests recommended resources for each inference server replica
176
+ 4. Create an artifact. You can pass the `pre_download_model` parameter. If you want a custom model, upload the model checkpoint to the artifact. The artifact can be reused to create inference tasks later. The artifact also suggests recommended resources for each inference server replica
162
177
 
163
178
  ```python
164
- artifact_id, recommended_replica_resources = cli.artifact_manager.create_artifact_from_template_name(
165
- artifact_template_name=picked_template_name,
166
- env_parameters={
167
- "SERVER_COMMAND": serve_command,
168
- "GPU_TYPE": "H100",
169
- }
179
+ artifact_name = "artifact_hello_world"
180
+ artifact_id, recommended_replica_resources = cli.artifact_manager.create_artifact_for_serve_command_and_custom_model(
181
+ template_name=picked_template_name,
182
+ artifact_name=artifact_name,
183
+ serve_command=serve_command,
184
+ gpu_type="H100",
185
+ artifact_description="This is a test artifact",
186
+ pre_download_model=pick_pre_downloaded_model,
170
187
  )
171
188
  print(f"Created artifact {artifact_id} with recommended resources: {recommended_replica_resources}")
189
+ ```
172
190
 
173
- # Upload model files to artifact
191
+ Alternatively, upload a custom model checkpoint to the artifact
192
+ ```python
174
193
  cli.artifact_manager.upload_model_files_to_artifact(artifact_id, model_checkpoint_save_dir)
194
+
195
+ # Wait up to 10 minutes for the artifact to be ready
196
+ time.sleep(10 * 60)
175
197
  ```
176
198
 
177
199
  5. Create Inference task (defining min/max inference replica), start and wait
178
200
 
179
201
  ```python
180
- new_task = Task(
181
- config=TaskConfig(
182
- ray_task_config=RayTaskConfig(
183
- artifact_id=artifact_id,
184
- file_path="serve",
185
- deployment_name="app",
186
- replica_resource=recommended_replica_resources,
187
- ),
188
- task_scheduling = TaskScheduling(
189
- scheduling_oneoff=OneOffScheduling(
190
- trigger_timestamp=int(datetime.now().timestamp()),
191
- min_replicas=1,
192
- max_replicas=4,
193
- )
194
- ),
195
- ),
196
- )
197
- task = cli.task_manager.create_task(new_task)
198
- task_id = task.task_id
199
- task = cli.task_manager.get_task(task_id)
202
+ # Create Task based on Artifact
203
+ new_task_id = cli.task_manager.create_task_from_artifact_id(artifact_id, recommended_replica_resources, TaskScheduling(
204
+ scheduling_oneoff=OneOffScheduling(
205
+ trigger_timestamp=int(datetime.now().timestamp()),
206
+ min_replicas=1,
207
+ max_replicas=4,
208
+ )
209
+ ))
210
+ task = cli.task_manager.get_task(new_task_id)
200
211
  print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
201
212
 
202
213
  # Start Task and wait for it to be ready
203
- cli.task_manager.start_task_and_wait(task_id)
214
+ cli.task_manager.start_task_and_wait(new_task_id)
204
215
  ```
205
216
 
206
- 6. Test with sample chat completion request
217
+ 6. Test with sample chat completion request with OpenAI client
207
218
 
208
219
  ```python
209
- print(call_chat_completion(cli, task_id))
220
+ api_key = "<YOUR_API_KEY>"
221
+ endpoint_url = cli.task_manager.get_task_endpoint_url(new_task_id)
222
+ open_ai = OpenAI(
223
+ base_url=os.getenv("OPENAI_API_BASE", f"https://{endpoint_url}/serve/v1/"),
224
+ api_key=api_key
225
+ )
226
+ # Make a chat completion request using the new OpenAI client.
227
+ completion = open_ai.chat.completions.create(
228
+ model=picked_template_name,
229
+ messages=[
230
+ {"role": "system", "content": "You are a helpful assistant."},
231
+ {"role": "user", "content": "Who are you?"},
232
+ ],
233
+ max_tokens=500,
234
+ temperature=0.7
235
+ )
236
+ print(completion.choices[0].message.content)
210
237
  ```
211
238
 
212
239
 
@@ -1,28 +1,31 @@
1
1
  gmicloud/__init__.py,sha256=xSzrAxiby5Te20yhy1ZylGHmQKVV_w1QjFe6D99VZxw,968
2
- gmicloud/client.py,sha256=G0tD0xQnpqDKS-3l-AAU-K3FAHOsqsTzsAq2NVxiamY,10539
2
+ gmicloud/client.py,sha256=nTMrKhyrGSx9qUDTice2HqmIqlIlsuKoxHnb0T-Ls3c,10947
3
3
  gmicloud/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  gmicloud/_internal/_config.py,sha256=BenHiCnedpHA5phz49UWBXa1mg_q9W8zYs7A8esqGcU,494
5
5
  gmicloud/_internal/_constants.py,sha256=Y085dwFlqdFkCf39iBfxz39QiiB7lX59ayNJjB86_m4,378
6
- gmicloud/_internal/_enums.py,sha256=5d6Z8TFJYCmhNI1TDbPpBbG1tNe96StIEH4tEw20RZk,789
6
+ gmicloud/_internal/_enums.py,sha256=aN3At0_iV_6aaUsrOy-JThtRUokeY4nTyxxPLZmIDBU,1093
7
7
  gmicloud/_internal/_exceptions.py,sha256=hScBq7n2fOit4_umlkabZJchY8zVbWSRfWM2Y0rLCbw,306
8
- gmicloud/_internal/_models.py,sha256=2l65aZdQxyXlY0Dj23P6NFf59_zopgf9OoUMLAz5T2U,22685
8
+ gmicloud/_internal/_models.py,sha256=iSRHMUPx_iXEraSg3ouAIM4ipVXQop3MuCGJFvFvMLY,25011
9
9
  gmicloud/_internal/_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  gmicloud/_internal/_client/_artifact_client.py,sha256=0lyHAdUybN8A1mEwZ7p1yK2yQEyoDG2vTB4Qe5RI2ik,9974
11
+ gmicloud/_internal/_client/_auth_config.py,sha256=zlCUPHN_FgWmOAxOAgjBtGRbaChqMa9PPGPuVNKvnc8,2700
11
12
  gmicloud/_internal/_client/_decorator.py,sha256=sy4gxzsUB6ORXHw5pqmMf7TTlK41Nmu1fhIhK2AIsbY,670
12
13
  gmicloud/_internal/_client/_file_upload_client.py,sha256=r29iXG_0DOi-uTLu9plpfZMWGqOck_AdDHJZprcf8uI,4918
13
14
  gmicloud/_internal/_client/_http_client.py,sha256=j--3emTjJ_l9CTdnkTbcpf7gYcUEl341pv2O5cU67l0,5741
14
- gmicloud/_internal/_client/_iam_client.py,sha256=pgOXIqp9aJvcIUCEVkYPEyMUyxBftecojHAbs8Gbl94,7013
15
+ gmicloud/_internal/_client/_iam_client.py,sha256=iXam-UlTCJWCpXmxAhqCo0J2m6nPzNOWa06R5xAy5nQ,8297
15
16
  gmicloud/_internal/_client/_task_client.py,sha256=69OqZC_kwSDkTSVVyi51Tn_OyUV6R0nin4z4gLfZ-Lg,6141
17
+ gmicloud/_internal/_client/_video_client.py,sha256=bjSmChBydGXwuVIm37ltKGmduPJa-H0Bjyc-qhd_PZI,4694
16
18
  gmicloud/_internal/_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- gmicloud/_internal/_manager/_artifact_manager.py,sha256=yK4veVwCY1cipy3rdnGvhnKNvkBx3SYYpHKqzjtXJn0,20731
19
+ gmicloud/_internal/_manager/_artifact_manager.py,sha256=Fq5Qifrdq5yn_QkMAoykuWE04FgqNOd9yZrFQdAi5J8,21874
18
20
  gmicloud/_internal/_manager/_iam_manager.py,sha256=nAqPCaUfSXTnx2MEQa8e0YUOBFYWDRiETgK1PImdf4o,1167
19
- gmicloud/_internal/_manager/_task_manager.py,sha256=zBW_TkYhbSvAc_p7Q3z6Vgl2Cayv8zIkawTT6OcB4x4,11291
20
- gmicloud/_internal/_manager/serve_command_utils.py,sha256=xjB6B9CNAmohou41H755iCCgkLNrjvdnu9NcJApTm1k,4373
21
+ gmicloud/_internal/_manager/_task_manager.py,sha256=g2K0IG1EXzcZRAfXLhUp78em0ZVvKyqlr1PGTBR04JQ,12501
22
+ gmicloud/_internal/_manager/_video_manager.py,sha256=_PwooKf9sZkIx4mYTy57pXtP7J3uwHQHgscns5hQYZ0,3376
23
+ gmicloud/_internal/_manager/serve_command_utils.py,sha256=0PXDRuGbLw_43KBwCxPRdb4QqijZrzYyvM6WOZ2-Ktg,4583
21
24
  gmicloud/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
25
  gmicloud/tests/test_artifacts.py,sha256=w0T0EpATIGLrSUPaBfTZ2ZC_X2XeaTlFEi3DZ4evIcE,15825
23
26
  gmicloud/tests/test_tasks.py,sha256=yL-aFf80ShgTyxEONTWh-xbWDf5XnUNtIeA5hYvhKM0,10963
24
27
  gmicloud/utils/uninstall_packages.py,sha256=zzuuaJPf39oTXWZ_7tUAGseoxocuCbbkoglJSD5yDrE,1127
25
- gmicloud-0.1.7.dist-info/METADATA,sha256=LFLXvJeQ9ocyJQ8hFbTaNZAWJ7NvsO7FCN4tyaN5YY8,7927
26
- gmicloud-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
27
- gmicloud-0.1.7.dist-info/top_level.txt,sha256=AZimLw3y0WPpLiSiOidZ1gD0dxALh-jQNk4fxC05hYE,9
28
- gmicloud-0.1.7.dist-info/RECORD,,
28
+ gmicloud-0.1.9.dist-info/METADATA,sha256=sZlrvpl2xiwBoVJj79IQ0JIFXg8md9mCmA13P99dXj0,9028
29
+ gmicloud-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
+ gmicloud-0.1.9.dist-info/top_level.txt,sha256=AZimLw3y0WPpLiSiOidZ1gD0dxALh-jQNk4fxC05hYE,9
31
+ gmicloud-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5