gmicloud 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,17 @@
1
1
  import os
2
2
  import time
3
- from typing import List
3
+ from typing import List, Dict, Any
4
4
  import mimetypes
5
+ import concurrent.futures
6
+ import re
7
+ from tqdm import tqdm
8
+ from tqdm.contrib.logging import logging_redirect_tqdm
5
9
 
6
10
  from .._client._iam_client import IAMClient
7
11
  from .._client._artifact_client import ArtifactClient
8
12
  from .._client._file_upload_client import FileUploadClient
9
13
  from .._models import *
14
+ from .._manager.serve_command_utils import parse_server_command, extract_gpu_num_from_serve_command
10
15
 
11
16
  import logging
12
17
 
@@ -53,7 +58,13 @@ class ArtifactManager:
53
58
  self,
54
59
  artifact_name: str,
55
60
  description: Optional[str] = "",
56
- tags: Optional[List[str]] = None
61
+ tags: Optional[List[str]] = None,
62
+ deployment_type: Optional[str] = "",
63
+ template_id: Optional[str] = "",
64
+ env_parameters: Optional[List["EnvParameter"]] = None,
65
+ model_description: Optional[str] = "",
66
+ model_parameters: Optional[List["ModelParameter"]] = None,
67
+ artifact_volume_path: Optional[str] = "",
57
68
  ) -> CreateArtifactResponse:
58
69
  """
59
70
  Create a new artifact for a user.
@@ -69,11 +80,17 @@ class ArtifactManager:
69
80
 
70
81
  req = CreateArtifactRequest(artifact_name=artifact_name,
71
82
  artifact_description=description,
72
- artifact_tags=tags, )
83
+ artifact_tags=tags,
84
+ deployment_type=deployment_type,
85
+ template_id=template_id,
86
+ env_parameters=env_parameters,
87
+ model_description=model_description,
88
+ model_parameters=model_parameters,
89
+ artifact_volume_path=artifact_volume_path)
73
90
 
74
91
  return self.artifact_client.create_artifact(req)
75
92
 
76
- def create_artifact_from_template(self, artifact_template_id: str) -> str:
93
+ def create_artifact_from_template(self, artifact_template_id: str, env_parameters: Optional[dict[str, str]] = None) -> str:
77
94
  """
78
95
  Create a new artifact for a user using a template.
79
96
 
@@ -85,11 +102,16 @@ class ArtifactManager:
85
102
  if not artifact_template_id or not artifact_template_id.strip():
86
103
  raise ValueError("Artifact template ID is required and cannot be empty.")
87
104
 
105
+
88
106
  resp = self.artifact_client.create_artifact_from_template(artifact_template_id)
89
107
  if not resp or not resp.artifact_id:
90
108
  raise ValueError("Failed to create artifact from template.")
91
109
 
110
+ if env_parameters:
111
+ self.artifact_client.add_env_parameters_to_artifact(resp.artifact_id, env_parameters)
112
+
92
113
  return resp.artifact_id
114
+
93
115
 
94
116
  def create_artifact_from_template_name(self, artifact_template_name: str) -> tuple[str, ReplicaResource]:
95
117
  """
@@ -125,6 +147,70 @@ class ArtifactManager:
125
147
  except Exception as e:
126
148
  logger.error(f"Failed to create artifact from template, Error: {e}")
127
149
  raise e
150
+
151
+ def create_artifact_for_serve_command_and_custom_model(self, template_name: str, artifact_name: str, serve_command: str, gpu_type: str, artifact_description: str = "", pre_download_model: str = "", env_parameters: Optional[Dict[str, Any]] = None) -> tuple[str, ReplicaResource]:
152
+ """
153
+ Create an artifact from a template and support custom model.
154
+ :param template_name: The name of the template to use.
155
+ :return: A tuple containing the artifact ID and the recommended replica resources.
156
+ :rtype: tuple[str, ReplicaResource]
157
+ """
158
+
159
+ recommended_replica_resources = None
160
+ picked_template = None
161
+ try:
162
+ templates = self.get_public_templates()
163
+ except Exception as e:
164
+ logger.error(f"Failed to get artifact templates, Error: {e}")
165
+ for template in templates:
166
+ if template.template_data and template.template_data.name == template_name:
167
+ picked_template = template
168
+ break
169
+ if not picked_template:
170
+ raise ValueError(f"Template with name {template_name} not found.")
171
+
172
+ try:
173
+ if gpu_type not in ["H100", "H200"]:
174
+ raise ValueError("Only support H100 and H200 for now")
175
+
176
+ type, env_vars, serve_args_dict = parse_server_command(serve_command)
177
+ if type.lower() not in template_name.lower():
178
+ raise ValueError(f"Template {template_name} does not support inference with {type}.")
179
+ num_gpus = extract_gpu_num_from_serve_command(serve_args_dict)
180
+ recommended_replica_resources = ReplicaResource(
181
+ cpu=num_gpus * 16,
182
+ ram_gb=num_gpus * 100,
183
+ gpu=num_gpus,
184
+ gpu_name=gpu_type,
185
+ )
186
+ except Exception as e:
187
+ raise ValueError(f"Failed to parse serve command, Error: {e}")
188
+
189
+ try:
190
+ env_vars = []
191
+ if picked_template.template_data and picked_template.template_data.env_parameters:
192
+ env_vars = picked_template.template_data.env_parameters
193
+ env_vars_map = {param.key: param for param in env_vars}
194
+ if env_parameters:
195
+ for key, value in env_parameters.items():
196
+ if key in ['GPU_TYPE', 'SERVE_COMMAND']:
197
+ continue
198
+ if key not in env_vars_map:
199
+ new_param = EnvParameter(key=key, value=value)
200
+ env_vars.append(new_param)
201
+ env_vars_map[key] = new_param
202
+ else:
203
+ env_vars_map[key].value = value
204
+ env_vars.extend([
205
+ EnvParameter(key="SERVE_COMMAND", value=serve_command),
206
+ EnvParameter(key="GPU_TYPE", value=gpu_type),
207
+ ])
208
+ resp = self.create_artifact(artifact_name, artifact_description, deployment_type="template", template_id=picked_template.template_id, env_parameters=env_vars, artifact_volume_path=f"models/{pre_download_model}")
209
+ # Assume Artifact is already with BuildStatus.SUCCESS status
210
+ return resp.artifact_id, recommended_replica_resources
211
+ except Exception as e:
212
+ logger.error(f"Failed to create artifact from template, Error: {e}")
213
+ raise e
128
214
 
129
215
  def rebuild_artifact(self, artifact_id: str) -> RebuildArtifactResponse:
130
216
  """
@@ -211,7 +297,7 @@ class ArtifactManager:
211
297
  model_file_name = os.path.basename(model_file_path)
212
298
  model_file_type = mimetypes.guess_type(model_file_path)[0]
213
299
 
214
- req = GetBigFileUploadUrlRequest(artifact_id=artifact_id, file_name=model_file_name, file_type=model_file_type)
300
+ req = ResumableUploadLinkRequest(artifact_id=artifact_id, file_name=model_file_name, file_type=model_file_type)
215
301
 
216
302
  resp = self.artifact_client.get_bigfile_upload_url(req)
217
303
  if not resp or not resp.upload_link:
@@ -250,36 +336,67 @@ class ArtifactManager:
250
336
 
251
337
  FileUploadClient.upload_large_file(upload_link, file_path)
252
338
 
339
+
340
+ def upload_model_files_to_artifact(self, artifact_id: str, model_directory: str) -> None:
341
+ """
342
+ Upload model files to an existing artifact.
343
+
344
+ :param artifact_id: The ID of the artifact to upload the model files to.
345
+ :param model_directory: The path to the model directory.
346
+ """
347
+
348
+ # List all files in the model directory recursively
349
+ model_file_paths = []
350
+ for root, _, files in os.walk(model_directory):
351
+ # Skip .cache folder
352
+ if '.cache' in root.split(os.path.sep):
353
+ continue
354
+ for file in files:
355
+ model_file_paths.append(os.path.join(root, file))
356
+
357
+ def upload_file(model_file_path):
358
+ self._validate_file_path(model_file_path)
359
+ bigfile_upload_url_resp = self.artifact_client.get_bigfile_upload_url(
360
+ ResumableUploadLinkRequest(artifact_id=artifact_id, file_name=os.path.basename(model_file_path))
361
+ )
362
+ FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
363
+
364
+ # Upload files in parallel with progress bar
365
+ with tqdm(total=len(model_file_paths), desc="Uploading model files") as progress_bar:
366
+ with logging_redirect_tqdm():
367
+ with concurrent.futures.ThreadPoolExecutor() as executor:
368
+ futures = {executor.submit(upload_file, path): path for path in model_file_paths}
369
+ for future in concurrent.futures.as_completed(futures):
370
+ try:
371
+ future.result()
372
+ except Exception as e:
373
+ logger.error(f"Failed to upload file {futures[future]}, Error: {e}")
374
+ progress_bar.update(1)
375
+
253
376
  def create_artifact_with_model_files(
254
377
  self,
255
378
  artifact_name: str,
256
379
  artifact_file_path: str,
257
- model_file_paths: List[str],
380
+ model_directory: str,
258
381
  description: Optional[str] = "",
259
382
  tags: Optional[str] = None
260
383
  ) -> str:
261
384
  """
262
385
  Create a new artifact for a user and upload model files associated with the artifact.
263
-
264
386
  :param artifact_name: The name of the artifact.
265
387
  :param artifact_file_path: The path to the artifact file(Dockerfile+serve.py).
266
- :param model_file_paths: The paths to the model files.
388
+ :param model_directory: The path to the model directory.
267
389
  :param description: An optional description for the artifact.
268
390
  :param tags: Optional tags associated with the artifact, as a comma-separated string.
269
391
  :return: The `artifact_id` of the created artifact.
270
- :raises FileNotFoundError: If the provided `file_path` does not exist.
271
392
  """
272
393
  artifact_id = self.create_artifact_with_file(artifact_name, artifact_file_path, description, tags)
394
+ logger.info(f"Artifact created: {artifact_id}")
273
395
 
274
- for model_file_path in model_file_paths:
275
- self._validate_file_path(model_file_path)
276
- bigfile_upload_url_resp = self.artifact_client.get_bigfile_upload_url(
277
- GetBigFileUploadUrlRequest(artifact_id=artifact_id, model_file_path=model_file_path)
278
- )
279
- FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
396
+ self.upload_model_files_to_artifact(artifact_id, model_directory)
280
397
 
281
398
  return artifact_id
282
-
399
+
283
400
 
284
401
  def wait_for_artifact_ready(self, artifact_id: str, timeout_s: int = 900) -> None:
285
402
  """
@@ -295,7 +412,7 @@ class ArtifactManager:
295
412
  artifact = self.get_artifact(artifact_id)
296
413
  if artifact.build_status == BuildStatus.SUCCESS:
297
414
  return
298
- elif artifact.build_status in [BuildStatus.FAILED, BuildStatus.TIMEOUT, BuildStatus.CANCELLED]:
415
+ elif artifact.build_status in [BuildStatus.FAILURE, BuildStatus.TIMEOUT, BuildStatus.CANCELLED]:
299
416
  raise Exception(f"Artifact build failed, status: {artifact.build_status}")
300
417
  except Exception as e:
301
418
  logger.error(f"Failed to get artifact, Error: {e}")
@@ -304,12 +421,12 @@ class ArtifactManager:
304
421
  time.sleep(10)
305
422
 
306
423
 
307
- def get_public_templates(self) -> List[ArtifactTemplate]:
424
+ def get_public_templates(self) -> List[Template]:
308
425
  """
309
426
  Fetch all artifact templates.
310
427
 
311
- :return: A list of ArtifactTemplate objects.
312
- :rtype: List[ArtifactTemplate]
428
+ :return: A list of Template objects.
429
+ :rtype: List[Template]
313
430
  """
314
431
  return self.artifact_client.get_public_templates()
315
432
 
@@ -41,7 +41,7 @@ class TaskManager:
41
41
 
42
42
  :return: A list of `Task` objects.
43
43
  """
44
- resp = self.task_client.get_all_tasks(self.iam_client.get_user_id())
44
+ resp = self.task_client.get_all_tasks()
45
45
  if not resp or not resp.tasks:
46
46
  return []
47
47
 
@@ -63,7 +63,26 @@ class TaskManager:
63
63
  if not resp or not resp.task:
64
64
  raise ValueError("Failed to create task.")
65
65
 
66
+ logger.info(f"Task created: {resp.task.task_id}")
66
67
  return resp.task
68
+
69
+ def create_task_from_artifact_id(self, artifact_id: str, replica_resource: ReplicaResource, task_scheduling: TaskScheduling) -> Task:
70
+ """
71
+ Create a new task using the configuration data from a file.
72
+ """
73
+ # Create Task based on Artifact
74
+ new_task = Task(
75
+ config=TaskConfig(
76
+ ray_task_config=RayTaskConfig(
77
+ artifact_id=artifact_id,
78
+ file_path="serve",
79
+ deployment_name="app",
80
+ replica_resource=replica_resource,
81
+ ),
82
+ task_scheduling = task_scheduling,
83
+ ),
84
+ )
85
+ return self.create_task(new_task).task_id
67
86
 
68
87
  def create_task_from_file(self, artifact_id: str, config_file_path: str, trigger_timestamp: int = None) -> Task:
69
88
  """
@@ -138,48 +157,54 @@ class TaskManager:
138
157
  return self.task_client.start_task(task_id)
139
158
 
140
159
 
141
- def start_task_and_wait(self, task_id: str, timeout_s: int = 900) -> Task:
160
+ def wait_for_task(self, task_id: str, timeout_s: int = 900) -> Task:
142
161
  """
143
- Start a task and wait for it to be ready.
162
+ Wait for a task to reach the RUNNING state or raise an exception if it fails.
144
163
 
145
- :param task_id: The ID of the task to start.
164
+ :param task_id: The ID of the task to wait for.
146
165
  :param timeout_s: The timeout in seconds.
147
166
  :return: The task object.
148
167
  :rtype: Task
149
168
  """
150
- # trigger start task
151
- try:
152
- self.start_task(task_id)
153
- logger.info(f"Started task ID: {task_id}")
154
- except Exception as e:
155
- logger.error(f"Failed to start task, Error: {e}")
156
- raise e
157
-
158
169
  start_time = time.time()
159
170
  while True:
160
171
  try:
161
172
  task = self.get_task(task_id)
162
173
  if task.task_status == TaskStatus.RUNNING:
163
- return task
164
- elif task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
165
- raise Exception(f"Unexpected task status after starting: {task.task_status}")
166
- # Also check endpoint status.
167
- elif task.task_status == TaskStatus.RUNNING:
168
- if task.endpoint_info and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
174
+ if task.endpoint_info is not None and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
169
175
  return task
170
- elif task.endpoint_info and task.endpoint_info.endpoint_status in [TaskEndpointStatus.UNKNOWN, TaskEndpointStatus.ARCHIVED]:
171
- raise Exception(f"Unexpected endpoint status after starting: {task.endpoint_info.endpoint_status}")
172
176
  else:
173
- logger.info(f"Pending endpoint starting. endpoint status: {task.endpoint_info.endpoint_status}")
177
+ if task.cluster_endpoints:
178
+ for ce in task.cluster_endpoints:
179
+ if ce.endpoint_status == TaskEndpointStatus.RUNNING:
180
+ return task
181
+ if task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
182
+ raise Exception(f"Unexpected task status after starting: {task.task_status}")
174
183
  else:
175
184
  logger.info(f"Pending task starting. Task status: {task.task_status}")
176
-
177
185
  except Exception as e:
178
186
  logger.error(f"Failed to get task, Error: {e}")
179
187
  if time.time() - start_time > timeout_s:
180
188
  raise Exception(f"Task creation takes more than {timeout_s // 60} minutes. Testing aborted.")
181
189
  time.sleep(10)
182
190
 
191
+ def start_task_and_wait(self, task_id: str, timeout_s: int = 3600) -> Task:
192
+ """
193
+ Start a task and wait for it to be ready.
194
+
195
+ :param task_id: The ID of the task to start.
196
+ :param timeout_s: The timeout in seconds.
197
+ :return: The task object.
198
+ :rtype: Task
199
+ """
200
+ try:
201
+ self.start_task(task_id)
202
+ logger.info(f"Started task ID: {task_id}")
203
+ except Exception as e:
204
+ logger.error(f"Failed to start task, Error: {e}")
205
+ raise e
206
+
207
+ return self.wait_for_task(task_id, timeout_s)
183
208
 
184
209
  def stop_task(self, task_id: str) -> bool:
185
210
  """
@@ -190,16 +215,15 @@ class TaskManager:
190
215
  :raises ValueError: If `task_id` is invalid (None or empty string).
191
216
  """
192
217
  self._validate_not_empty(task_id, "Task ID")
218
+ return self.task_client.stop_task(task_id)
193
219
 
194
220
 
195
- def stop_task_and_wait(self, task_id: str, timeout_s: int = 900):
196
- task_manager = self.task_manager
221
+ def stop_task_and_wait(self, task_id: str, timeout_s: int = 3600):
197
222
  try:
198
- self.task_manager.stop_task(task_id)
223
+ self.stop_task(task_id)
199
224
  logger.info(f"Stopping task ID: {task_id}")
200
225
  except Exception as e:
201
226
  logger.error(f"Failed to stop task, Error: {e}")
202
- task_manager = self.task_manager
203
227
  start_time = time.time()
204
228
  while True:
205
229
  try:
@@ -212,7 +236,17 @@ class TaskManager:
212
236
  raise Exception(f"Task stopping takes more than {timeout_s // 60} minutes. Testing aborted.")
213
237
  time.sleep(10)
214
238
 
215
- return self.task_client.stop_task(task_id)
239
+ def get_task_endpoint_url(self, task_id: str) -> str:
240
+ task = self.get_task(task_id)
241
+ if task.endpoint_info is not None and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
242
+ return task.endpoint_info.endpoint_url
243
+ else:
244
+ if task.cluster_endpoints:
245
+ for ce in task.cluster_endpoints:
246
+ if ce.endpoint_status == TaskEndpointStatus.RUNNING:
247
+ return ce.endpoint_url
248
+ return ""
249
+
216
250
 
217
251
  def get_usage_data(self, start_timestamp: str, end_timestamp: str) -> GetUsageDataResponse:
218
252
  """
@@ -0,0 +1,91 @@
1
+ import os
2
+ import logging
3
+
4
+ from .._client._iam_client import IAMClient
5
+ from .._client._video_client import VideoClient
6
+ from .._models import *
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class VideoManager:
12
+ """
13
+ A manager for handling video tasks, providing methods to create, update, and stop tasks.
14
+ """
15
+
16
+ def __init__(self, iam_client: IAMClient):
17
+ """
18
+ Initializes the VideoManager with the given IAM client.
19
+ """
20
+ self.video_client = VideoClient(iam_client)
21
+ self.iam_client = iam_client
22
+
23
+
24
+ def get_request_detail(self, request_id: str) -> GetRequestResponse:
25
+ """
26
+ Retrieves detailed information about a specific request by its ID. This endpoint requires authentication with a bearer token and only returns requests belonging to the authenticated organization.
27
+
28
+ :param request_id: The ID of the request to be retrieved.
29
+ :return: Details of the request successfully retrieved
30
+ """
31
+ self._validate_not_empty(request_id, "request_id")
32
+ return self.video_client.get_request_detail(request_id)
33
+
34
+
35
+ def get_requests(self, model_id: str) -> List[GetRequestResponse]:
36
+ """
37
+ Retrieves a list of requests submitted by the authenticated user for a specific model. This endpoint requires authentication with a bearer token and filters results by the authenticated organization.
38
+
39
+ :param model_id: The ID of the model to be retrieved.
40
+ :return: List of user's requests successfully retrieved
41
+ """
42
+ self._validate_not_empty(model_id, "model_id")
43
+ return self.video_client.get_requests(model_id)
44
+
45
+
46
+ def create_request(self, request: SubmitRequestRequest) -> SubmitRequestResponse:
47
+ """
48
+ Submits a new asynchronous request to process a specified model with provided parameters. This endpoint requires authentication with a bearer token.
49
+
50
+ :param request: The request data to be created.
51
+ :return: The created request data.
52
+ """
53
+ if not request:
54
+ raise ValueError("Request data cannot be None.")
55
+ if not request.model:
56
+ raise ValueError("Model ID is required in the request data.")
57
+ if not request.payload:
58
+ raise ValueError("Payload is required in the request data.")
59
+ return self.video_client.create_request(request)
60
+
61
+
62
+ def get_model_detail(self, model_id: str) -> GetModelResponse:
63
+ """
64
+ Retrieves detailed information about a specific model by its ID.
65
+
66
+ :param model_id: The ID of the model to be retrieved.
67
+ :return: Details of the specified model.
68
+ """
69
+ self._validate_not_empty(model_id, "model_id")
70
+ return self.video_client.get_model_detail(model_id)
71
+
72
+
73
+ def get_models(self) -> List[GetModelResponse]:
74
+ """
75
+ Retrieves a list of available models for video processing.
76
+
77
+ :return: A list of available models.
78
+ """
79
+ return self.video_client.get_models()
80
+
81
+
82
+ @staticmethod
83
+ def _validate_not_empty(value: str, name: str):
84
+ """
85
+ Validate a string is neither None nor empty.
86
+
87
+ :param value: The string to validate.
88
+ :param name: The name of the value for error reporting.
89
+ """
90
+ if not value or not value.strip():
91
+ raise ValueError(f"{name} is required and cannot be empty.")
@@ -0,0 +1,125 @@
1
+ import shlex
2
+ import os
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ def parse_server_command(cmd_str: str) -> tuple[str, dict, dict]:
8
+ """
9
+ parse server command
10
+ Maybe there are more than two types of server command
11
+ if not found, we can add more parse function
12
+ """
13
+ if "vllm serve" in cmd_str:
14
+ return ("vllm", *parse_server_vllm_command(cmd_str))
15
+ elif "sglang.launch_server" in cmd_str:
16
+ return ("sglang", *parse_server_sglang_command(cmd_str))
17
+ else:
18
+ raise ValueError(f"Unknown serve command: {cmd_str}")
19
+
20
+ def extract_env_and_args(tokens: list) -> tuple[dict, list]:
21
+ """
22
+ Extract environment variables from the tokens list.
23
+ and add the params or flags to environment variables
24
+ """
25
+ env_vars = {}
26
+ while tokens and '=' in tokens[0] and not tokens[0].startswith('--'):
27
+ key, value = tokens.pop(0).split('=', 1)
28
+ env_vars[key] = value
29
+ for k, v in env_vars.items():
30
+ os.environ[k] = v
31
+ return env_vars, tokens
32
+
33
+ def parse_flags_and_args(tokens: list) -> dict:
34
+ """
35
+ parse flags and args
36
+ include three types: --flag=value and --flag value and --flag
37
+ """
38
+ result = {}
39
+ i = 0
40
+ while i < len(tokens):
41
+ token = tokens[i]
42
+ if token.startswith('--') or token.startswith('-'):
43
+ if '=' in token:
44
+ key, value = token[2:].split('=', 1)
45
+ result[key] = value.strip("'\"")
46
+ elif i + 1 < len(tokens) and not tokens[i + 1].startswith('--'):
47
+ if token.startswith('--'):
48
+ result[token[2:]] = tokens[i + 1].strip("'\"")
49
+ else:
50
+ result[token[1:]] = tokens[i + 1].strip("'\"")
51
+ i += 1
52
+ else:
53
+ if token.startswith('--'):
54
+ result[token[2:]] = True
55
+ else:
56
+ result[token[1:]] = True
57
+ else:
58
+ logger.warning(f"Ignoring unknown token: {token}")
59
+ i += 1
60
+ return result
61
+
62
+ def parse_server_vllm_command(cmd_str: str) -> tuple[dict, dict]:
63
+ """ parse vllm command"""
64
+ tokens = shlex.split(cmd_str)
65
+ result = {}
66
+
67
+ # Extract environment variables
68
+ env_vars, tokens = extract_env_and_args(tokens)
69
+ if env_vars:
70
+ result["env_vars"] = env_vars
71
+
72
+ # vllm serve + model
73
+ if tokens[:2] != ['vllm', 'serve']:
74
+ raise ValueError("Invalid vllm serve command format. Example: vllm serve <model path>")
75
+
76
+ if len(tokens) < 3:
77
+ raise ValueError("Missing model path in vllm serve command. Example: vllm serve <model path>")
78
+
79
+ model_path = tokens[2]
80
+ result["model-path"] = model_path
81
+
82
+ flags = parse_flags_and_args(tokens[3:])
83
+ result.update(flags)
84
+ return (env_vars, result)
85
+
86
+ def parse_server_sglang_command(cmd_str: str) -> tuple[dict, dict]:
87
+ """ parse sglang command"""
88
+ tokens = shlex.split(cmd_str)
89
+ result = {}
90
+
91
+ # Extract environment variables
92
+ env_vars, tokens = extract_env_and_args(tokens)
93
+ if env_vars:
94
+ result["env_vars"] = env_vars
95
+ # python3 -m sglang.launch_server
96
+ if tokens[:3] != ['python3', '-m', 'sglang.launch_server'] and tokens[:3] != ['python', '-m', 'sglang.launch_server']:
97
+ raise ValueError("Invalid sglang command format. Example: python3 -m sglang.launch_server")
98
+
99
+ flags = parse_flags_and_args(tokens[3:])
100
+ result.update(flags)
101
+ return (env_vars, result)
102
+
103
+ def extract_gpu_num_from_serve_command(serve_args_dict: dict) -> int:
104
+ """ extract gpu num from serve command """
105
+ cmd_tp_size = 1
106
+ cmd_dp_size = 1
107
+ if "tensor-parallel-size" in serve_args_dict:
108
+ cmd_tp_size = int(serve_args_dict["tensor-parallel-size"])
109
+ elif "tp" in serve_args_dict:
110
+ cmd_tp_size = int(serve_args_dict["tp"])
111
+ elif "tp-size" in serve_args_dict:
112
+ cmd_tp_size = int(serve_args_dict["tp-size"])
113
+ if "data-parallel-size" in serve_args_dict:
114
+ cmd_dp_size = int(serve_args_dict["data-parallel-size"])
115
+ elif "dp" in serve_args_dict:
116
+ cmd_dp_size = int(serve_args_dict["dp"])
117
+ elif "dp-size" in serve_args_dict:
118
+ cmd_dp_size = int(serve_args_dict["dp-size"])
119
+ if "pipeline_parallel_size" in serve_args_dict or "pp" in serve_args_dict:
120
+ raise ValueError("Pipeline parallel size is not supported.")
121
+ cmd_gpu_num = cmd_tp_size * cmd_dp_size
122
+ if cmd_gpu_num > 8:
123
+ raise ValueError("Only support up to 8 GPUs for single task replica.")
124
+ print(f'cmd_tp_size: {cmd_tp_size}, cmd_dp_size: {cmd_dp_size}, cmd_gpu_num: {cmd_gpu_num}')
125
+ return cmd_gpu_num