gmicloud 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gmicloud/__init__.py +2 -2
- gmicloud/_internal/_client/_artifact_client.py +40 -7
- gmicloud/_internal/_client/_file_upload_client.py +10 -7
- gmicloud/_internal/_config.py +9 -3
- gmicloud/_internal/_enums.py +5 -0
- gmicloud/_internal/_manager/_artifact_manager.py +198 -17
- gmicloud/_internal/_manager/_task_manager.py +76 -2
- gmicloud/_internal/_manager/serve_command_utils.py +121 -0
- gmicloud/_internal/_models.py +154 -34
- gmicloud/client.py +179 -75
- gmicloud/tests/test_artifacts.py +6 -22
- gmicloud-0.1.7.dist-info/METADATA +237 -0
- gmicloud-0.1.7.dist-info/RECORD +28 -0
- {gmicloud-0.1.5.dist-info → gmicloud-0.1.7.dist-info}/WHEEL +1 -1
- gmicloud-0.1.5.dist-info/METADATA +0 -246
- gmicloud-0.1.5.dist-info/RECORD +0 -27
- {gmicloud-0.1.5.dist-info → gmicloud-0.1.7.dist-info}/top_level.txt +0 -0
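For context when reading the per-file changes below, you can check which of the two releases is installed with the standard library's `importlib.metadata`; nothing gmicloud-specific is assumed here:

```python
from importlib.metadata import version

# Prints the installed gmicloud release, e.g. "0.1.5" before upgrading and "0.1.7" after.
print(version("gmicloud"))
```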
gmicloud/client.py
CHANGED
@@ -8,7 +8,7 @@ from ._internal._client._iam_client import IAMClient
 from ._internal._manager._artifact_manager import ArtifactManager
 from ._internal._manager._task_manager import TaskManager
 from ._internal._manager._iam_manager import IAMManager
-from ._internal._enums import BuildStatus
+from ._internal._enums import BuildStatus, TaskStatus, TaskEndpointStatus
 from ._internal._models import Task, TaskConfig, RayTaskConfig, TaskScheduling, ReplicaResource

 logger = logging.getLogger(__name__)
@@ -38,80 +38,6 @@ class Client:
         self._task_manager = None
         self._iam_manager = None

-    def create_task_from_artifact_template(self, artifact_template_id: str, task_scheduling: TaskScheduling) -> Task:
-        """
-        Create a task from a template.
-
-        :param artifact_template_id: The ID of the artifact template to use.
-        :param task_scheduling: The scheduling configuration for the task.
-        :return: A `Task` object containing the details of the created task.
-        :rtype: Task
-        """
-        if not artifact_template_id or not artifact_template_id.strip():
-            raise ValueError("Artifact Template ID must be provided.")
-        if not task_scheduling:
-            raise ValueError("Task Scheduling must be provided.")
-
-        artifact_manager = self.artifact_manager
-        task_manager = self.task_manager
-
-        templates = artifact_manager.get_public_templates()
-        template = None
-        for v in templates:
-            if v.template_id == artifact_template_id:
-                template = v
-        if not template:
-            raise ValueError(f"Template with ID {artifact_template_id} not found.")
-        if not template.template_data:
-            raise ValueError("Template does not contain template data.")
-        if not template.template_data.ray:
-            raise ValueError("Template does not contain Ray configuration.")
-        if not template.template_data.resources:
-            raise ValueError("Template does not contain resource configuration.")
-
-        artifact_id = artifact_manager.create_artifact_from_template(artifact_template_id)
-
-        logger.info(f"Successfully created artifact from template, artifact_id: {artifact_id}")
-        # Wait for the artifact to be ready
-        while True:
-            try:
-                artifact = artifact_manager.get_artifact(artifact_id)
-                logger.info(f"Successfully got artifact info, artifact status: {artifact.build_status}")
-                # Wait until the artifact is ready
-                if artifact.build_status == BuildStatus.SUCCESS:
-                    break
-            except Exception as e:
-                raise e
-            # Wait for 2 seconds
-            time.sleep(2)
-        try:
-            # Create a task
-            task = task_manager.create_task(Task(
-                config=TaskConfig(
-                    ray_task_config=RayTaskConfig(
-                        ray_version=template.ray.version,
-                        file_path=template.ray.file_path,
-                        artifact_id=artifact_id,
-                        deployment_name=template.ray.deployment_name,
-                        replica_resource=ReplicaResource(
-                            cpu=template.resources.cpu,
-                            ram_gb=template.resources.memory,
-                            gpu=template.resources.gpu,
-                        ),
-                    ),
-                    task_scheduling=task_scheduling,
-                ),
-            ))
-
-            logger.info(f"Successfully created task, task_id: {task.task_id}")
-            # Start the task
-            task_manager.start_task(task.task_id)
-            logger.info(f"Successfully started task, task_id: {task.task_id}")
-        except Exception as e:
-            raise e
-
-        return task
-
     @property
     def artifact_manager(self):
         """
@@ -141,3 +67,181 @@ class Client:
         if self._iam_manager is None:
             self._iam_manager = IAMManager(self.iam_client)
         return self._iam_manager
+
+    # def list_templates(self) -> list[str]:
+    #     """
+    #     List all public templates.
+
+    #     :return: A list of template names.
+    #     :rtype: list[str]
+    #     """
+    #     template_names = []
+    #     try:
+    #         templates = self.artifact_manager.get_public_templates()
+    #         for template in templates:
+    #             if template.template_data and template.template_data.name:
+    #                 template_names.append(template.template_data.name)
+    #         return template_names
+    #     except Exception as e:
+    #         logger.error(f"Failed to get artifact templates, Error: {e}")
+    #         return []
+
+    # def wait_for_artifact_ready(self, artifact_id: str, timeout_s: int = 900) -> None:
+    #     """
+    #     Wait for an artifact to be ready.
+
+    #     :param artifact_id: The ID of the artifact to wait for.
+    #     :param timeout_s: The timeout in seconds.
+    #     :return: None
+    #     """
+    #     artifact_manager = self.artifact_manager
+    #     start_time = time.time()
+    #     while True:
+    #         try:
+    #             artifact = artifact_manager.get_artifact(artifact_id)
+    #             if artifact.build_status == BuildStatus.SUCCESS:
+    #                 return
+    #             elif artifact.build_status in [BuildStatus.FAILED, BuildStatus.TIMEOUT, BuildStatus.CANCELLED]:
+    #                 raise Exception(f"Artifact build failed, status: {artifact.build_status}")
+    #         except Exception as e:
+    #             logger.error(f"Failed to get artifact, Error: {e}")
+    #         if time.time() - start_time > timeout_s:
+    #             raise Exception(f"Artifact build takes more than {timeout_s // 60} minutes. Testing aborted.")
+    #         time.sleep(10)
+
+    # def create_artifact_from_template(self, artifact_template_name: str) -> tuple[str, ReplicaResource]:
+    #     """
+    #     Create an artifact from a template.
+
+    #     :param artifact_template_name: The name of the template to use.
+    #     :return: A tuple containing the artifact ID and the recommended replica resources.
+    #     :rtype: tuple[str, ReplicaResource]
+    #     """
+    #     artifact_manager = self.artifact_manager
+
+    #     recommended_replica_resources = None
+    #     template_id = None
+    #     try:
+    #         templates = artifact_manager.get_public_templates()
+    #     except Exception as e:
+    #         logger.error(f"Failed to get artifact templates, Error: {e}")
+    #     for template in templates:
+    #         if template.template_data and template.template_data.name == artifact_template_name:
+    #             resources_template = template.template_data.resources
+    #             recommended_replica_resources = ReplicaResource(
+    #                 cpu=resources_template.cpu,
+    #                 ram_gb=resources_template.memory,
+    #                 gpu=resources_template.gpu,
+    #                 gpu_name=resources_template.gpu_name,
+    #             )
+    #             template_id = template.template_id
+    #             break
+    #     if not template_id:
+    #         raise ValueError(f"Template with name {artifact_template_name} not found.")
+    #     try:
+    #         artifact_id = artifact_manager.create_artifact_from_template(template_id)
+    #         self.wait_for_artifact_ready(artifact_id)
+    #         return artifact_id, recommended_replica_resources
+    #     except Exception as e:
+    #         logger.error(f"Failed to create artifact from template, Error: {e}")
+    #         raise e
+
+    # def create_task(self, artifact_id: str, replica_resources: ReplicaResource, task_scheduling: TaskScheduling) -> str:
+    #     """
+    #     Create a task.
+
+    #     :param artifact_id: The ID of the artifact to use.
+    #     :param replica_resources: The recommended replica resources.
+    #     :param task_scheduling: The scheduling configuration for the task.
+    #     :return: The ID of the created task.
+    #     :rtype: str
+    #     """
+    #     task_manager = self.task_manager
+    #     task = None
+    #     try:
+    #         task = task_manager.create_task(Task(
+    #             config=TaskConfig(
+    #                 ray_task_config=RayTaskConfig(
+    #                     artifact_id=artifact_id,
+    #                     file_path="serve",
+    #                     deployment_name="app",
+    #                     replica_resource=replica_resources,
+    #                 ),
+    #                 task_scheduling = task_scheduling,
+    #             ),
+    #         ))
+    #     except Exception as e:
+    #         logger.error(f"Failed to create task, Error: {e}")
+    #         raise e
+    #     return task.task_id
+
+    # def start_task_and_wait(self, task_id: str, timeout_s: int = 900) -> Task:
+    #     """
+    #     Start a task and wait for it to be ready.
+
+    #     :param task_id: The ID of the task to start.
+    #     :param timeout_s: The timeout in seconds.
+    #     :return: The task object.
+    #     :rtype: Task
+    #     """
+    #     task_manager = self.task_manager
+    #     # trigger start task
+    #     try:
+    #         task_manager.start_task(task_id)
+    #         logger.info(f"Started task ID: {task_id}")
+    #     except Exception as e:
+    #         logger.error(f"Failed to start task, Error: {e}")
+    #         raise e
+
+    #     start_time = time.time()
+    #     while True:
+    #         try:
+    #             task = task_manager.get_task(task_id)
+    #             if task.task_status == TaskStatus.RUNNING:
+    #                 return task
+    #             elif task.task_status in [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]:
+    #                 raise Exception(f"Unexpected task status after starting: {task.task_status}")
+    #             # Also check endpoint status.
+    #             elif task.task_status == TaskStatus.RUNNING:
+    #                 if task.endpoint_info and task.endpoint_info.endpoint_status == TaskEndpointStatus.RUNNING:
+    #                     return task
+    #                 elif task.endpoint_info and task.endpoint_info.endpoint_status in [TaskEndpointStatus.UNKNOWN, TaskEndpointStatus.ARCHIVED]:
+    #                     raise Exception(f"Unexpected endpoint status after starting: {task.endpoint_info.endpoint_status}")
+    #                 else:
+    #                     logger.info(f"Pending endpoint starting. endpoint status: {task.endpoint_info.endpoint_status}")
+    #             else:
+    #                 logger.info(f"Pending task starting. Task status: {task.task_status}")
+
+    #         except Exception as e:
+    #             logger.error(f"Failed to get task, Error: {e}")
+    #         if time.time() - start_time > timeout_s:
+    #             raise Exception(f"Task creation takes more than {timeout_s // 60} minutes. Testing aborted.")
+    #         time.sleep(10)
+
+    # def stop_task(self, task_id: str, timeout_s: int = 900):
+    #     task_manager = self.task_manager
+    #     try:
+    #         self.task_manager.stop_task(task_id)
+    #         logger.info(f"Stopping task ID: {task_id}")
+    #     except Exception as e:
+    #         logger.error(f"Failed to stop task, Error: {e}")
+    #     task_manager = self.task_manager
+    #     start_time = time.time()
+    #     while True:
+    #         try:
+    #             task = task_manager.get_task(task_id)
+    #             if task.task_status == TaskStatus.IDLE:
+    #                 break
+    #         except Exception as e:
+    #             logger.error(f"Failed to get task, Error: {e}")
+    #         if time.time() - start_time > timeout_s:
+    #             raise Exception(f"Task stopping takes more than {timeout_s // 60} minutes. Testing aborted.")
+    #         time.sleep(10)
+
+    # def archive_task(self, task_id: str, timeout_s: int = 900):
+    #     task_manager = self.task_manager
+    #     try:
+    #         self.task_manager.archive_task(task_id)
+    #         logger.info(f"Archived task ID: {task_id}")
+    #     except Exception as e:
+    #         logger.error(f"Failed to archive task, Error: {e}")
gmicloud/tests/test_artifacts.py
CHANGED
@@ -131,34 +131,18 @@ class TestArtifactManager(unittest.TestCase):
         upload_link = "http://upload-link"
         bigfile_upload_link = "http://bigfile-upload-link"
         artifact_file_path = "./testdata/test.zip"
-
+        model_directory= "./testdata"

         mock_create_artifact.return_value = CreateArtifactResponse(artifact_id="1", upload_link=upload_link)
-        mock_get_bigfile_upload_url.return_value =
+        mock_get_bigfile_upload_url.return_value = ResumableUploadLinkResponse(artifact_id="1",
                                                                                upload_link=bigfile_upload_link)

         artifact_id = self.artifact_manager.create_artifact_with_model_files(artifact_name="artifact_name",
                                                                              artifact_file_path=artifact_file_path,
-
+                                                                             model_directory=model_directory)
         self.assertEqual(artifact_id, "1")
         mock_upload_small_file.assert_called_once_with(upload_link, artifact_file_path, "application/zip")
-        mock_upload_large_file.
-
-    @patch('gmicloud._internal._client._artifact_client.ArtifactClient.create_artifact')
-    @patch('gmicloud._internal._client._file_upload_client.FileUploadClient.upload_small_file')
-    def test_create_artifact_with_model_files_raises_file_not_found_error_for_model_file(self, mock_create_artifact,
-                                                                                         mock_upload_small_file):
-        upload_link = "http://upload-link"
-        artifact_file_path = "./testdata/test.zip"
-        model_file_path = "./testdata/nonexistent.zip"
-
-        mock_create_artifact.return_value = CreateArtifactResponse(artifact_id="1", upload_link=upload_link)
-
-        with self.assertRaises(FileNotFoundError) as context:
-            self.artifact_manager.create_artifact_with_model_files(artifact_name="artifact_name",
-                                                                   artifact_file_path=artifact_file_path,
-                                                                   model_file_paths=[model_file_path])
-        self.assertTrue(f"File not found: {model_file_path}" in str(context.exception))
+        self.assertEqual(mock_upload_large_file.call_count, 6) # 6 files in testdata directory

     @patch('gmicloud._internal._client._artifact_client.ArtifactClient.rebuild_artifact')
     def test_rebuild_artifact_rebuilds_successfully(self, mock_rebuild_artifact):
@@ -203,7 +187,7 @@ class TestArtifactManager(unittest.TestCase):
         upload_link = "http://upload-link"
         model_file_path = "./testdata/model.zip"

-        mock_get_bigfile_upload_url.return_value =
+        mock_get_bigfile_upload_url.return_value = ResumableUploadLinkResponse(artifact_id="1", upload_link=upload_link)
         upload_link = self.artifact_manager.get_bigfile_upload_url("1", model_file_path)
         self.assertEqual(upload_link, upload_link)

@@ -253,7 +237,7 @@ class TestArtifactManager(unittest.TestCase):

     @patch('gmicloud._internal._client._artifact_client.ArtifactClient.get_public_templates')
     def test_get_artifact_templates_returns_templates(self, mock_get_public_templates):
-        mock_get_public_templates.return_value = [
+        mock_get_public_templates.return_value = [Template(template_id="1", template_data=TemplateData(name="Template1"))]
         templates = self.artifact_manager.get_public_templates()
         self.assertEqual(len(templates), 1)
         self.assertEqual(templates[0].template_id, "1")
gmicloud-0.1.7.dist-info/METADATA
ADDED
@@ -0,0 +1,237 @@
+Metadata-Version: 2.4
+Name: gmicloud
+Version: 0.1.7
+Summary: GMI Cloud Python SDK
+Author-email: GMI <gmi@gmitec.net>
+License: MIT
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+
+# GMICloud SDK
+
+## Overview
+Before you start: our service and GPU resources are currently invite-only, so please contact our team (getstarted@gmicloud.ai) to get invited if you don't have an account yet.
+
+The GMI Inference Engine SDK provides a Python interface for deploying and managing machine learning models in production environments. It allows users to create model artifacts, schedule tasks for serving models, and call inference APIs easily.
+
+This SDK streamlines the process of utilizing GMI Cloud capabilities such as deploying models with Kubernetes-based Ray services, managing resources automatically, and accessing model inference endpoints. With minimal setup, developers can focus on building ML solutions instead of infrastructure.
+
+## Features
+
+- Artifact Management: Easily create, update, and manage ML model artifacts.
+- Task Management: Quickly create, schedule, and manage deployment tasks for model inference.
+- Usage Data Retrieval: Fetch and analyze usage data to optimize resource allocation.
+
+## Installation
+
+To install the SDK, use pip:
+
+```bash
+pip install gmicloud
+```
+
+## Setup
+
+You must configure authentication credentials for accessing the GMI Cloud API.
+To create an account and get login credentials, please visit the **GMI inference platform: https://inference-engine.gmicloud.ai/**.
+
+There are two ways to configure the SDK:
+
+### Option 1: Using Environment Variables
+
+Set the following environment variables:
+
+```shell
+export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID> # Pick whatever ID you need.
+export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
+export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
+```
+
+### Option 2: Passing Credentials as Parameters
+
+Pass `client_id`, `email`, and `password` directly to the Client object when initializing it in your script:
+
+```python
+from gmicloud import Client
+
+client = Client(client_id="<YOUR_CLIENT_ID>", email="<YOUR_EMAIL>", password="<YOUR_PASSWORD>")
+```
+
+## Quick Start
+
+### 1. How to run the code in the example folder
+```bash
+cd path/to/gmicloud-sdk
+# Create a virtual environment
+python -m venv venv
+source venv/bin/activate
+
+pip install -r requirements.txt
+python -m examples.create_task_from_artifact_template.py
+```
+
+### 2. Example of creating an inference task from an artifact template
+
+This is the simplest example of deploying an inference task using an existing artifact template:
+
+Up-to-date code is in /examples/create_task_from_artifact_template.py
+
+```python
+from datetime import datetime
+import os
+import sys
+
+from gmicloud import *
+from examples.completion import call_chat_completion
+
+cli = Client()
+
+# List templates offered by GMI cloud
+templates = cli.list_templates()
+print(f"Found {len(templates)} templates: {templates}")
+
+# Pick a template from the list
+pick_template = "Llama-3.1-8B"
+
+# Create Artifact from template
+artifact_id, recommended_replica_resources = cli.create_artifact_from_template(templates[0])
+print(f"Created artifact {artifact_id} with recommended replica resources: {recommended_replica_resources}")
+
+# Create Task based on Artifact
+task_id = cli.create_task(artifact_id, recommended_replica_resources, TaskScheduling(
+    scheduling_oneoff=OneOffScheduling(
+        trigger_timestamp=int(datetime.now().timestamp()),
+        min_replicas=1,
+        max_replicas=1,
+    )
+))
+task = cli.task_manager.get_task(task_id)
+print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
+
+# Start Task and wait for it to be ready
+cli.start_task_and_wait(task.task_id)
+
+# Testing with calling chat completion
+print(call_chat_completion(cli, task.task_id))
+
+```
+
+### 3. Example of creating an inference task based on a custom model with a local vLLM / SGLang serve command
+* A full example is available at [examples/inference_task_with_custom_model.py](https://github.com/GMISWE/python-sdk/blob/main/examples/inference_task_with_custom_model.py)
+
+1. Prepare a custom model checkpoint (using a model downloaded from HF as an example)
+
+```python
+# Download model from huggingface
+from huggingface_hub import snapshot_download
+
+model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+model_checkpoint_save_dir = "files/model_garden"
+snapshot_download(repo_id=model_name, local_dir=model_checkpoint_save_dir)
+```
+
+2. Find a template for the specific SGLang version
+
+```python
+# export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
+# export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
+# export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
+cli = Client()
+
+# List templates offered by GMI cloud
+templates = cli.artifact_manager.list_public_template_names()
+print(f"Found {len(templates)} templates: {templates}")
+```
+
+3. Pick a template (e.g. SGLang 0.4.5) and prepare a local serve command
+
+```python
+# Example for vllm server
+picked_template_name = "gmi_vllm_0.8.4"
+serve_command = "vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --gpu-memory-utilization 0.8"
+
+# Example for sglang server
+picked_template_name = "gmi_sglang_0.4.5.post1"
+serve_command = "python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --mem-fraction-static 0.8 --tp 2"
+```
+
+4. Create an artifact and upload the custom model. The artifact can be reused to create inference tasks later, and it also suggests recommended resources for each inference server replica.
+
+```python
+artifact_id, recommended_replica_resources = cli.artifact_manager.create_artifact_from_template_name(
+    artifact_template_name=picked_template_name,
+    env_parameters={
+        "SERVER_COMMAND": serve_command,
+        "GPU_TYPE": "H100",
+    }
+)
+print(f"Created artifact {artifact_id} with recommended resources: {recommended_replica_resources}")
+
+# Upload model files to artifact
+cli.artifact_manager.upload_model_files_to_artifact(artifact_id, model_checkpoint_save_dir)
+```
+
+5. Create the inference task (defining min/max inference replicas), then start it and wait
+
+```python
+new_task = Task(
+    config=TaskConfig(
+        ray_task_config=RayTaskConfig(
+            artifact_id=artifact_id,
+            file_path="serve",
+            deployment_name="app",
+            replica_resource=recommended_replica_resources,
+        ),
+        task_scheduling = TaskScheduling(
+            scheduling_oneoff=OneOffScheduling(
+                trigger_timestamp=int(datetime.now().timestamp()),
+                min_replicas=1,
+                max_replicas=4,
+            )
+        ),
+    ),
+)
+task = cli.task_manager.create_task(new_task)
+task_id = task.task_id
+task = cli.task_manager.get_task(task_id)
+print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
+
+# Start Task and wait for it to be ready
+cli.task_manager.start_task_and_wait(task_id)
+```
+
+6. Test with a sample chat completion request
+
+```python
+print(call_chat_completion(cli, task_id))
+```
+
+
+## API Reference
+
+### Client
+
+Represents the entry point to interact with GMI Cloud APIs.
+Client(
+    client_id: Optional[str] = "",
+    email: Optional[str] = "",
+    password: Optional[str] = ""
+)
+
+### Artifact Management
+
+* get_artifact_templates(): Fetch a list of available artifact templates.
+* create_artifact_from_template(template_id: str): Create a model artifact from a given template.
+* get_artifact(artifact_id: str): Get details of a specific artifact.
+
+### Task Management
+
+* create_task_from_artifact_template(template_id: str, scheduling: TaskScheduling): Create and schedule a task using an
+  artifact template.
+* start_task(task_id: str): Start a task.
+* get_task(task_id: str): Retrieve the status and details of a specific task.
+
+## Notes & Troubleshooting
gmicloud-0.1.7.dist-info/RECORD
ADDED
@@ -0,0 +1,28 @@
+gmicloud/__init__.py,sha256=xSzrAxiby5Te20yhy1ZylGHmQKVV_w1QjFe6D99VZxw,968
+gmicloud/client.py,sha256=G0tD0xQnpqDKS-3l-AAU-K3FAHOsqsTzsAq2NVxiamY,10539
+gmicloud/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gmicloud/_internal/_config.py,sha256=BenHiCnedpHA5phz49UWBXa1mg_q9W8zYs7A8esqGcU,494
+gmicloud/_internal/_constants.py,sha256=Y085dwFlqdFkCf39iBfxz39QiiB7lX59ayNJjB86_m4,378
+gmicloud/_internal/_enums.py,sha256=5d6Z8TFJYCmhNI1TDbPpBbG1tNe96StIEH4tEw20RZk,789
+gmicloud/_internal/_exceptions.py,sha256=hScBq7n2fOit4_umlkabZJchY8zVbWSRfWM2Y0rLCbw,306
+gmicloud/_internal/_models.py,sha256=2l65aZdQxyXlY0Dj23P6NFf59_zopgf9OoUMLAz5T2U,22685
+gmicloud/_internal/_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gmicloud/_internal/_client/_artifact_client.py,sha256=0lyHAdUybN8A1mEwZ7p1yK2yQEyoDG2vTB4Qe5RI2ik,9974
+gmicloud/_internal/_client/_decorator.py,sha256=sy4gxzsUB6ORXHw5pqmMf7TTlK41Nmu1fhIhK2AIsbY,670
+gmicloud/_internal/_client/_file_upload_client.py,sha256=r29iXG_0DOi-uTLu9plpfZMWGqOck_AdDHJZprcf8uI,4918
+gmicloud/_internal/_client/_http_client.py,sha256=j--3emTjJ_l9CTdnkTbcpf7gYcUEl341pv2O5cU67l0,5741
+gmicloud/_internal/_client/_iam_client.py,sha256=pgOXIqp9aJvcIUCEVkYPEyMUyxBftecojHAbs8Gbl94,7013
+gmicloud/_internal/_client/_task_client.py,sha256=69OqZC_kwSDkTSVVyi51Tn_OyUV6R0nin4z4gLfZ-Lg,6141
+gmicloud/_internal/_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gmicloud/_internal/_manager/_artifact_manager.py,sha256=yK4veVwCY1cipy3rdnGvhnKNvkBx3SYYpHKqzjtXJn0,20731
+gmicloud/_internal/_manager/_iam_manager.py,sha256=nAqPCaUfSXTnx2MEQa8e0YUOBFYWDRiETgK1PImdf4o,1167
+gmicloud/_internal/_manager/_task_manager.py,sha256=zBW_TkYhbSvAc_p7Q3z6Vgl2Cayv8zIkawTT6OcB4x4,11291
+gmicloud/_internal/_manager/serve_command_utils.py,sha256=xjB6B9CNAmohou41H755iCCgkLNrjvdnu9NcJApTm1k,4373
+gmicloud/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gmicloud/tests/test_artifacts.py,sha256=w0T0EpATIGLrSUPaBfTZ2ZC_X2XeaTlFEi3DZ4evIcE,15825
+gmicloud/tests/test_tasks.py,sha256=yL-aFf80ShgTyxEONTWh-xbWDf5XnUNtIeA5hYvhKM0,10963
+gmicloud/utils/uninstall_packages.py,sha256=zzuuaJPf39oTXWZ_7tUAGseoxocuCbbkoglJSD5yDrE,1127
+gmicloud-0.1.7.dist-info/METADATA,sha256=LFLXvJeQ9ocyJQ8hFbTaNZAWJ7NvsO7FCN4tyaN5YY8,7927
+gmicloud-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+gmicloud-0.1.7.dist-info/top_level.txt,sha256=AZimLw3y0WPpLiSiOidZ1gD0dxALh-jQNk4fxC05hYE,9
+gmicloud-0.1.7.dist-info/RECORD,,
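Each RECORD row above has the form `path,sha256=<digest>,<size in bytes>`, where the digest is the unpadded, URL-safe base64 SHA-256 of the file, as specified for wheel RECORD files. A small sketch for recomputing one digest (the path is illustrative and assumes the wheel has been unpacked into the current directory):

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    """Return the unpadded, URL-safe base64 SHA-256 digest used in RECORD entries."""
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# For gmicloud/client.py the RECORD above lists G0tD0xQnpqDKS-3l-AAU-K3FAHOsqsTzsAq2NVxiamY
print(record_digest("gmicloud/client.py"))
```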