gmicloud 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gmicloud-0.1.7/PKG-INFO +237 -0
- gmicloud-0.1.7/README.md +225 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/__init__.py +2 -2
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_artifact_client.py +40 -7
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_file_upload_client.py +10 -7
- gmicloud-0.1.7/gmicloud/_internal/_config.py +9 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_enums.py +5 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_manager/_artifact_manager.py +198 -17
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_manager/_task_manager.py +76 -2
- gmicloud-0.1.7/gmicloud/_internal/_manager/serve_command_utils.py +121 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_models.py +154 -34
- gmicloud-0.1.7/gmicloud/client.py +247 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/tests/test_artifacts.py +6 -22
- gmicloud-0.1.7/gmicloud.egg-info/PKG-INFO +237 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud.egg-info/SOURCES.txt +1 -1
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud.egg-info/top_level.txt +1 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/pyproject.toml +7 -2
- gmicloud-0.1.5/PKG-INFO +0 -246
- gmicloud-0.1.5/README.md +0 -234
- gmicloud-0.1.5/gmicloud/_internal/_config.py +0 -3
- gmicloud-0.1.5/gmicloud/client.py +0 -143
- gmicloud-0.1.5/gmicloud/tests/__init__.py +0 -0
- gmicloud-0.1.5/gmicloud.egg-info/PKG-INFO +0 -246
- {gmicloud-0.1.5/examples → gmicloud-0.1.7/gmicloud/_internal}/__init__.py +0 -0
- {gmicloud-0.1.5/gmicloud/_internal → gmicloud-0.1.7/gmicloud/_internal/_client}/__init__.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_decorator.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_http_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_iam_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_task_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_constants.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_exceptions.py +0 -0
- {gmicloud-0.1.5/gmicloud/_internal/_client → gmicloud-0.1.7/gmicloud/_internal/_manager}/__init__.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_manager/_iam_manager.py +0 -0
- {gmicloud-0.1.5/gmicloud/_internal/_manager → gmicloud-0.1.7/gmicloud/tests}/__init__.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/tests/test_tasks.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/utils/uninstall_packages.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud.egg-info/dependency_links.txt +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.7}/setup.cfg +0 -0
gmicloud-0.1.7/PKG-INFO
ADDED
@@ -0,0 +1,237 @@
Metadata-Version: 2.4
Name: gmicloud
Version: 0.1.7
Summary: GMI Cloud Python SDK
Author-email: GMI <gmi@gmitec.net>
License: MIT
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.6
Description-Content-Type: text/markdown

# GMICloud SDK

## Overview
Before you start: our service and GPU resources are currently invite-only, so please contact our team (getstarted@gmicloud.ai) to get an invite if you don't have one yet.

The GMI Inference Engine SDK provides a Python interface for deploying and managing machine learning models in production environments. It allows users to create model artifacts, schedule tasks for serving models, and call inference APIs easily.

This SDK streamlines the process of using GMI Cloud capabilities such as deploying models with Kubernetes-based Ray services, managing resources automatically, and accessing model inference endpoints. With minimal setup, developers can focus on building ML solutions instead of infrastructure.

## Features

- Artifact Management: Easily create, update, and manage ML model artifacts.
- Task Management: Quickly create, schedule, and manage deployment tasks for model inference.
- Usage Data Retrieval: Fetch and analyze usage data to optimize resource allocation.

## Installation

To install the SDK, use pip:

```bash
pip install gmicloud
```

## Setup

You must configure authentication credentials for accessing the GMI Cloud API.
To create an account and get login info, please visit the **GMI inference platform: https://inference-engine.gmicloud.ai/**.

There are two ways to configure the SDK:

### Option 1: Using Environment Variables

Set the following environment variables:

```shell
export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>  # Pick whatever ID you need.
export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
```

### Option 2: Passing Credentials as Parameters

Pass `client_id`, `email`, and `password` directly to the Client object when initializing it in your script:

```python
from gmicloud import Client

client = Client(client_id="<YOUR_CLIENT_ID>", email="<YOUR_EMAIL>", password="<YOUR_PASSWORD>")
```

## Quick Start

### 1. How to run the code in the examples folder

```bash
cd path/to/gmicloud-sdk
# Create a virtual environment
python -m venv venv
source venv/bin/activate

pip install -r requirements.txt
python -m examples.create_task_from_artifact_template
```

### 2. Example: create an inference task from an artifact template

This is the simplest way to deploy an inference task, using an existing artifact template.

Up-to-date code lives in /examples/create_task_from_artifact_template.py

```python
from datetime import datetime
import os
import sys

from gmicloud import *
from examples.completion import call_chat_completion

cli = Client()

# List templates offered by GMI cloud
templates = cli.list_templates()
print(f"Found {len(templates)} templates: {templates}")

# Pick a template from the list
pick_template = "Llama-3.1-8B"

# Create Artifact from template
artifact_id, recommended_replica_resources = cli.create_artifact_from_template(templates[0])
print(f"Created artifact {artifact_id} with recommended replica resources: {recommended_replica_resources}")

# Create Task based on Artifact
task_id = cli.create_task(artifact_id, recommended_replica_resources, TaskScheduling(
    scheduling_oneoff=OneOffScheduling(
        trigger_timestamp=int(datetime.now().timestamp()),
        min_replicas=1,
        max_replicas=1,
    )
))
task = cli.task_manager.get_task(task_id)
print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")

# Start Task and wait for it to be ready
cli.start_task_and_wait(task.task_id)

# Test by calling chat completion
print(call_chat_completion(cli, task.task_id))
```

### 3. Example: create an inference task for a custom model with a local vLLM / SGLang serve command

* The full example is available at [examples/inference_task_with_custom_model.py](https://github.com/GMISWE/python-sdk/blob/main/examples/inference_task_with_custom_model.py)

1. Prepare a custom model checkpoint (using a model downloaded from Hugging Face as an example):

```python
# Download model from huggingface
from huggingface_hub import snapshot_download

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model_checkpoint_save_dir = "files/model_garden"
snapshot_download(repo_id=model_name, local_dir=model_checkpoint_save_dir)
```

2. Find a template for a specific vLLM / SGLang version:

```python
# export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
# export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
# export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
cli = Client()

# List templates offered by GMI cloud
templates = cli.artifact_manager.list_public_template_names()
print(f"Found {len(templates)} templates: {templates}")
```

3. Pick a template (e.g. SGLang 0.4.5) and prepare a local serve command:

```python
# Example for vllm server
picked_template_name = "gmi_vllm_0.8.4"
serve_command = "vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --gpu-memory-utilization 0.8"

# Example for sglang server
picked_template_name = "gmi_sglang_0.4.5.post1"
serve_command = "python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --trust-remote-code --mem-fraction-static 0.8 --tp 2"
```

4. Create an artifact and upload the custom model. The artifact can be reused to create inference tasks later, and it also suggests recommended resources for each inference server replica:

```python
artifact_id, recommended_replica_resources = cli.artifact_manager.create_artifact_from_template_name(
    artifact_template_name=picked_template_name,
    env_parameters={
        "SERVER_COMMAND": serve_command,
        "GPU_TYPE": "H100",
    }
)
print(f"Created artifact {artifact_id} with recommended resources: {recommended_replica_resources}")

# Upload model files to artifact
cli.artifact_manager.upload_model_files_to_artifact(artifact_id, model_checkpoint_save_dir)
```

5. Create the inference task (defining min/max inference replicas), then start it and wait for it to be ready:

```python
new_task = Task(
    config=TaskConfig(
        ray_task_config=RayTaskConfig(
            artifact_id=artifact_id,
            file_path="serve",
            deployment_name="app",
            replica_resource=recommended_replica_resources,
        ),
        task_scheduling=TaskScheduling(
            scheduling_oneoff=OneOffScheduling(
                trigger_timestamp=int(datetime.now().timestamp()),
                min_replicas=1,
                max_replicas=4,
            )
        ),
    ),
)
task = cli.task_manager.create_task(new_task)
task_id = task.task_id
task = cli.task_manager.get_task(task_id)
print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")

# Start Task and wait for it to be ready
cli.task_manager.start_task_and_wait(task_id)
```

6. Test with a sample chat completion request:

```python
print(call_chat_completion(cli, task_id))
```

## API Reference

### Client

Represents the entry point to interact with GMI Cloud APIs.

Client(
    client_id: Optional[str] = "",
    email: Optional[str] = "",
    password: Optional[str] = ""
)

### Artifact Management

* get_artifact_templates(): Fetch a list of available artifact templates.
* create_artifact_from_template(template_id: str): Create a model artifact from a given template.
* get_artifact(artifact_id: str): Get details of a specific artifact.

### Task Management

* create_task_from_artifact_template(template_id: str, scheduling: TaskScheduling): Create and schedule a task using an artifact template.
* start_task(task_id: str): Start a task.
* get_task(task_id: str): Retrieve the status and details of a specific task.

## Notes & Troubleshooting
gmicloud-0.1.7/README.md
ADDED
@@ -0,0 +1,225 @@
The added README.md contains the same 225 markdown lines as the README body embedded in PKG-INFO above (everything from "# GMICloud SDK" through "## Notes & Troubleshooting").
{gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/__init__.py

@@ -15,7 +15,7 @@ from ._internal._models import (
     OneOffScheduling,
     DailyScheduling,
     DailyTrigger,
-
+    Template,
 )
 from ._internal._enums import (
     BuildStatus,
@@ -39,7 +39,7 @@ __all__ = [
     "OneOffScheduling",
     "DailyScheduling",
     "DailyTrigger",
-    "
+    "Template",
     "BuildStatus",
     "TaskEndpointStatus",
 ]
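For orientation, a short hedged sketch of what this re-export allows. Only `list_public_template_names()` (shown in the README above) and the `List[Template]` return type of `get_public_templates()` (shown in the artifact client diff below) are confirmed by this diff; everything else here is illustrative.

```python
# Sketch only: `Template` can now be imported from the package root in 0.1.7.
from typing import List, Optional

from gmicloud import Client, Template

cli = Client()  # credentials read from the GMI_CLOUD_* environment variables

# Confirmed by the README above: list the names of the public templates.
names = cli.artifact_manager.list_public_template_names()
print(f"{len(names)} public templates: {names}")


# Illustrative helper now that Template is part of the public surface; how the
# full Template objects are exposed above the raw ArtifactClient is not shown
# in this diff.
def find_template(templates: List[Template], wanted: str) -> Optional[Template]:
    return next((t for t in templates if wanted in str(t)), None)
```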
{gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_artifact_client.py

@@ -1,7 +1,7 @@
 from typing import List
 import logging
 from requests.exceptions import RequestException
-
+import json
 from ._http_client import HTTPClient
 from ._iam_client import IAMClient
 from ._decorator import handle_refresh_token
@@ -120,6 +120,39 @@ class ArtifactClient:
             logger.error(f"Failed to rebuild artifact {artifact_id}: {e}")
             return None
 
+    @handle_refresh_token
+    def add_env_parameters_to_artifact(self, artifact_id: str, env_parameters: dict[str, str]) -> None:
+        """
+        Updates an artifact by its ID.
+
+        :param artifact_id: The ID of the artifact to update.
+        :param request: The request object containing the updated artifact details.
+        """
+        try:
+            old_artifact = self.get_artifact(artifact_id)
+            if not old_artifact:
+                logger.error(f"Artifact {artifact_id} not found")
+                return
+            request = UpdateArtifactRequestBody(
+                artifact_description=old_artifact.artifact_metadata.artifact_description,
+                artifact_name=old_artifact.artifact_metadata.artifact_name,
+                artifact_tags=old_artifact.artifact_metadata.artifact_tags,
+                env_parameters=old_artifact.artifact_parameters.env_parameters,
+                model_parameters=old_artifact.artifact_parameters.model_parameters
+            )
+            new_env_parameters = [EnvParameter(key=k, value=v) for k, v in env_parameters.items()]
+            if not request.env_parameters:
+                request.env_parameters = []
+            request.env_parameters.extend(new_env_parameters)
+            response = self.client.put(
+                f"/update_artifact?artifact_id={artifact_id}",
+                self.iam_client.get_custom_headers(),
+                request.model_dump()
+            )
+        except (RequestException, ValueError) as e:
+            logger.error(f"Failed to add env parameters to artifact {artifact_id}: {e}")
+            return
+
     @handle_refresh_token
     def delete_artifact(self, artifact_id: str) -> Optional[DeleteArtifactResponse]:
         """
@@ -140,7 +173,7 @@ class ArtifactClient:
             return None
 
     @handle_refresh_token
-    def get_bigfile_upload_url(self, request:
+    def get_bigfile_upload_url(self, request: ResumableUploadLinkRequest) -> Optional[ResumableUploadLinkResponse]:
         """
         Generates a pre-signed URL for uploading a large file.
 
@@ -156,7 +189,7 @@
                 logger.error("Empty response from /get_bigfile_upload_url")
                 return None
 
-            return
+            return ResumableUploadLinkResponse.model_validate(response)
 
         except (RequestException, ValueError) as e:
             logger.error(f"Failed to generate upload URL: {e}")
@@ -186,12 +219,12 @@
             return None
 
     @handle_refresh_token
-    def get_public_templates(self) -> List[
+    def get_public_templates(self) -> List[Template]:
         """
         Fetches all artifact templates.
 
-        :return: A list of
-        :rtype: List[
+        :return: A list of Template objects.
+        :rtype: List[Template]
         """
         try:
             response = self.client.get("/get_public_templates", self.iam_client.get_custom_headers())
@@ -201,7 +234,7 @@
                 return []
 
             try:
-                result =
+                result = GetTemplatesResponse.model_validate(response)
                 return result.artifact_templates
             except ValueError as ve:
                 logger.error(f"Failed to validate response data: {ve}")
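One thing worth spelling out about the new `add_env_parameters_to_artifact`: it re-reads the artifact, copies its existing metadata and parameters into an `UpdateArtifactRequestBody`, and then appends the new env parameters without de-duplicating keys. A self-contained sketch of that merge behaviour, using a stand-in dataclass rather than the SDK's pydantic model:

```python
# Stand-in for gmicloud's EnvParameter model, for illustration only.
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class EnvParameter:
    key: str
    value: str


def merge_env_parameters(existing: Optional[List[EnvParameter]],
                         new: Dict[str, str]) -> List[EnvParameter]:
    """Mirror of the merge step above: keep existing entries, append new ones."""
    merged = list(existing or [])
    merged.extend(EnvParameter(key=k, value=v) for k, v in new.items())
    return merged


current = [EnvParameter("GPU_TYPE", "H100")]
updated = merge_env_parameters(current, {"SERVER_COMMAND": "vllm serve ..."})
print(updated)  # GPU_TYPE is kept, SERVER_COMMAND is appended
# Note: passing a key that already exists (e.g. GPU_TYPE again) would produce a
# duplicate entry, since the client method does not replace by key.
```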
{gmicloud-0.1.5 → gmicloud-0.1.7}/gmicloud/_internal/_client/_file_upload_client.py

@@ -1,8 +1,10 @@
 import os
 import requests
+import logging
 
 from .._exceptions import UploadFileError
 
+logger = logging.getLogger()
 
 class FileUploadClient:
     CHUNK_SIZE = 10 * 1024 * 1024  # 10MB Default Chunk Size
@@ -45,13 +47,13 @@
         """
         try:
             file_size = os.path.getsize(file_path)
-
+            logger.info(f"File {file_path} size: {file_size} bytes")
 
             start_byte = 0
             uploaded_range = FileUploadClient._check_file_status(upload_url, file_size)
             if uploaded_range:
                 start_byte = int(uploaded_range.split("-")[1]) + 1
-
+                logger.info(f"Resuming uploading {file_path} from {start_byte} bytes")
 
             with open(file_path, "rb") as file:
                 while start_byte < file_size:
@@ -74,14 +76,15 @@
                     # Ensure upload is successful for this chunk
                     if resp.status_code not in (200, 201, 308):
                         raise UploadFileError(
-                            f"Failed to upload file, code:{resp.status_code} ,message: {resp.text}")
+                            f"Failed to upload file {file_path}, code:{resp.status_code} ,message: {resp.text}")
 
                     start_byte = end_byte + 1
-
+                    percentage = (start_byte / file_size) * 100
+                    logger.info(f"File {file_path} uploaded {end_byte + 1:,}/{file_size:,} bytes ({percentage:.2f}%)")
 
-
+            logger.info(f"File {file_path} uploaded successfully.")
         except Exception as e:
-            raise UploadFileError(f"Failed to upload file: {str(e)}")
+            raise UploadFileError(f"Failed to upload file {file_path}, got error: {str(e)}")
 
     @staticmethod
     def _check_file_status(upload_url: str, file_size: int) -> str:
@@ -104,7 +107,7 @@
             if resp.status_code == 308:
                 range_header = resp.headers.get("Range")
                 if range_header:
-
+                    logger.info(f"Server reports partial upload range: {range_header}")
                     return range_header
 
             if resp.status_code in (200, 201):
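The logging added above sits inside a resumable-upload loop: the client probes the signed URL, and a 308 response with a `Range: bytes=0-N` header means the server already holds the first N+1 bytes, so the upload restarts at N+1. A hedged sketch of that offset calculation; the probe request shape is an assumption following the common resumable-upload convention, while the diff itself only shows the 308/`Range` handling.

```python
import requests


def next_start_byte(upload_url: str, file_size: int) -> int:
    """Return the byte offset to resume from, mirroring _check_file_status above."""
    # Assumed probe: an empty PUT with "Content-Range: bytes */<size>" asks the
    # server how much of the object it has already received.
    resp = requests.put(upload_url, headers={"Content-Range": f"bytes */{file_size}"})

    if resp.status_code == 308:                    # upload incomplete, resume
        range_header = resp.headers.get("Range")   # e.g. "bytes=0-10485759"
        if range_header:
            return int(range_header.split("-")[1]) + 1
        return 0                                   # nothing stored yet
    if resp.status_code in (200, 201):             # object already complete
        return file_size
    return 0                                       # unknown state: start over
```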
gmicloud-0.1.7/gmicloud/_internal/_config.py
ADDED  @@ -0,0 +1,9 @@
+# Dev environment
+# ARTIFACT_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/artifact"
+# TASK_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/task"
+# IAM_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1"
+
+# Prod environment
+ARTIFACT_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1/ie/artifact"
+TASK_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1/ie/task"
+IAM_SERVICE_BASE_URL = "https://inference-engine.gmicloud.ai/api/v1"
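Since the rewritten `_config.py` hard-codes the production endpoints as module-level constants (the dev endpoints survive only as comments), switching environments means editing or patching the module rather than setting an environment variable. A small hedged sketch: the import path comes from this diff, but whether a runtime patch actually takes effect depends on how the HTTP clients import these names, which this diff does not show.

```python
# Read the baked-in production endpoints.
from gmicloud._internal import _config

print(_config.ARTIFACT_SERVICE_BASE_URL)
# -> https://inference-engine.gmicloud.ai/api/v1/ie/artifact

# Hypothetical switch to the commented-out dev environment. If the SDK's
# clients do `from ._config import ARTIFACT_SERVICE_BASE_URL` at import time,
# patching the module attribute afterwards would NOT affect them.
_config.ARTIFACT_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/artifact"
_config.TASK_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1/ie/task"
_config.IAM_SERVICE_BASE_URL = "https://ce-tot.gmicloud-dev.com/api/v1"
```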