gmicloud 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gmicloud-0.1.6/PKG-INFO +147 -0
- gmicloud-0.1.6/README.md +135 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_enums.py +5 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_manager/_artifact_manager.py +84 -1
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_manager/_task_manager.py +69 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_models.py +19 -3
- gmicloud-0.1.6/gmicloud/client.py +247 -0
- gmicloud-0.1.6/gmicloud.egg-info/PKG-INFO +147 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud.egg-info/SOURCES.txt +0 -1
- {gmicloud-0.1.5 → gmicloud-0.1.6}/pyproject.toml +3 -3
- gmicloud-0.1.5/PKG-INFO +0 -246
- gmicloud-0.1.5/README.md +0 -234
- gmicloud-0.1.5/gmicloud/client.py +0 -143
- gmicloud-0.1.5/gmicloud/tests/__init__.py +0 -0
- gmicloud-0.1.5/gmicloud.egg-info/PKG-INFO +0 -246
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/__init__.py +0 -0
- {gmicloud-0.1.5/examples → gmicloud-0.1.6/gmicloud/_internal}/__init__.py +0 -0
- {gmicloud-0.1.5/gmicloud/_internal → gmicloud-0.1.6/gmicloud/_internal/_client}/__init__.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_client/_artifact_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_client/_decorator.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_client/_file_upload_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_client/_http_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_client/_iam_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_client/_task_client.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_config.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_constants.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_exceptions.py +0 -0
- {gmicloud-0.1.5/gmicloud/_internal/_client → gmicloud-0.1.6/gmicloud/_internal/_manager}/__init__.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/_internal/_manager/_iam_manager.py +0 -0
- {gmicloud-0.1.5/gmicloud/_internal/_manager → gmicloud-0.1.6/gmicloud/tests}/__init__.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/tests/test_artifacts.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/tests/test_tasks.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud/utils/uninstall_packages.py +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud.egg-info/dependency_links.txt +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/gmicloud.egg-info/top_level.txt +0 -0
- {gmicloud-0.1.5 → gmicloud-0.1.6}/setup.cfg +0 -0
gmicloud-0.1.6/PKG-INFO
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: gmicloud
|
3
|
+
Version: 0.1.6
|
4
|
+
Summary: GMI Cloud Python SDK
|
5
|
+
Author-email: GMI <support@gmicloud.ai>
|
6
|
+
License: MIT
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
9
|
+
Classifier: Operating System :: OS Independent
|
10
|
+
Requires-Python: >=3.6
|
11
|
+
Description-Content-Type: text/markdown
|
12
|
+
|
13
|
+
# GMICloud SDK (Beta)
|
14
|
+
|
15
|
+
## Overview
|
16
|
+
Before you start: Our service and GPU resources are currently invite-only, so please contact our team (getstarted@gmicloud.ai) to get invited if you don't have an invitation yet.
|
17
|
+
|
18
|
+
The GMI Inference Engine SDK provides a Python interface for deploying and managing machine learning models in production environments. It allows users to create model artifacts, schedule tasks for serving models, and call inference APIs easily.
|
19
|
+
|
20
|
+
This SDK streamlines the process of utilizing GMI Cloud capabilities such as deploying models with Kubernetes-based Ray services, managing resources automatically, and accessing model inference endpoints. With minimal setup, developers can focus on building ML solutions instead of infrastructure.
|
21
|
+
|
22
|
+
## Features
|
23
|
+
|
24
|
+
- Artifact Management: Easily create, update, and manage ML model artifacts.
|
25
|
+
- Task Management: Quickly create, schedule, and manage deployment tasks for model inference.
|
26
|
+
- Usage Data Retrieval: Fetch and analyze usage data to optimize resource allocation.
|
27
|
+
|
28
|
+
## Installation
|
29
|
+
|
30
|
+
To install the SDK, use pip:
|
31
|
+
|
32
|
+
```bash
|
33
|
+
pip install gmicloud
|
34
|
+
```
|
35
|
+
|
36
|
+
## Setup
|
37
|
+
|
38
|
+
You must configure authentication credentials for accessing the GMI Cloud API.
|
39
|
+
To create an account and get your login info, please visit **GMI inference platform: https://inference-engine.gmicloud.ai/**.
|
40
|
+
|
41
|
+
There are two ways to configure the SDK:
|
42
|
+
|
43
|
+
### Option 1: Using Environment Variables
|
44
|
+
|
45
|
+
Set the following environment variables:
|
46
|
+
|
47
|
+
```shell
|
48
|
+
export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
|
49
|
+
export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
|
50
|
+
export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
|
51
|
+
```
|
52
|
+
|
53
|
+
### Option 2: Passing Credentials as Parameters
|
54
|
+
|
55
|
+
Pass `client_id`, `email`, and `password` directly to the Client object when initializing it in your script:
|
56
|
+
|
57
|
+
```python
|
58
|
+
from gmicloud import Client
|
59
|
+
|
60
|
+
client = Client(client_id="<YOUR_CLIENT_ID>", email="<YOUR_EMAIL>", password="<YOUR_PASSWORD>")
|
61
|
+
```
|
62
|
+
|
63
|
+
## Quick Start
|
64
|
+
|
65
|
+
### 1. How to run the code in the example folder
|
66
|
+
```bash
|
67
|
+
cd path/to/gmicloud-sdk
|
68
|
+
# Create a virtual environment
|
69
|
+
python -m venv venv
|
70
|
+
source venv/bin/activate
|
71
|
+
|
72
|
+
pip install -r requirements.txt
|
73
|
+
python -m examples.create_task_from_artifact_template
|
74
|
+
```
|
75
|
+
|
76
|
+
### 2. Create an inference task from an artifact template
|
77
|
+
|
78
|
+
This is the simplest example to deploy an inference task using an existing artifact template:
|
79
|
+
|
80
|
+
Up-to-date code in /examples/create_task_from_artifact_template.py
|
81
|
+
|
82
|
+
```python
|
83
|
+
from datetime import datetime
|
84
|
+
import os
|
85
|
+
import sys
|
86
|
+
|
87
|
+
from gmicloud import *
|
88
|
+
from examples.completion import call_chat_completion
|
89
|
+
|
90
|
+
cli = Client()
|
91
|
+
|
92
|
+
# List templates offered by GMI cloud
|
93
|
+
templates = cli.list_templates()
|
94
|
+
print(f"Found {len(templates)} templates: {templates}")
|
95
|
+
|
96
|
+
# Pick a template from the list
|
97
|
+
pick_template = "Llama-3.1-8B"
|
98
|
+
|
99
|
+
# Create Artifact from template
|
100
|
+
artifact_id, recommended_replica_resources = cli.create_artifact_from_template(templates[0])
|
101
|
+
print(f"Created artifact {artifact_id} with recommended replica resources: {recommended_replica_resources}")
|
102
|
+
|
103
|
+
# Create Task based on Artifact
|
104
|
+
task_id = cli.create_task(artifact_id, recommended_replica_resources, TaskScheduling(
|
105
|
+
scheduling_oneoff=OneOffScheduling(
|
106
|
+
trigger_timestamp=int(datetime.now().timestamp()),
|
107
|
+
min_replicas=1,
|
108
|
+
max_replicas=1,
|
109
|
+
)
|
110
|
+
))
|
111
|
+
task = cli.task_manager.get_task(task_id)
|
112
|
+
print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
|
113
|
+
|
114
|
+
# Start Task and wait for it to be ready
|
115
|
+
cli.start_task_and_wait(task.task_id)
|
116
|
+
|
117
|
+
# Testing with calling chat completion
|
118
|
+
print(call_chat_completion(cli, task.task_id))
|
119
|
+
|
120
|
+
```
|
121
|
+
|
122
|
+
## API Reference
|
123
|
+
|
124
|
+
### Client
|
125
|
+
|
126
|
+
Represents the entry point to interact with GMI Cloud APIs.
|
127
|
+
Client(
|
128
|
+
client_id: Optional[str] = "",
|
129
|
+
email: Optional[str] = "",
|
130
|
+
password: Optional[str] = ""
|
131
|
+
)
|
132
|
+
|
133
|
+
### Artifact Management
|
134
|
+
|
135
|
+
* get_artifact_templates(): Fetch a list of available artifact templates.
|
136
|
+
* create_artifact_from_template(template_id: str): Create a model artifact from a given template.
|
137
|
+
* get_artifact(artifact_id: str): Get details of a specific artifact.
|
138
|
+
|
139
|
+
### Task Management
|
140
|
+
|
141
|
+
* create_task_from_artifact_template(template_id: str, scheduling: TaskScheduling): Create and schedule a task using an
|
142
|
+
artifact template.
|
143
|
+
* start_task(task_id: str): Start a task.
|
144
|
+
* get_task(task_id: str): Retrieve the status and details of a specific task.
|
145
|
+
|
146
|
+
## Notes & Troubleshooting
|
147
|
+
k
|
gmicloud-0.1.6/README.md
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# GMICloud SDK (Beta)
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
Before you start: Our service and GPU resources are currently invite-only, so please contact our team (getstarted@gmicloud.ai) to get invited if you don't have an invitation yet.
|
5
|
+
|
6
|
+
The GMI Inference Engine SDK provides a Python interface for deploying and managing machine learning models in production environments. It allows users to create model artifacts, schedule tasks for serving models, and call inference APIs easily.
|
7
|
+
|
8
|
+
This SDK streamlines the process of utilizing GMI Cloud capabilities such as deploying models with Kubernetes-based Ray services, managing resources automatically, and accessing model inference endpoints. With minimal setup, developers can focus on building ML solutions instead of infrastructure.
|
9
|
+
|
10
|
+
## Features
|
11
|
+
|
12
|
+
- Artifact Management: Easily create, update, and manage ML model artifacts.
|
13
|
+
- Task Management: Quickly create, schedule, and manage deployment tasks for model inference.
|
14
|
+
- Usage Data Retrieval: Fetch and analyze usage data to optimize resource allocation.
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
To install the SDK, use pip:
|
19
|
+
|
20
|
+
```bash
|
21
|
+
pip install gmicloud
|
22
|
+
```
|
23
|
+
|
24
|
+
## Setup
|
25
|
+
|
26
|
+
You must configure authentication credentials for accessing the GMI Cloud API.
|
27
|
+
To create an account and get your login info, please visit **GMI inference platform: https://inference-engine.gmicloud.ai/**.
|
28
|
+
|
29
|
+
There are two ways to configure the SDK:
|
30
|
+
|
31
|
+
### Option 1: Using Environment Variables
|
32
|
+
|
33
|
+
Set the following environment variables:
|
34
|
+
|
35
|
+
```shell
|
36
|
+
export GMI_CLOUD_CLIENT_ID=<YOUR_CLIENT_ID>
|
37
|
+
export GMI_CLOUD_EMAIL=<YOUR_EMAIL>
|
38
|
+
export GMI_CLOUD_PASSWORD=<YOUR_PASSWORD>
|
39
|
+
```
|
40
|
+
|
41
|
+
### Option 2: Passing Credentials as Parameters
|
42
|
+
|
43
|
+
Pass `client_id`, `email`, and `password` directly to the Client object when initializing it in your script:
|
44
|
+
|
45
|
+
```python
|
46
|
+
from gmicloud import Client
|
47
|
+
|
48
|
+
client = Client(client_id="<YOUR_CLIENT_ID>", email="<YOUR_EMAIL>", password="<YOUR_PASSWORD>")
|
49
|
+
```
|
50
|
+
|
51
|
+
## Quick Start
|
52
|
+
|
53
|
+
### 1. How to run the code in the example folder
|
54
|
+
```bash
|
55
|
+
cd path/to/gmicloud-sdk
|
56
|
+
# Create a virtual environment
|
57
|
+
python -m venv venv
|
58
|
+
source venv/bin/activate
|
59
|
+
|
60
|
+
pip install -r requirements.txt
|
61
|
+
python -m examples.create_task_from_artifact_template
|
62
|
+
```
|
63
|
+
|
64
|
+
### 2. Create an inference task from an artifact template
|
65
|
+
|
66
|
+
This is the simplest example to deploy an inference task using an existing artifact template:
|
67
|
+
|
68
|
+
Up-to-date code in /examples/create_task_from_artifact_template.py
|
69
|
+
|
70
|
+
```python
|
71
|
+
from datetime import datetime
|
72
|
+
import os
|
73
|
+
import sys
|
74
|
+
|
75
|
+
from gmicloud import *
|
76
|
+
from examples.completion import call_chat_completion
|
77
|
+
|
78
|
+
cli = Client()
|
79
|
+
|
80
|
+
# List templates offered by GMI cloud
|
81
|
+
templates = cli.list_templates()
|
82
|
+
print(f"Found {len(templates)} templates: {templates}")
|
83
|
+
|
84
|
+
# Pick a template from the list
|
85
|
+
pick_template = "Llama-3.1-8B"
|
86
|
+
|
87
|
+
# Create Artifact from template
|
88
|
+
artifact_id, recommended_replica_resources = cli.create_artifact_from_template(templates[0])
|
89
|
+
print(f"Created artifact {artifact_id} with recommended replica resources: {recommended_replica_resources}")
|
90
|
+
|
91
|
+
# Create Task based on Artifact
|
92
|
+
task_id = cli.create_task(artifact_id, recommended_replica_resources, TaskScheduling(
|
93
|
+
scheduling_oneoff=OneOffScheduling(
|
94
|
+
trigger_timestamp=int(datetime.now().timestamp()),
|
95
|
+
min_replicas=1,
|
96
|
+
max_replicas=1,
|
97
|
+
)
|
98
|
+
))
|
99
|
+
task = cli.task_manager.get_task(task_id)
|
100
|
+
print(f"Task created: {task.config.task_name}. You can check details at https://inference-engine.gmicloud.ai/user-console/task")
|
101
|
+
|
102
|
+
# Start Task and wait for it to be ready
|
103
|
+
cli.start_task_and_wait(task.task_id)
|
104
|
+
|
105
|
+
# Testing with calling chat completion
|
106
|
+
print(call_chat_completion(cli, task.task_id))
|
107
|
+
|
108
|
+
```
|
109
|
+
|
110
|
+
## API Reference
|
111
|
+
|
112
|
+
### Client
|
113
|
+
|
114
|
+
Represents the entry point to interact with GMI Cloud APIs.
|
115
|
+
Client(
|
116
|
+
client_id: Optional[str] = "",
|
117
|
+
email: Optional[str] = "",
|
118
|
+
password: Optional[str] = ""
|
119
|
+
)
|
120
|
+
|
121
|
+
### Artifact Management
|
122
|
+
|
123
|
+
* get_artifact_templates(): Fetch a list of available artifact templates.
|
124
|
+
* create_artifact_from_template(template_id: str): Create a model artifact from a given template.
|
125
|
+
* get_artifact(artifact_id: str): Get details of a specific artifact.
|
126
|
+
|
127
|
+
### Task Management
|
128
|
+
|
129
|
+
* create_task_from_artifact_template(template_id: str, scheduling: TaskScheduling): Create and schedule a task using an
|
130
|
+
artifact template.
|
131
|
+
* start_task(task_id: str): Start a task.
|
132
|
+
* get_task(task_id: str): Retrieve the status and details of a specific task.
|
133
|
+
|
134
|
+
## Notes & Troubleshooting
|
135
|
+
k
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import os
|
2
|
+
import time
|
2
3
|
from typing import List
|
3
4
|
import mimetypes
|
4
5
|
|
@@ -7,6 +8,9 @@ from .._client._artifact_client import ArtifactClient
|
|
7
8
|
from .._client._file_upload_client import FileUploadClient
|
8
9
|
from .._models import *
|
9
10
|
|
11
|
+
import logging
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
10
14
|
|
11
15
|
class ArtifactManager:
|
12
16
|
"""
|
@@ -86,6 +90,41 @@ class ArtifactManager:
|
|
86
90
|
raise ValueError("Failed to create artifact from template.")
|
87
91
|
|
88
92
|
return resp.artifact_id
|
93
|
+
|
94
|
+
def create_artifact_from_template_name(self, artifact_template_name: str) -> "tuple[str, ReplicaResource]":
    """
    Create an artifact from the public template with the given name and
    wait until the artifact build has finished.

    :param artifact_template_name: The name of the template to use.
    :return: A tuple containing the artifact ID and the recommended replica resources.
    :rtype: tuple[str, ReplicaResource]
    :raises ValueError: If no public template with a usable ID has the given name.
    :raises Exception: Any error raised while fetching templates, creating the
        artifact, or waiting for it to become ready is logged and re-raised.
    """
    # NOTE: the return annotation is quoted so that subscripting the builtin
    # ``tuple`` is not evaluated at definition time (that fails before Python 3.9).
    recommended_replica_resources = None
    template_id = None

    try:
        templates = self.get_public_templates()
    except Exception as e:
        logger.error(f"Failed to get artifact templates, Error: {e}")
        # Re-raise: without the template list there is nothing to search, and
        # falling through would only fail later on the unbound ``templates``.
        raise

    # Use the first template whose name matches.
    for template in templates:
        if template.template_data and template.template_data.name == artifact_template_name:
            resources_template = template.template_data.resources
            recommended_replica_resources = ReplicaResource(
                cpu=resources_template.cpu,
                ram_gb=resources_template.memory,
                gpu=resources_template.gpu,
                gpu_name=resources_template.gpu_name,
            )
            template_id = template.template_id
            break

    if not template_id:
        raise ValueError(f"Template with name {artifact_template_name} not found.")

    try:
        artifact_id = self.create_artifact_from_template(template_id)
        self.wait_for_artifact_ready(artifact_id)
        return artifact_id, recommended_replica_resources
    except Exception as e:
        logger.error(f"Failed to create artifact from template, Error: {e}")
        # Bare ``raise`` keeps the original traceback intact.
        raise
|
89
128
|
|
90
129
|
def rebuild_artifact(self, artifact_id: str) -> RebuildArtifactResponse:
|
91
130
|
"""
|
@@ -240,7 +279,31 @@ class ArtifactManager:
|
|
240
279
|
FileUploadClient.upload_large_file(bigfile_upload_url_resp.upload_link, model_file_path)
|
241
280
|
|
242
281
|
return artifact_id
|
243
|
-
|
282
|
+
|
283
|
+
|
284
|
+
def wait_for_artifact_ready(self, artifact_id: str, timeout_s: int = 900) -> None:
    """
    Poll an artifact every 10 seconds until its build succeeds.

    :param artifact_id: The ID of the artifact to wait for.
    :param timeout_s: The timeout in seconds.
    :return: None
    :raises Exception: If the build status becomes FAILED, TIMEOUT or CANCELLED,
        or if the build has not succeeded within ``timeout_s`` seconds.
    """
    terminal_failures = (BuildStatus.FAILED, BuildStatus.TIMEOUT, BuildStatus.CANCELLED)
    start_time = time.time()
    while True:
        failed_status = None
        try:
            artifact = self.get_artifact(artifact_id)
            if artifact.build_status == BuildStatus.SUCCESS:
                return
            if artifact.build_status in terminal_failures:
                failed_status = artifact.build_status
        except Exception as e:
            # Errors while fetching the artifact are logged and the poll is retried.
            logger.error(f"Failed to get artifact, Error: {e}")

        # Raised outside the ``try`` so the retry handler above cannot swallow
        # it; otherwise a failed build would keep polling until the timeout.
        if failed_status is not None:
            raise Exception(f"Artifact build failed, status: {failed_status}")

        if time.time() - start_time > timeout_s:
            raise Exception(f"Artifact build takes more than {timeout_s // 60} minutes. Testing aborted.")
        time.sleep(10)
|
305
|
+
|
306
|
+
|
244
307
|
def get_public_templates(self) -> List[ArtifactTemplate]:
|
245
308
|
"""
|
246
309
|
Fetch all artifact templates.
|
@@ -249,6 +312,26 @@ class ArtifactManager:
|
|
249
312
|
:rtype: List[ArtifactTemplate]
|
250
313
|
"""
|
251
314
|
return self.artifact_client.get_public_templates()
|
315
|
+
|
316
|
+
|
317
|
+
def list_public_template_names(self) -> List[str]:
    """
    List the names of all public templates.

    Templates without template data or with an empty name are skipped.
    If fetching the templates fails, the error is logged and an empty
    list is returned.

    :return: A list of template names.
    :rtype: List[str]
    """
    # ``List`` (typing) is used instead of ``list[str]`` because subscripting
    # the builtin is evaluated at definition time and fails before Python 3.9.
    try:
        templates = self.get_public_templates()
        return [
            template.template_data.name
            for template in templates
            if template.template_data and template.template_data.name
        ]
    except Exception as e:
        logger.error(f"Failed to get artifact templates, Error: {e}")
        return []
|
334
|
+
|
252
335
|
|
253
336
|
@staticmethod
|
254
337
|
def _validate_file_name(file_name: str) -> None:
|
@@ -4,6 +4,10 @@ from .._client._iam_client import IAMClient
|
|
4
4
|
from .._client._task_client import TaskClient
|
5
5
|
from .._models import *
|
6
6
|
|
7
|
+
import time
|
8
|
+
import logging
|
9
|
+
|
10
|
+
logger = logging.getLogger(__name__)
|
7
11
|
|
8
12
|
class TaskManager:
|
9
13
|
"""
|
@@ -132,6 +136,50 @@ class TaskManager:
|
|
132
136
|
self._validate_not_empty(task_id, "Task ID")
|
133
137
|
|
134
138
|
return self.task_client.start_task(task_id)
|
139
|
+
|
140
|
+
|
141
|
+
def start_task_and_wait(self, task_id: str, timeout_s: int = 900) -> Task:
    """
    Start a task and poll every 10 seconds until it reports RUNNING.

    :param task_id: The ID of the task to start.
    :param timeout_s: The timeout in seconds.
    :return: The task object, as fetched once its status is RUNNING.
    :rtype: Task
    :raises Exception: If starting the task fails, if the task status becomes
        NEEDSTOP or ARCHIVED, or if the task is not RUNNING within ``timeout_s`` seconds.
    """
    # Trigger the start; a failure here is logged and re-raised.
    try:
        self.start_task(task_id)
        logger.info(f"Started task ID: {task_id}")
    except Exception as e:
        logger.error(f"Failed to start task, Error: {e}")
        raise

    # NOTE(review): the previous version had a second
    # ``elif task.task_status == TaskStatus.RUNNING`` branch that inspected
    # ``task.endpoint_info.endpoint_status``. It was unreachable because the
    # first RUNNING check already returned. It is removed here to keep the
    # effective behaviour; confirm whether endpoint readiness should gate the return.
    fatal_statuses = [TaskStatus.NEEDSTOP, TaskStatus.ARCHIVED]
    start_time = time.time()
    while True:
        unexpected_status = None
        try:
            task = self.get_task(task_id)
            if task.task_status == TaskStatus.RUNNING:
                return task
            if task.task_status in fatal_statuses:
                unexpected_status = task.task_status
            else:
                logger.info(f"Pending task starting. Task status: {task.task_status}")
        except Exception as e:
            # Errors while fetching the task are logged and the poll is retried.
            logger.error(f"Failed to get task, Error: {e}")

        # Raised outside the ``try`` so the retry handler above cannot swallow it.
        if unexpected_status is not None:
            raise Exception(f"Unexpected task status after starting: {unexpected_status}")

        if time.time() - start_time > timeout_s:
            raise Exception(f"Task creation takes more than {timeout_s // 60} minutes. Testing aborted.")
        time.sleep(10)
|
182
|
+
|
135
183
|
|
136
184
|
def stop_task(self, task_id: str) -> bool:
|
137
185
|
"""
|
@@ -143,6 +191,27 @@ class TaskManager:
|
|
143
191
|
"""
|
144
192
|
self._validate_not_empty(task_id, "Task ID")
|
145
193
|
|
194
|
+
|
195
|
+
def stop_task_and_wait(self, task_id: str, timeout_s: int = 900):
    """
    Request a task stop and poll every 10 seconds until the task reports IDLE.

    :param task_id: The ID of the task to stop.
    :param timeout_s: The timeout in seconds.
    :return: The result of the stop request, or False if the stop request
        itself raised (the error is logged and polling still proceeds).
    :raises Exception: If the task is not IDLE within ``timeout_s`` seconds.
    """
    # NOTE(review): the previous version read ``self.task_manager``, which no
    # other method of this class uses, and sent a second stop request after the
    # task was already IDLE. The stop request now goes through ``self.task_client``
    # once, up front. Also confirm that ``stop_task`` still ends with its own
    # ``return self.task_client.stop_task(task_id)``; in this diff that line
    # appears after this method instead.
    self._validate_not_empty(task_id, "Task ID")

    stop_result = False
    try:
        stop_result = self.task_client.stop_task(task_id)
        logger.info(f"Stopping task ID: {task_id}")
    except Exception as e:
        # Best effort, as before: log and still wait for the task to go IDLE.
        logger.error(f"Failed to stop task, Error: {e}")

    start_time = time.time()
    while True:
        try:
            task = self.get_task(task_id)
            if task.task_status == TaskStatus.IDLE:
                break
        except Exception as e:
            # Errors while fetching the task are logged and the poll is retried.
            logger.error(f"Failed to get task, Error: {e}")
        if time.time() - start_time > timeout_s:
            raise Exception(f"Task stopping takes more than {timeout_s // 60} minutes. Testing aborted.")
        time.sleep(10)

    return stop_result
|
147
216
|
|
148
217
|
def get_usage_data(self, start_timestamp: str, end_timestamp: str) -> GetUsageDataResponse:
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from typing import Optional, List
|
1
|
+
from typing import Optional, List, Union
|
2
2
|
from datetime import datetime
|
3
3
|
|
4
4
|
from pydantic import BaseModel
|
5
|
-
from gmicloud._internal._enums import BuildStatus, TaskStatus, TaskEndpointStatus
|
5
|
+
from gmicloud._internal._enums import BuildStatus, TaskStatus, TaskEndpointStatus, ModelParameterType
|
6
6
|
|
7
7
|
|
8
8
|
class BigFileMetadata(BaseModel):
|
@@ -70,6 +70,7 @@ class CreateArtifactRequest(BaseModel):
|
|
70
70
|
artifact_name: str # The name of the artifact to create.
|
71
71
|
artifact_description: Optional[str] = "" # Description of the artifact.
|
72
72
|
artifact_tags: Optional[List[str]] = None # Tags for the artifact, separated by commas.
|
73
|
+
model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact.
|
73
74
|
|
74
75
|
|
75
76
|
class CreateArtifactResponse(BaseModel):
|
@@ -158,6 +159,7 @@ class TemplateMetadata(BaseModel):
|
|
158
159
|
update_at: Optional[str] = None # Timestamp when the template was last updated.
|
159
160
|
update_by: Optional[str] = "" # ID of the user who last updated the template.
|
160
161
|
|
162
|
+
|
161
163
|
class TemplateData(BaseModel):
|
162
164
|
"""
|
163
165
|
Data for an artifact template.
|
@@ -165,6 +167,7 @@ class TemplateData(BaseModel):
|
|
165
167
|
description: Optional[str] = "" # Description of the artifact template.
|
166
168
|
icon_link: Optional[str] = "" # Link to the icon for the artifact template.
|
167
169
|
image_link: Optional[str] = "" # Link to the image for the artifact template.
|
170
|
+
model_parameters: Optional[List["ModelParameter"]] = None # Parameters for the artifact template.
|
168
171
|
name: Optional[str] = "" # Name of the artifact template.
|
169
172
|
ray: Optional["RayContent"] = None # Template for Ray-based artifacts.
|
170
173
|
resources: Optional["ResourcesTemplate"] = None # Resource allocation template.
|
@@ -172,6 +175,19 @@ class TemplateData(BaseModel):
|
|
172
175
|
volume_path: Optional[str] = "" # Path to the volume where the artifact is stored.
|
173
176
|
|
174
177
|
|
178
|
+
class ModelParameter(BaseModel):
    """
    Parameter for an artifact template.

    Referenced (as a forward reference) by the ``model_parameters`` fields of
    ``CreateArtifactRequest`` and ``TemplateData``. Every field is optional
    and has a default, so an empty ``ModelParameter()`` is valid.
    """
    # NOTE(review): ``max``, ``min`` and ``type`` shadow builtins inside the class
    # body; presumably they mirror the API's field names — confirm before renaming.
    category: Optional[str] = ""  # Category of the parameter.
    display_name: Optional[str] = ""  # Display name of the parameter.
    key: Optional[str] = ""  # Key for the parameter.
    max: Optional[float] = 0  # Maximum value for the parameter.
    min: Optional[float] = 0  # Minimum value for the parameter.
    step: Optional[float] = 0  # Step value for the parameter.
    type: Optional[ModelParameterType] = ModelParameterType.TEXT  # Type of the parameter (e.g., numeric, bool, text).
    value: Optional[Union[int, float, bool, str]] = ""  # Default value for the parameter.
|
190
|
+
|
175
191
|
class RayContent(BaseModel):
|
176
192
|
deployment_name: Optional[str] = "" # Name of the deployment.
|
177
193
|
file_path: Optional[str] = "" # Path to the task file in storage.
|
@@ -234,7 +250,6 @@ class RayTaskConfig(BaseModel):
|
|
234
250
|
Configuration settings for Ray tasks.
|
235
251
|
"""
|
236
252
|
artifact_id: Optional[str] = "" # Associated artifact ID.
|
237
|
-
ray_version: Optional[str] = "" # Version of Ray used.
|
238
253
|
ray_cluster_image: Optional[str] = "" # Docker image for the Ray cluster.
|
239
254
|
file_path: Optional[str] = "" # Path to the task file in storage.
|
240
255
|
deployment_name: Optional[str] = "" # Name of the deployment.
|
@@ -282,6 +297,7 @@ class TaskConfig(BaseModel):
|
|
282
297
|
"""
|
283
298
|
Configuration data for a task.
|
284
299
|
"""
|
300
|
+
task_name: Optional[str] = "" # Name of the task.
|
285
301
|
ray_task_config: Optional[RayTaskConfig] = None # Configuration for a Ray-based task.
|
286
302
|
task_scheduling: Optional[TaskScheduling] = None # Scheduling configuration for the task.
|
287
303
|
create_timestamp: Optional[int] = 0 # Timestamp when the task was created.
|