proximl 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proximl/__init__.py +1 -1
- proximl/checkpoints.py +46 -28
- proximl/cli/cloudbender/__init__.py +2 -1
- proximl/cli/cloudbender/datastore.py +2 -7
- proximl/cli/cloudbender/service.py +19 -2
- proximl/cli/project/__init__.py +3 -72
- proximl/cli/project/data_connector.py +61 -0
- proximl/cli/project/datastore.py +61 -0
- proximl/cli/project/service.py +61 -0
- proximl/cloudbender/cloudbender.py +4 -2
- proximl/cloudbender/data_connectors.py +8 -0
- proximl/cloudbender/datastores.py +9 -19
- proximl/cloudbender/nodes.py +44 -1
- proximl/cloudbender/providers.py +53 -0
- proximl/cloudbender/regions.py +48 -0
- proximl/cloudbender/services.py +65 -1
- proximl/datasets.py +41 -12
- proximl/exceptions.py +51 -0
- proximl/jobs.py +15 -19
- proximl/models.py +41 -22
- proximl/volumes.py +24 -5
- {proximl-0.5.9.dist-info → proximl-0.5.11.dist-info}/METADATA +1 -1
- {proximl-0.5.9.dist-info → proximl-0.5.11.dist-info}/RECORD +48 -46
- tests/integration/projects/conftest.py +3 -1
- tests/integration/projects/test_projects_data_connectors_integration.py +44 -0
- tests/integration/projects/test_projects_datastores_integration.py +42 -0
- tests/integration/projects/test_projects_services_integration.py +44 -0
- tests/integration/test_checkpoints_integration.py +1 -2
- tests/integration/test_jobs_integration.py +13 -0
- tests/integration/test_models_integration.py +0 -1
- tests/unit/cli/projects/__init__.py +0 -0
- tests/unit/cli/projects/test_cli_project_data_connector_unit.py +28 -0
- tests/unit/cli/projects/test_cli_project_datastore_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_key_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_secret_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_service_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_unit.py +19 -0
- tests/unit/cloudbender/test_datastores_unit.py +1 -5
- tests/unit/cloudbender/test_services_unit.py +6 -0
- tests/unit/conftest.py +158 -15
- tests/unit/test_checkpoints_unit.py +15 -23
- tests/unit/test_datasets_unit.py +15 -20
- tests/unit/test_models_unit.py +13 -16
- tests/unit/test_volumes_unit.py +3 -0
- proximl/cli/cloudbender/reservation.py +0 -159
- proximl/cli/project.py +0 -154
- proximl/cloudbender/reservations.py +0 -126
- proximl/projects.py +0 -187
- tests/integration/test_projects_integration.py +0 -44
- tests/unit/cli/cloudbender/test_cli_reservation_unit.py +0 -38
- tests/unit/cli/test_cli_project_unit.py +0 -46
- tests/unit/cloudbender/test_reservations_unit.py +0 -173
- tests/unit/test_auth.py +0 -30
- tests/unit/test_projects_unit.py +0 -294
- tests/unit/test_proximl.py +0 -54
- {proximl-0.5.9.dist-info → proximl-0.5.11.dist-info}/LICENSE +0 -0
- {proximl-0.5.9.dist-info → proximl-0.5.11.dist-info}/WHEEL +0 -0
- {proximl-0.5.9.dist-info → proximl-0.5.11.dist-info}/entry_points.txt +0 -0
- {proximl-0.5.9.dist-info → proximl-0.5.11.dist-info}/top_level.txt +0 -0
proximl/cloudbender/providers.py
CHANGED
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
3
5
|
from datetime import datetime
|
|
4
6
|
|
|
7
|
+
from proximl.exceptions import (
|
|
8
|
+
ApiError,
|
|
9
|
+
SpecificationError,
|
|
10
|
+
ProxiMLException,
|
|
11
|
+
ProviderError,
|
|
12
|
+
)
|
|
13
|
+
|
|
5
14
|
|
|
6
15
|
class Providers(object):
|
|
7
16
|
def __init__(self, proximl):
|
|
@@ -36,6 +45,7 @@ class Provider:
|
|
|
36
45
|
self._provider = kwargs
|
|
37
46
|
self._id = self._provider.get("provider_uuid")
|
|
38
47
|
self._type = self._provider.get("type")
|
|
48
|
+
self._status = self._provider.get("status")
|
|
39
49
|
self._credits = self._provider.get("credits")
|
|
40
50
|
|
|
41
51
|
@property
|
|
@@ -46,6 +56,10 @@ class Provider:
|
|
|
46
56
|
def type(self) -> str:
|
|
47
57
|
return self._type
|
|
48
58
|
|
|
59
|
+
@property
|
|
60
|
+
def status(self) -> str:
|
|
61
|
+
return self._status
|
|
62
|
+
|
|
49
63
|
@property
|
|
50
64
|
def credits(self) -> float:
|
|
51
65
|
return self._credits
|
|
@@ -69,3 +83,42 @@ class Provider:
|
|
|
69
83
|
)
|
|
70
84
|
self.__init__(self.proximl, **resp)
|
|
71
85
|
return self
|
|
86
|
+
|
|
87
|
+
async def wait_for(self, status, timeout=300):
|
|
88
|
+
if self.status == status:
|
|
89
|
+
return
|
|
90
|
+
valid_statuses = ["ready", "archived"]
|
|
91
|
+
if not status in valid_statuses:
|
|
92
|
+
raise SpecificationError(
|
|
93
|
+
"status",
|
|
94
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
95
|
+
)
|
|
96
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
97
|
+
if timeout > MAX_TIMEOUT:
|
|
98
|
+
raise SpecificationError(
|
|
99
|
+
"timeout",
|
|
100
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
POLL_INTERVAL_MIN = 5
|
|
104
|
+
POLL_INTERVAL_MAX = 60
|
|
105
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
106
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
107
|
+
count = 0
|
|
108
|
+
while count < retry_count:
|
|
109
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
110
|
+
try:
|
|
111
|
+
await self.refresh()
|
|
112
|
+
except ApiError as e:
|
|
113
|
+
if status == "archived" and e.status == 404:
|
|
114
|
+
return
|
|
115
|
+
raise e
|
|
116
|
+
if self.status in ["errored", "failed"]:
|
|
117
|
+
raise ProviderError(self.status, self)
|
|
118
|
+
if self.status == status:
|
|
119
|
+
return self
|
|
120
|
+
else:
|
|
121
|
+
count += 1
|
|
122
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
123
|
+
|
|
124
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/cloudbender/regions.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from proximl.exceptions import (
|
|
7
|
+
ApiError,
|
|
8
|
+
SpecificationError,
|
|
9
|
+
ProxiMLException,
|
|
10
|
+
RegionError,
|
|
11
|
+
)
|
|
3
12
|
|
|
4
13
|
|
|
5
14
|
class Regions(object):
|
|
@@ -111,3 +120,42 @@ class Region:
|
|
|
111
120
|
None,
|
|
112
121
|
dict(project_uuid=project_uuid, checkpoint_uuid=checkpoint_uuid),
|
|
113
122
|
)
|
|
123
|
+
|
|
124
|
+
async def wait_for(self, status, timeout=300):
|
|
125
|
+
if self.status == status:
|
|
126
|
+
return
|
|
127
|
+
valid_statuses = ["healthy", "offline", "archived"]
|
|
128
|
+
if not status in valid_statuses:
|
|
129
|
+
raise SpecificationError(
|
|
130
|
+
"status",
|
|
131
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
132
|
+
)
|
|
133
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
134
|
+
if timeout > MAX_TIMEOUT:
|
|
135
|
+
raise SpecificationError(
|
|
136
|
+
"timeout",
|
|
137
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
POLL_INTERVAL_MIN = 5
|
|
141
|
+
POLL_INTERVAL_MAX = 60
|
|
142
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
143
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
144
|
+
count = 0
|
|
145
|
+
while count < retry_count:
|
|
146
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
147
|
+
try:
|
|
148
|
+
await self.refresh()
|
|
149
|
+
except ApiError as e:
|
|
150
|
+
if status == "archived" and e.status == 404:
|
|
151
|
+
return
|
|
152
|
+
raise e
|
|
153
|
+
if self.status in ["errored", "failed"]:
|
|
154
|
+
raise RegionError(self.status, self)
|
|
155
|
+
if self.status == status:
|
|
156
|
+
return self
|
|
157
|
+
else:
|
|
158
|
+
count += 1
|
|
159
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
160
|
+
|
|
161
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/cloudbender/services.py
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from proximl.exceptions import (
|
|
7
|
+
ApiError,
|
|
8
|
+
SpecificationError,
|
|
9
|
+
ProxiMLException,
|
|
10
|
+
)
|
|
3
11
|
|
|
4
12
|
|
|
5
13
|
class Services(object):
|
|
@@ -28,12 +36,14 @@ class Services(object):
|
|
|
28
36
|
provider_uuid,
|
|
29
37
|
region_uuid,
|
|
30
38
|
name,
|
|
39
|
+
type,
|
|
31
40
|
public,
|
|
32
41
|
**kwargs,
|
|
33
42
|
):
|
|
34
43
|
logging.info(f"Creating Service {name}")
|
|
35
44
|
data = dict(
|
|
36
45
|
name=name,
|
|
46
|
+
type=type,
|
|
37
47
|
public=public,
|
|
38
48
|
**kwargs,
|
|
39
49
|
)
|
|
@@ -65,7 +75,12 @@ class Service:
|
|
|
65
75
|
self._region_uuid = self._service.get("region_uuid")
|
|
66
76
|
self._public = self._service.get("public")
|
|
67
77
|
self._name = self._service.get("name")
|
|
68
|
-
self.
|
|
78
|
+
self._type = self._service.get("type")
|
|
79
|
+
self._hostname = self._service.get("custom_hostname") or self._service.get(
|
|
80
|
+
"hostname"
|
|
81
|
+
)
|
|
82
|
+
self._status = self._service.get("status")
|
|
83
|
+
self._port = self._service.get("port")
|
|
69
84
|
|
|
70
85
|
@property
|
|
71
86
|
def id(self) -> str:
|
|
@@ -91,6 +106,18 @@ class Service:
|
|
|
91
106
|
def hostname(self) -> str:
|
|
92
107
|
return self._hostname
|
|
93
108
|
|
|
109
|
+
@property
|
|
110
|
+
def status(self) -> str:
|
|
111
|
+
return self._status
|
|
112
|
+
|
|
113
|
+
@property
|
|
114
|
+
def type(self) -> str:
|
|
115
|
+
return self._type
|
|
116
|
+
|
|
117
|
+
@property
|
|
118
|
+
def port(self) -> str:
|
|
119
|
+
return self._port
|
|
120
|
+
|
|
94
121
|
def __str__(self):
|
|
95
122
|
return json.dumps({k: v for k, v in self._service.items()})
|
|
96
123
|
|
|
@@ -113,3 +140,40 @@ class Service:
|
|
|
113
140
|
)
|
|
114
141
|
self.__init__(self.proximl, **resp)
|
|
115
142
|
return self
|
|
143
|
+
|
|
144
|
+
async def wait_for(self, status, timeout=300):
|
|
145
|
+
if self.status == status:
|
|
146
|
+
return
|
|
147
|
+
valid_statuses = ["active", "archived"]
|
|
148
|
+
if not status in valid_statuses:
|
|
149
|
+
raise SpecificationError(
|
|
150
|
+
"status",
|
|
151
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
152
|
+
)
|
|
153
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
154
|
+
if timeout > MAX_TIMEOUT:
|
|
155
|
+
raise SpecificationError(
|
|
156
|
+
"timeout",
|
|
157
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
POLL_INTERVAL_MIN = 5
|
|
161
|
+
POLL_INTERVAL_MAX = 60
|
|
162
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
163
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
164
|
+
count = 0
|
|
165
|
+
while count < retry_count:
|
|
166
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
167
|
+
try:
|
|
168
|
+
await self.refresh()
|
|
169
|
+
except ApiError as e:
|
|
170
|
+
if status == "archived" and e.status == 404:
|
|
171
|
+
return
|
|
172
|
+
raise e
|
|
173
|
+
if self.status == status:
|
|
174
|
+
return self
|
|
175
|
+
else:
|
|
176
|
+
count += 1
|
|
177
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
178
|
+
|
|
179
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/datasets.py
CHANGED
|
@@ -31,13 +31,24 @@ class Datasets(object):
|
|
|
31
31
|
datasets = [Dataset(self.proximl, **dataset) for dataset in resp]
|
|
32
32
|
return datasets
|
|
33
33
|
|
|
34
|
-
async def create(
|
|
34
|
+
async def create(
|
|
35
|
+
self,
|
|
36
|
+
name,
|
|
37
|
+
source_type,
|
|
38
|
+
source_uri,
|
|
39
|
+
type="evefs",
|
|
40
|
+
project_uuid=None,
|
|
41
|
+
**kwargs,
|
|
42
|
+
):
|
|
43
|
+
if not project_uuid:
|
|
44
|
+
project_uuid = self.proximl.active_project
|
|
35
45
|
data = dict(
|
|
36
46
|
name=name,
|
|
37
47
|
source_type=source_type,
|
|
38
48
|
source_uri=source_uri,
|
|
39
|
-
|
|
40
|
-
|
|
49
|
+
project_uuid=project_uuid,
|
|
50
|
+
type=type,
|
|
51
|
+
**kwargs,
|
|
41
52
|
)
|
|
42
53
|
payload = {k: v for k, v in data.items() if v is not None}
|
|
43
54
|
logging.info(f"Creating Dataset {name}")
|
|
@@ -60,7 +71,10 @@ class Dataset:
|
|
|
60
71
|
self._id = self._dataset.get("id", self._dataset.get("dataset_uuid"))
|
|
61
72
|
self._status = self._dataset.get("status")
|
|
62
73
|
self._name = self._dataset.get("name")
|
|
63
|
-
self._size = self._dataset.get("size")
|
|
74
|
+
self._size = self._dataset.get("size") or self._dataset.get("used_size")
|
|
75
|
+
self._billed_size = self._dataset.get("billed_size") or self._dataset.get(
|
|
76
|
+
"size"
|
|
77
|
+
)
|
|
64
78
|
self._project_uuid = self._dataset.get("project_uuid")
|
|
65
79
|
|
|
66
80
|
@property
|
|
@@ -79,6 +93,10 @@ class Dataset:
|
|
|
79
93
|
def size(self) -> int:
|
|
80
94
|
return self._size or 0
|
|
81
95
|
|
|
96
|
+
@property
|
|
97
|
+
def billed_size(self) -> int:
|
|
98
|
+
return self._billed_size
|
|
99
|
+
|
|
82
100
|
def __str__(self):
|
|
83
101
|
return json.dumps({k: v for k, v in self._dataset.items()})
|
|
84
102
|
|
|
@@ -119,12 +137,16 @@ class Dataset:
|
|
|
119
137
|
project_uuid=self._dataset.get("project_uuid"),
|
|
120
138
|
cidr=self._dataset.get("vpn").get("cidr"),
|
|
121
139
|
ssh_port=self._dataset.get("vpn").get("client").get("ssh_port"),
|
|
122
|
-
input_path=
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
140
|
+
input_path=(
|
|
141
|
+
self._dataset.get("source_uri")
|
|
142
|
+
if self.status in ["new", "downloading"]
|
|
143
|
+
else None
|
|
144
|
+
),
|
|
145
|
+
output_path=(
|
|
146
|
+
self._dataset.get("output_uri")
|
|
147
|
+
if self.status == "exporting"
|
|
148
|
+
else None
|
|
149
|
+
),
|
|
128
150
|
)
|
|
129
151
|
else:
|
|
130
152
|
details = dict()
|
|
@@ -215,14 +237,21 @@ class Dataset:
|
|
|
215
237
|
return self
|
|
216
238
|
|
|
217
239
|
async def wait_for(self, status, timeout=300):
|
|
240
|
+
if self.status == status:
|
|
241
|
+
return
|
|
218
242
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
219
243
|
if not status in valid_statuses:
|
|
220
244
|
raise SpecificationError(
|
|
221
245
|
"status",
|
|
222
246
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
223
247
|
)
|
|
224
|
-
|
|
225
|
-
|
|
248
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
249
|
+
if timeout > MAX_TIMEOUT:
|
|
250
|
+
raise SpecificationError(
|
|
251
|
+
"timeout",
|
|
252
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
253
|
+
)
|
|
254
|
+
|
|
226
255
|
POLL_INTERVAL_MIN = 5
|
|
227
256
|
POLL_INTERVAL_MAX = 60
|
|
228
257
|
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
proximl/exceptions.py
CHANGED
|
@@ -147,3 +147,54 @@ class SpecificationError(ProxiMLException):
|
|
|
147
147
|
|
|
148
148
|
def __str__(self):
|
|
149
149
|
return "SpecificationError({self.attribute}, {self.message})".format(self=self)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class NodeError(ProxiMLException):
|
|
153
|
+
def __init__(self, status, data, *args):
|
|
154
|
+
super().__init__(data, *args)
|
|
155
|
+
self._status = status
|
|
156
|
+
self._message = data
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def status(self) -> str:
|
|
160
|
+
return self._status
|
|
161
|
+
|
|
162
|
+
def __repr__(self):
|
|
163
|
+
return "NodeError({self.status}, {self.message})".format(self=self)
|
|
164
|
+
|
|
165
|
+
def __str__(self):
|
|
166
|
+
return "NodeError({self.status}, {self.message})".format(self=self)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class ProviderError(ProxiMLException):
|
|
170
|
+
def __init__(self, status, data, *args):
|
|
171
|
+
super().__init__(data, *args)
|
|
172
|
+
self._status = status
|
|
173
|
+
self._message = data
|
|
174
|
+
|
|
175
|
+
@property
|
|
176
|
+
def status(self) -> str:
|
|
177
|
+
return self._status
|
|
178
|
+
|
|
179
|
+
def __repr__(self):
|
|
180
|
+
return "ProviderError({self.status}, {self.message})".format(self=self)
|
|
181
|
+
|
|
182
|
+
def __str__(self):
|
|
183
|
+
return "ProviderError({self.status}, {self.message})".format(self=self)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class RegionError(ProxiMLException):
|
|
187
|
+
def __init__(self, status, data, *args):
|
|
188
|
+
super().__init__(data, *args)
|
|
189
|
+
self._status = status
|
|
190
|
+
self._message = data
|
|
191
|
+
|
|
192
|
+
@property
|
|
193
|
+
def status(self) -> str:
|
|
194
|
+
return self._status
|
|
195
|
+
|
|
196
|
+
def __repr__(self):
|
|
197
|
+
return "RegionError({self.status}, {self.message})".format(self=self)
|
|
198
|
+
|
|
199
|
+
def __str__(self):
|
|
200
|
+
return "RegionError({self.status}, {self.message})".format(self=self)
|
proximl/jobs.py
CHANGED
|
@@ -468,9 +468,14 @@ class Job:
|
|
|
468
468
|
return job
|
|
469
469
|
|
|
470
470
|
async def wait_for(self, status, timeout=300):
|
|
471
|
+
if self.status == status or (
|
|
472
|
+
self.type == "training"
|
|
473
|
+
and status == "finished"
|
|
474
|
+
and self.status == "stopped"
|
|
475
|
+
):
|
|
476
|
+
return
|
|
471
477
|
valid_statuses = [
|
|
472
478
|
"waiting for data/model download",
|
|
473
|
-
"waiting for GPUs",
|
|
474
479
|
"waiting for resources",
|
|
475
480
|
"running",
|
|
476
481
|
"stopped",
|
|
@@ -482,22 +487,18 @@ class Job:
|
|
|
482
487
|
"status",
|
|
483
488
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
484
489
|
)
|
|
485
|
-
if status == "waiting for GPUs":
|
|
486
|
-
warnings.warn(
|
|
487
|
-
"'waiting for GPUs' status is deprecated, use 'waiting for resources' instead.",
|
|
488
|
-
DeprecationWarning,
|
|
489
|
-
)
|
|
490
490
|
if (self.type == "training") and status == "stopped":
|
|
491
491
|
warnings.warn(
|
|
492
492
|
"'stopped' status is deprecated for training jobs, use 'finished' instead.",
|
|
493
493
|
DeprecationWarning,
|
|
494
494
|
)
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
495
|
+
|
|
496
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
497
|
+
if timeout > MAX_TIMEOUT:
|
|
498
|
+
raise SpecificationError(
|
|
499
|
+
"timeout",
|
|
500
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
501
|
+
)
|
|
501
502
|
|
|
502
503
|
POLL_INTERVAL_MIN = 5
|
|
503
504
|
POLL_INTERVAL_MAX = 60
|
|
@@ -516,12 +517,8 @@ class Job:
|
|
|
516
517
|
self.status == status
|
|
517
518
|
or (
|
|
518
519
|
status
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
"waiting for resources",
|
|
522
|
-
] ## this status could be very short and the polling could miss it
|
|
523
|
-
and self.status
|
|
524
|
-
not in ["new", "waiting for GPUs", "waiting for resources"]
|
|
520
|
+
== "waiting for resources" ## this status could be very short and the polling could miss it
|
|
521
|
+
and self.status not in ["new", "waiting for resources"]
|
|
525
522
|
)
|
|
526
523
|
or (
|
|
527
524
|
status
|
|
@@ -529,7 +526,6 @@ class Job:
|
|
|
529
526
|
and self.status
|
|
530
527
|
not in [
|
|
531
528
|
"new",
|
|
532
|
-
"waiting for GPUs",
|
|
533
529
|
"waiting for resources",
|
|
534
530
|
"waiting for data/model download",
|
|
535
531
|
]
|
proximl/models.py
CHANGED
|
@@ -26,14 +26,24 @@ class Models(object):
|
|
|
26
26
|
models = [Model(self.proximl, **model) for model in resp]
|
|
27
27
|
return models
|
|
28
28
|
|
|
29
|
-
async def create(
|
|
29
|
+
async def create(
|
|
30
|
+
self,
|
|
31
|
+
name,
|
|
32
|
+
source_type,
|
|
33
|
+
source_uri,
|
|
34
|
+
type="evefs",
|
|
35
|
+
project_uuid=None,
|
|
36
|
+
**kwargs,
|
|
37
|
+
):
|
|
38
|
+
if not project_uuid:
|
|
39
|
+
project_uuid = self.proximl.active_project
|
|
30
40
|
data = dict(
|
|
31
41
|
name=name,
|
|
32
42
|
source_type=source_type,
|
|
33
43
|
source_uri=source_uri,
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
44
|
+
project_uuid=project_uuid,
|
|
45
|
+
type=type,
|
|
46
|
+
**kwargs,
|
|
37
47
|
)
|
|
38
48
|
payload = {k: v for k, v in data.items() if v is not None}
|
|
39
49
|
logging.info(f"Creating Model {name}")
|
|
@@ -44,9 +54,7 @@ class Models(object):
|
|
|
44
54
|
return model
|
|
45
55
|
|
|
46
56
|
async def remove(self, id, **kwargs):
|
|
47
|
-
await self.proximl._query(
|
|
48
|
-
f"/model/{id}", "DELETE", dict(**kwargs, force=True)
|
|
49
|
-
)
|
|
57
|
+
await self.proximl._query(f"/model/{id}", "DELETE", dict(**kwargs, force=True))
|
|
50
58
|
|
|
51
59
|
|
|
52
60
|
class Model:
|
|
@@ -56,7 +64,8 @@ class Model:
|
|
|
56
64
|
self._id = self._model.get("id", self._model.get("model_uuid"))
|
|
57
65
|
self._status = self._model.get("status")
|
|
58
66
|
self._name = self._model.get("name")
|
|
59
|
-
self._size = self._model.get("size")
|
|
67
|
+
self._size = self._model.get("size") or self._model.get("used_size")
|
|
68
|
+
self._billed_size = self._model.get("billed_size") or self._model.get("size")
|
|
60
69
|
self._project_uuid = self._model.get("project_uuid")
|
|
61
70
|
|
|
62
71
|
@property
|
|
@@ -75,6 +84,10 @@ class Model:
|
|
|
75
84
|
def size(self) -> int:
|
|
76
85
|
return self._size
|
|
77
86
|
|
|
87
|
+
@property
|
|
88
|
+
def billed_size(self) -> int:
|
|
89
|
+
return self._billed_size
|
|
90
|
+
|
|
78
91
|
def __str__(self):
|
|
79
92
|
return json.dumps({k: v for k, v in self._model.items()})
|
|
80
93
|
|
|
@@ -115,12 +128,16 @@ class Model:
|
|
|
115
128
|
project_uuid=self._model.get("project_uuid"),
|
|
116
129
|
cidr=self._model.get("vpn").get("cidr"),
|
|
117
130
|
ssh_port=self._model.get("vpn").get("client").get("ssh_port"),
|
|
118
|
-
input_path=
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
131
|
+
input_path=(
|
|
132
|
+
self._model.get("source_uri")
|
|
133
|
+
if self.status in ["new", "downloading"]
|
|
134
|
+
else None
|
|
135
|
+
),
|
|
136
|
+
output_path=(
|
|
137
|
+
self._model.get("output_uri")
|
|
138
|
+
if self.status == "exporting"
|
|
139
|
+
else None
|
|
140
|
+
),
|
|
124
141
|
)
|
|
125
142
|
else:
|
|
126
143
|
details = dict()
|
|
@@ -185,9 +202,7 @@ class Model:
|
|
|
185
202
|
if msg_handler:
|
|
186
203
|
msg_handler(data)
|
|
187
204
|
else:
|
|
188
|
-
timestamp = datetime.fromtimestamp(
|
|
189
|
-
int(data.get("time")) / 1000
|
|
190
|
-
)
|
|
205
|
+
timestamp = datetime.fromtimestamp(int(data.get("time")) / 1000)
|
|
191
206
|
print(
|
|
192
207
|
f"{timestamp.strftime('%m/%d/%Y, %H:%M:%S')}: {data.get('msg').rstrip()}"
|
|
193
208
|
)
|
|
@@ -214,19 +229,23 @@ class Model:
|
|
|
214
229
|
return self
|
|
215
230
|
|
|
216
231
|
async def wait_for(self, status, timeout=300):
|
|
232
|
+
if self.status == status:
|
|
233
|
+
return
|
|
217
234
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
218
235
|
if not status in valid_statuses:
|
|
219
236
|
raise SpecificationError(
|
|
220
237
|
"status",
|
|
221
238
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
222
239
|
)
|
|
223
|
-
|
|
224
|
-
|
|
240
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
241
|
+
if timeout > MAX_TIMEOUT:
|
|
242
|
+
raise SpecificationError(
|
|
243
|
+
"timeout",
|
|
244
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
245
|
+
)
|
|
225
246
|
POLL_INTERVAL_MIN = 5
|
|
226
247
|
POLL_INTERVAL_MAX = 60
|
|
227
|
-
POLL_INTERVAL = max(
|
|
228
|
-
min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN
|
|
229
|
-
)
|
|
248
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
230
249
|
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
231
250
|
count = 0
|
|
232
251
|
while count < retry_count:
|
proximl/volumes.py
CHANGED
|
@@ -26,14 +26,26 @@ class Volumes(object):
|
|
|
26
26
|
volumes = [Volume(self.proximl, **volume) for volume in resp]
|
|
27
27
|
return volumes
|
|
28
28
|
|
|
29
|
-
async def create(
|
|
29
|
+
async def create(
|
|
30
|
+
self,
|
|
31
|
+
name,
|
|
32
|
+
source_type,
|
|
33
|
+
source_uri,
|
|
34
|
+
capacity,
|
|
35
|
+
type="evefs",
|
|
36
|
+
project_uuid=None,
|
|
37
|
+
**kwargs,
|
|
38
|
+
):
|
|
39
|
+
if not project_uuid:
|
|
40
|
+
project_uuid = self.proximl.active_project
|
|
30
41
|
data = dict(
|
|
31
42
|
name=name,
|
|
32
43
|
source_type=source_type,
|
|
33
44
|
source_uri=source_uri,
|
|
34
45
|
capacity=capacity,
|
|
35
|
-
|
|
36
|
-
|
|
46
|
+
project_uuid=project_uuid,
|
|
47
|
+
type=type,
|
|
48
|
+
**kwargs,
|
|
37
49
|
)
|
|
38
50
|
payload = {k: v for k, v in data.items() if v is not None}
|
|
39
51
|
logging.info(f"Creating Volume {name}")
|
|
@@ -223,14 +235,21 @@ class Volume:
|
|
|
223
235
|
return self
|
|
224
236
|
|
|
225
237
|
async def wait_for(self, status, timeout=300):
|
|
238
|
+
if self.status == status:
|
|
239
|
+
return
|
|
226
240
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
227
241
|
if not status in valid_statuses:
|
|
228
242
|
raise SpecificationError(
|
|
229
243
|
"status",
|
|
230
244
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
231
245
|
)
|
|
232
|
-
|
|
233
|
-
|
|
246
|
+
|
|
247
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
248
|
+
if timeout > MAX_TIMEOUT:
|
|
249
|
+
raise SpecificationError(
|
|
250
|
+
"timeout",
|
|
251
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
252
|
+
)
|
|
234
253
|
POLL_INTERVAL_MIN = 5
|
|
235
254
|
POLL_INTERVAL_MAX = 60
|
|
236
255
|
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|