proximl 0.5.10__py3-none-any.whl → 0.5.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proximl/__init__.py +1 -1
- proximl/checkpoints.py +46 -28
- proximl/cli/cloudbender/__init__.py +2 -1
- proximl/cli/cloudbender/datastore.py +2 -7
- proximl/cli/cloudbender/service.py +19 -2
- proximl/cli/project/__init__.py +3 -72
- proximl/cli/project/credential.py +128 -0
- proximl/cli/project/data_connector.py +61 -0
- proximl/cli/project/datastore.py +61 -0
- proximl/cli/project/service.py +61 -0
- proximl/cloudbender/cloudbender.py +4 -2
- proximl/cloudbender/data_connectors.py +8 -0
- proximl/cloudbender/datastores.py +9 -19
- proximl/cloudbender/nodes.py +44 -1
- proximl/cloudbender/providers.py +53 -0
- proximl/cloudbender/regions.py +48 -0
- proximl/cloudbender/services.py +65 -1
- proximl/datasets.py +41 -12
- proximl/exceptions.py +51 -0
- proximl/jobs.py +15 -19
- proximl/models.py +41 -22
- proximl/projects/credentials.py +71 -0
- proximl/volumes.py +24 -5
- {proximl-0.5.10.dist-info → proximl-0.5.12.dist-info}/METADATA +1 -1
- {proximl-0.5.10.dist-info → proximl-0.5.12.dist-info}/RECORD +53 -46
- tests/integration/projects/conftest.py +3 -1
- tests/integration/projects/test_projects_credentials_integration.py +45 -0
- tests/integration/projects/test_projects_data_connectors_integration.py +44 -0
- tests/integration/projects/test_projects_datastores_integration.py +42 -0
- tests/integration/projects/test_projects_services_integration.py +44 -0
- tests/integration/test_checkpoints_integration.py +1 -2
- tests/integration/test_jobs_integration.py +13 -0
- tests/integration/test_models_integration.py +0 -1
- tests/unit/cli/projects/__init__.py +0 -0
- tests/unit/cli/projects/test_cli_project_credential_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_data_connector_unit.py +28 -0
- tests/unit/cli/projects/test_cli_project_datastore_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_key_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_secret_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_service_unit.py +26 -0
- tests/unit/cli/projects/test_cli_project_unit.py +19 -0
- tests/unit/cloudbender/test_datastores_unit.py +1 -5
- tests/unit/cloudbender/test_services_unit.py +6 -0
- tests/unit/conftest.py +158 -15
- tests/unit/projects/test_project_credentials_unit.py +100 -0
- tests/unit/test_checkpoints_unit.py +15 -23
- tests/unit/test_datasets_unit.py +15 -20
- tests/unit/test_models_unit.py +13 -16
- tests/unit/test_volumes_unit.py +3 -0
- proximl/cli/cloudbender/reservation.py +0 -159
- proximl/cli/project.py +0 -154
- proximl/cloudbender/reservations.py +0 -126
- proximl/projects.py +0 -187
- tests/integration/test_projects_integration.py +0 -44
- tests/unit/cli/cloudbender/test_cli_reservation_unit.py +0 -38
- tests/unit/cli/test_cli_project_unit.py +0 -46
- tests/unit/cloudbender/test_reservations_unit.py +0 -173
- tests/unit/test_auth.py +0 -30
- tests/unit/test_projects_unit.py +0 -294
- tests/unit/test_proximl.py +0 -54
- {proximl-0.5.10.dist-info → proximl-0.5.12.dist-info}/LICENSE +0 -0
- {proximl-0.5.10.dist-info → proximl-0.5.12.dist-info}/WHEEL +0 -0
- {proximl-0.5.10.dist-info → proximl-0.5.12.dist-info}/entry_points.txt +0 -0
- {proximl-0.5.10.dist-info → proximl-0.5.12.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from proximl.exceptions import (
|
|
7
|
+
ApiError,
|
|
8
|
+
SpecificationError,
|
|
9
|
+
ProxiMLException,
|
|
10
|
+
)
|
|
3
11
|
|
|
4
12
|
|
|
5
13
|
class Datastores(object):
|
|
@@ -20,9 +28,7 @@ class Datastores(object):
|
|
|
20
28
|
"GET",
|
|
21
29
|
kwargs,
|
|
22
30
|
)
|
|
23
|
-
datastores = [
|
|
24
|
-
Datastore(self.proximl, **datastore) for datastore in resp
|
|
25
|
-
]
|
|
31
|
+
datastores = [Datastore(self.proximl, **datastore) for datastore in resp]
|
|
26
32
|
return datastores
|
|
27
33
|
|
|
28
34
|
async def create(
|
|
@@ -31,18 +37,12 @@ class Datastores(object):
|
|
|
31
37
|
region_uuid,
|
|
32
38
|
name,
|
|
33
39
|
type,
|
|
34
|
-
uri,
|
|
35
|
-
root,
|
|
36
|
-
options=None,
|
|
37
40
|
**kwargs,
|
|
38
41
|
):
|
|
39
42
|
logging.info(f"Creating Datastore {name}")
|
|
40
43
|
data = dict(
|
|
41
44
|
name=name,
|
|
42
45
|
type=type,
|
|
43
|
-
uri=uri,
|
|
44
|
-
root=root,
|
|
45
|
-
options=options,
|
|
46
46
|
**kwargs,
|
|
47
47
|
)
|
|
48
48
|
payload = {k: v for k, v in data.items() if v is not None}
|
|
@@ -73,8 +73,6 @@ class Datastore:
|
|
|
73
73
|
self._region_uuid = self._datastore.get("region_uuid")
|
|
74
74
|
self._type = self._datastore.get("type")
|
|
75
75
|
self._name = self._datastore.get("name")
|
|
76
|
-
self._uri = self._datastore.get("uri")
|
|
77
|
-
self._root = self._datastore.get("root")
|
|
78
76
|
|
|
79
77
|
@property
|
|
80
78
|
def id(self) -> str:
|
|
@@ -96,14 +94,6 @@ class Datastore:
|
|
|
96
94
|
def name(self) -> str:
|
|
97
95
|
return self._name
|
|
98
96
|
|
|
99
|
-
@property
|
|
100
|
-
def uri(self) -> str:
|
|
101
|
-
return self._uri
|
|
102
|
-
|
|
103
|
-
@property
|
|
104
|
-
def root(self) -> str:
|
|
105
|
-
return self._root
|
|
106
|
-
|
|
107
97
|
def __str__(self):
|
|
108
98
|
return json.dumps({k: v for k, v in self._datastore.items()})
|
|
109
99
|
|
proximl/cloudbender/nodes.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from proximl.exceptions import ApiError, SpecificationError, ProxiMLException, NodeError
|
|
3
7
|
|
|
4
8
|
|
|
5
9
|
class Nodes(object):
|
|
@@ -29,7 +33,7 @@ class Nodes(object):
|
|
|
29
33
|
region_uuid,
|
|
30
34
|
friendly_name,
|
|
31
35
|
hostname,
|
|
32
|
-
minion_id,
|
|
36
|
+
minion_id=None,
|
|
33
37
|
type="permanent",
|
|
34
38
|
service="compute",
|
|
35
39
|
**kwargs,
|
|
@@ -153,3 +157,42 @@ class Node:
|
|
|
153
157
|
None,
|
|
154
158
|
dict(command=command),
|
|
155
159
|
)
|
|
160
|
+
|
|
161
|
+
async def wait_for(self, status, timeout=300):
|
|
162
|
+
if self.status == status:
|
|
163
|
+
return
|
|
164
|
+
valid_statuses = ["active", "maintenance", "offline", "stopped", "archived"]
|
|
165
|
+
if not status in valid_statuses:
|
|
166
|
+
raise SpecificationError(
|
|
167
|
+
"status",
|
|
168
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
169
|
+
)
|
|
170
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
171
|
+
if timeout > MAX_TIMEOUT:
|
|
172
|
+
raise SpecificationError(
|
|
173
|
+
"timeout",
|
|
174
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
POLL_INTERVAL_MIN = 5
|
|
178
|
+
POLL_INTERVAL_MAX = 60
|
|
179
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
180
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
181
|
+
count = 0
|
|
182
|
+
while count < retry_count:
|
|
183
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
184
|
+
try:
|
|
185
|
+
await self.refresh()
|
|
186
|
+
except ApiError as e:
|
|
187
|
+
if status == "archived" and e.status == 404:
|
|
188
|
+
return
|
|
189
|
+
raise e
|
|
190
|
+
if self.status in ["errored", "failed"]:
|
|
191
|
+
raise NodeError(self.status, self)
|
|
192
|
+
if self.status == status:
|
|
193
|
+
return self
|
|
194
|
+
else:
|
|
195
|
+
count += 1
|
|
196
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
197
|
+
|
|
198
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/cloudbender/providers.py
CHANGED
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
3
5
|
from datetime import datetime
|
|
4
6
|
|
|
7
|
+
from proximl.exceptions import (
|
|
8
|
+
ApiError,
|
|
9
|
+
SpecificationError,
|
|
10
|
+
ProxiMLException,
|
|
11
|
+
ProviderError,
|
|
12
|
+
)
|
|
13
|
+
|
|
5
14
|
|
|
6
15
|
class Providers(object):
|
|
7
16
|
def __init__(self, proximl):
|
|
@@ -36,6 +45,7 @@ class Provider:
|
|
|
36
45
|
self._provider = kwargs
|
|
37
46
|
self._id = self._provider.get("provider_uuid")
|
|
38
47
|
self._type = self._provider.get("type")
|
|
48
|
+
self._status = self._provider.get("status")
|
|
39
49
|
self._credits = self._provider.get("credits")
|
|
40
50
|
|
|
41
51
|
@property
|
|
@@ -46,6 +56,10 @@ class Provider:
|
|
|
46
56
|
def type(self) -> str:
|
|
47
57
|
return self._type
|
|
48
58
|
|
|
59
|
+
@property
|
|
60
|
+
def status(self) -> str:
|
|
61
|
+
return self._status
|
|
62
|
+
|
|
49
63
|
@property
|
|
50
64
|
def credits(self) -> float:
|
|
51
65
|
return self._credits
|
|
@@ -69,3 +83,42 @@ class Provider:
|
|
|
69
83
|
)
|
|
70
84
|
self.__init__(self.proximl, **resp)
|
|
71
85
|
return self
|
|
86
|
+
|
|
87
|
+
async def wait_for(self, status, timeout=300):
|
|
88
|
+
if self.status == status:
|
|
89
|
+
return
|
|
90
|
+
valid_statuses = ["ready", "archived"]
|
|
91
|
+
if not status in valid_statuses:
|
|
92
|
+
raise SpecificationError(
|
|
93
|
+
"status",
|
|
94
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
95
|
+
)
|
|
96
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
97
|
+
if timeout > MAX_TIMEOUT:
|
|
98
|
+
raise SpecificationError(
|
|
99
|
+
"timeout",
|
|
100
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
POLL_INTERVAL_MIN = 5
|
|
104
|
+
POLL_INTERVAL_MAX = 60
|
|
105
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
106
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
107
|
+
count = 0
|
|
108
|
+
while count < retry_count:
|
|
109
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
110
|
+
try:
|
|
111
|
+
await self.refresh()
|
|
112
|
+
except ApiError as e:
|
|
113
|
+
if status == "archived" and e.status == 404:
|
|
114
|
+
return
|
|
115
|
+
raise e
|
|
116
|
+
if self.status in ["errored", "failed"]:
|
|
117
|
+
raise ProviderError(self.status, self)
|
|
118
|
+
if self.status == status:
|
|
119
|
+
return self
|
|
120
|
+
else:
|
|
121
|
+
count += 1
|
|
122
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
123
|
+
|
|
124
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/cloudbender/regions.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from proximl.exceptions import (
|
|
7
|
+
ApiError,
|
|
8
|
+
SpecificationError,
|
|
9
|
+
ProxiMLException,
|
|
10
|
+
RegionError,
|
|
11
|
+
)
|
|
3
12
|
|
|
4
13
|
|
|
5
14
|
class Regions(object):
|
|
@@ -111,3 +120,42 @@ class Region:
|
|
|
111
120
|
None,
|
|
112
121
|
dict(project_uuid=project_uuid, checkpoint_uuid=checkpoint_uuid),
|
|
113
122
|
)
|
|
123
|
+
|
|
124
|
+
async def wait_for(self, status, timeout=300):
|
|
125
|
+
if self.status == status:
|
|
126
|
+
return
|
|
127
|
+
valid_statuses = ["healthy", "offline", "archived"]
|
|
128
|
+
if not status in valid_statuses:
|
|
129
|
+
raise SpecificationError(
|
|
130
|
+
"status",
|
|
131
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
132
|
+
)
|
|
133
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
134
|
+
if timeout > MAX_TIMEOUT:
|
|
135
|
+
raise SpecificationError(
|
|
136
|
+
"timeout",
|
|
137
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
POLL_INTERVAL_MIN = 5
|
|
141
|
+
POLL_INTERVAL_MAX = 60
|
|
142
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
143
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
144
|
+
count = 0
|
|
145
|
+
while count < retry_count:
|
|
146
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
147
|
+
try:
|
|
148
|
+
await self.refresh()
|
|
149
|
+
except ApiError as e:
|
|
150
|
+
if status == "archived" and e.status == 404:
|
|
151
|
+
return
|
|
152
|
+
raise e
|
|
153
|
+
if self.status in ["errored", "failed"]:
|
|
154
|
+
raise RegionError(self.status, self)
|
|
155
|
+
if self.status == status:
|
|
156
|
+
return self
|
|
157
|
+
else:
|
|
158
|
+
count += 1
|
|
159
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
160
|
+
|
|
161
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/cloudbender/services.py
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from proximl.exceptions import (
|
|
7
|
+
ApiError,
|
|
8
|
+
SpecificationError,
|
|
9
|
+
ProxiMLException,
|
|
10
|
+
)
|
|
3
11
|
|
|
4
12
|
|
|
5
13
|
class Services(object):
|
|
@@ -28,12 +36,14 @@ class Services(object):
|
|
|
28
36
|
provider_uuid,
|
|
29
37
|
region_uuid,
|
|
30
38
|
name,
|
|
39
|
+
type,
|
|
31
40
|
public,
|
|
32
41
|
**kwargs,
|
|
33
42
|
):
|
|
34
43
|
logging.info(f"Creating Service {name}")
|
|
35
44
|
data = dict(
|
|
36
45
|
name=name,
|
|
46
|
+
type=type,
|
|
37
47
|
public=public,
|
|
38
48
|
**kwargs,
|
|
39
49
|
)
|
|
@@ -65,7 +75,12 @@ class Service:
|
|
|
65
75
|
self._region_uuid = self._service.get("region_uuid")
|
|
66
76
|
self._public = self._service.get("public")
|
|
67
77
|
self._name = self._service.get("name")
|
|
68
|
-
self.
|
|
78
|
+
self._type = self._service.get("type")
|
|
79
|
+
self._hostname = self._service.get("custom_hostname") or self._service.get(
|
|
80
|
+
"hostname"
|
|
81
|
+
)
|
|
82
|
+
self._status = self._service.get("status")
|
|
83
|
+
self._port = self._service.get("port")
|
|
69
84
|
|
|
70
85
|
@property
|
|
71
86
|
def id(self) -> str:
|
|
@@ -91,6 +106,18 @@ class Service:
|
|
|
91
106
|
def hostname(self) -> str:
|
|
92
107
|
return self._hostname
|
|
93
108
|
|
|
109
|
+
@property
|
|
110
|
+
def status(self) -> str:
|
|
111
|
+
return self._status
|
|
112
|
+
|
|
113
|
+
@property
|
|
114
|
+
def type(self) -> str:
|
|
115
|
+
return self._type
|
|
116
|
+
|
|
117
|
+
@property
|
|
118
|
+
def port(self) -> str:
|
|
119
|
+
return self._port
|
|
120
|
+
|
|
94
121
|
def __str__(self):
|
|
95
122
|
return json.dumps({k: v for k, v in self._service.items()})
|
|
96
123
|
|
|
@@ -113,3 +140,40 @@ class Service:
|
|
|
113
140
|
)
|
|
114
141
|
self.__init__(self.proximl, **resp)
|
|
115
142
|
return self
|
|
143
|
+
|
|
144
|
+
async def wait_for(self, status, timeout=300):
|
|
145
|
+
if self.status == status:
|
|
146
|
+
return
|
|
147
|
+
valid_statuses = ["active", "archived"]
|
|
148
|
+
if not status in valid_statuses:
|
|
149
|
+
raise SpecificationError(
|
|
150
|
+
"status",
|
|
151
|
+
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
152
|
+
)
|
|
153
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
154
|
+
if timeout > MAX_TIMEOUT:
|
|
155
|
+
raise SpecificationError(
|
|
156
|
+
"timeout",
|
|
157
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
POLL_INTERVAL_MIN = 5
|
|
161
|
+
POLL_INTERVAL_MAX = 60
|
|
162
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
163
|
+
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
164
|
+
count = 0
|
|
165
|
+
while count < retry_count:
|
|
166
|
+
await asyncio.sleep(POLL_INTERVAL)
|
|
167
|
+
try:
|
|
168
|
+
await self.refresh()
|
|
169
|
+
except ApiError as e:
|
|
170
|
+
if status == "archived" and e.status == 404:
|
|
171
|
+
return
|
|
172
|
+
raise e
|
|
173
|
+
if self.status == status:
|
|
174
|
+
return self
|
|
175
|
+
else:
|
|
176
|
+
count += 1
|
|
177
|
+
logging.debug(f"self: {self}, retry count {count}")
|
|
178
|
+
|
|
179
|
+
raise ProxiMLException(f"Timeout waiting for {status}")
|
proximl/datasets.py
CHANGED
|
@@ -31,13 +31,24 @@ class Datasets(object):
|
|
|
31
31
|
datasets = [Dataset(self.proximl, **dataset) for dataset in resp]
|
|
32
32
|
return datasets
|
|
33
33
|
|
|
34
|
-
async def create(
|
|
34
|
+
async def create(
|
|
35
|
+
self,
|
|
36
|
+
name,
|
|
37
|
+
source_type,
|
|
38
|
+
source_uri,
|
|
39
|
+
type="evefs",
|
|
40
|
+
project_uuid=None,
|
|
41
|
+
**kwargs,
|
|
42
|
+
):
|
|
43
|
+
if not project_uuid:
|
|
44
|
+
project_uuid = self.proximl.active_project
|
|
35
45
|
data = dict(
|
|
36
46
|
name=name,
|
|
37
47
|
source_type=source_type,
|
|
38
48
|
source_uri=source_uri,
|
|
39
|
-
|
|
40
|
-
|
|
49
|
+
project_uuid=project_uuid,
|
|
50
|
+
type=type,
|
|
51
|
+
**kwargs,
|
|
41
52
|
)
|
|
42
53
|
payload = {k: v for k, v in data.items() if v is not None}
|
|
43
54
|
logging.info(f"Creating Dataset {name}")
|
|
@@ -60,7 +71,10 @@ class Dataset:
|
|
|
60
71
|
self._id = self._dataset.get("id", self._dataset.get("dataset_uuid"))
|
|
61
72
|
self._status = self._dataset.get("status")
|
|
62
73
|
self._name = self._dataset.get("name")
|
|
63
|
-
self._size = self._dataset.get("size")
|
|
74
|
+
self._size = self._dataset.get("size") or self._dataset.get("used_size")
|
|
75
|
+
self._billed_size = self._dataset.get("billed_size") or self._dataset.get(
|
|
76
|
+
"size"
|
|
77
|
+
)
|
|
64
78
|
self._project_uuid = self._dataset.get("project_uuid")
|
|
65
79
|
|
|
66
80
|
@property
|
|
@@ -79,6 +93,10 @@ class Dataset:
|
|
|
79
93
|
def size(self) -> int:
|
|
80
94
|
return self._size or 0
|
|
81
95
|
|
|
96
|
+
@property
|
|
97
|
+
def billed_size(self) -> int:
|
|
98
|
+
return self._billed_size
|
|
99
|
+
|
|
82
100
|
def __str__(self):
|
|
83
101
|
return json.dumps({k: v for k, v in self._dataset.items()})
|
|
84
102
|
|
|
@@ -119,12 +137,16 @@ class Dataset:
|
|
|
119
137
|
project_uuid=self._dataset.get("project_uuid"),
|
|
120
138
|
cidr=self._dataset.get("vpn").get("cidr"),
|
|
121
139
|
ssh_port=self._dataset.get("vpn").get("client").get("ssh_port"),
|
|
122
|
-
input_path=
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
140
|
+
input_path=(
|
|
141
|
+
self._dataset.get("source_uri")
|
|
142
|
+
if self.status in ["new", "downloading"]
|
|
143
|
+
else None
|
|
144
|
+
),
|
|
145
|
+
output_path=(
|
|
146
|
+
self._dataset.get("output_uri")
|
|
147
|
+
if self.status == "exporting"
|
|
148
|
+
else None
|
|
149
|
+
),
|
|
128
150
|
)
|
|
129
151
|
else:
|
|
130
152
|
details = dict()
|
|
@@ -215,14 +237,21 @@ class Dataset:
|
|
|
215
237
|
return self
|
|
216
238
|
|
|
217
239
|
async def wait_for(self, status, timeout=300):
|
|
240
|
+
if self.status == status:
|
|
241
|
+
return
|
|
218
242
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
219
243
|
if not status in valid_statuses:
|
|
220
244
|
raise SpecificationError(
|
|
221
245
|
"status",
|
|
222
246
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
223
247
|
)
|
|
224
|
-
|
|
225
|
-
|
|
248
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
249
|
+
if timeout > MAX_TIMEOUT:
|
|
250
|
+
raise SpecificationError(
|
|
251
|
+
"timeout",
|
|
252
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
253
|
+
)
|
|
254
|
+
|
|
226
255
|
POLL_INTERVAL_MIN = 5
|
|
227
256
|
POLL_INTERVAL_MAX = 60
|
|
228
257
|
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
proximl/exceptions.py
CHANGED
|
@@ -147,3 +147,54 @@ class SpecificationError(ProxiMLException):
|
|
|
147
147
|
|
|
148
148
|
def __str__(self):
|
|
149
149
|
return "SpecificationError({self.attribute}, {self.message})".format(self=self)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class NodeError(ProxiMLException):
|
|
153
|
+
def __init__(self, status, data, *args):
|
|
154
|
+
super().__init__(data, *args)
|
|
155
|
+
self._status = status
|
|
156
|
+
self._message = data
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def status(self) -> str:
|
|
160
|
+
return self._status
|
|
161
|
+
|
|
162
|
+
def __repr__(self):
|
|
163
|
+
return "NodeError({self.status}, {self.message})".format(self=self)
|
|
164
|
+
|
|
165
|
+
def __str__(self):
|
|
166
|
+
return "NodeError({self.status}, {self.message})".format(self=self)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class ProviderError(ProxiMLException):
|
|
170
|
+
def __init__(self, status, data, *args):
|
|
171
|
+
super().__init__(data, *args)
|
|
172
|
+
self._status = status
|
|
173
|
+
self._message = data
|
|
174
|
+
|
|
175
|
+
@property
|
|
176
|
+
def status(self) -> str:
|
|
177
|
+
return self._status
|
|
178
|
+
|
|
179
|
+
def __repr__(self):
|
|
180
|
+
return "ProviderError({self.status}, {self.message})".format(self=self)
|
|
181
|
+
|
|
182
|
+
def __str__(self):
|
|
183
|
+
return "ProviderError({self.status}, {self.message})".format(self=self)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class RegionError(ProxiMLException):
|
|
187
|
+
def __init__(self, status, data, *args):
|
|
188
|
+
super().__init__(data, *args)
|
|
189
|
+
self._status = status
|
|
190
|
+
self._message = data
|
|
191
|
+
|
|
192
|
+
@property
|
|
193
|
+
def status(self) -> str:
|
|
194
|
+
return self._status
|
|
195
|
+
|
|
196
|
+
def __repr__(self):
|
|
197
|
+
return "RegionError({self.status}, {self.message})".format(self=self)
|
|
198
|
+
|
|
199
|
+
def __str__(self):
|
|
200
|
+
return "RegionError({self.status}, {self.message})".format(self=self)
|
proximl/jobs.py
CHANGED
|
@@ -468,9 +468,14 @@ class Job:
|
|
|
468
468
|
return job
|
|
469
469
|
|
|
470
470
|
async def wait_for(self, status, timeout=300):
|
|
471
|
+
if self.status == status or (
|
|
472
|
+
self.type == "training"
|
|
473
|
+
and status == "finished"
|
|
474
|
+
and self.status == "stopped"
|
|
475
|
+
):
|
|
476
|
+
return
|
|
471
477
|
valid_statuses = [
|
|
472
478
|
"waiting for data/model download",
|
|
473
|
-
"waiting for GPUs",
|
|
474
479
|
"waiting for resources",
|
|
475
480
|
"running",
|
|
476
481
|
"stopped",
|
|
@@ -482,22 +487,18 @@ class Job:
|
|
|
482
487
|
"status",
|
|
483
488
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
484
489
|
)
|
|
485
|
-
if status == "waiting for GPUs":
|
|
486
|
-
warnings.warn(
|
|
487
|
-
"'waiting for GPUs' status is deprecated, use 'waiting for resources' instead.",
|
|
488
|
-
DeprecationWarning,
|
|
489
|
-
)
|
|
490
490
|
if (self.type == "training") and status == "stopped":
|
|
491
491
|
warnings.warn(
|
|
492
492
|
"'stopped' status is deprecated for training jobs, use 'finished' instead.",
|
|
493
493
|
DeprecationWarning,
|
|
494
494
|
)
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
495
|
+
|
|
496
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
497
|
+
if timeout > MAX_TIMEOUT:
|
|
498
|
+
raise SpecificationError(
|
|
499
|
+
"timeout",
|
|
500
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
501
|
+
)
|
|
501
502
|
|
|
502
503
|
POLL_INTERVAL_MIN = 5
|
|
503
504
|
POLL_INTERVAL_MAX = 60
|
|
@@ -516,12 +517,8 @@ class Job:
|
|
|
516
517
|
self.status == status
|
|
517
518
|
or (
|
|
518
519
|
status
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
"waiting for resources",
|
|
522
|
-
] ## this status could be very short and the polling could miss it
|
|
523
|
-
and self.status
|
|
524
|
-
not in ["new", "waiting for GPUs", "waiting for resources"]
|
|
520
|
+
== "waiting for resources" ## this status could be very short and the polling could miss it
|
|
521
|
+
and self.status not in ["new", "waiting for resources"]
|
|
525
522
|
)
|
|
526
523
|
or (
|
|
527
524
|
status
|
|
@@ -529,7 +526,6 @@ class Job:
|
|
|
529
526
|
and self.status
|
|
530
527
|
not in [
|
|
531
528
|
"new",
|
|
532
|
-
"waiting for GPUs",
|
|
533
529
|
"waiting for resources",
|
|
534
530
|
"waiting for data/model download",
|
|
535
531
|
]
|