skypilot-nightly 1.0.0.dev20241025__py3-none-any.whl → 1.0.0.dev20241027__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +15 -0
- sky/cli.py +1 -1
- sky/clouds/aws.py +4 -7
- sky/clouds/azure.py +7 -9
- sky/clouds/cloud.py +11 -7
- sky/clouds/cudo.py +4 -7
- sky/clouds/fluidstack.py +4 -7
- sky/clouds/gcp.py +2 -2
- sky/clouds/ibm.py +4 -7
- sky/clouds/kubernetes.py +4 -7
- sky/clouds/lambda_cloud.py +4 -7
- sky/clouds/oci.py +9 -8
- sky/clouds/paperspace.py +4 -7
- sky/clouds/runpod.py +4 -7
- sky/clouds/scp.py +4 -7
- sky/clouds/service_catalog/__init__.py +1 -1
- sky/clouds/service_catalog/aws_catalog.py +2 -2
- sky/clouds/service_catalog/azure_catalog.py +16 -5
- sky/clouds/service_catalog/common.py +15 -6
- sky/clouds/service_catalog/cudo_catalog.py +2 -2
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +21 -11
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/lambda_catalog.py +2 -2
- sky/clouds/service_catalog/oci_catalog.py +2 -2
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/vsphere.py +4 -7
- sky/jobs/controller.py +29 -34
- sky/provision/kubernetes/instance.py +66 -2
- sky/resources.py +1 -1
- sky/utils/resources_utils.py +13 -1
- {skypilot_nightly-1.0.0.dev20241025.dist-info → skypilot_nightly-1.0.0.dev20241027.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241025.dist-info → skypilot_nightly-1.0.0.dev20241027.dist-info}/RECORD +41 -41
- {skypilot_nightly-1.0.0.dev20241025.dist-info → skypilot_nightly-1.0.0.dev20241027.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241025.dist-info → skypilot_nightly-1.0.0.dev20241027.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241025.dist-info → skypilot_nightly-1.0.0.dev20241027.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241025.dist-info → skypilot_nightly-1.0.0.dev20241027.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'c0c17483d1f692ad639144050f5f6fa0966e47a5'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20241025'
|
38
|
+
__version__ = '1.0.0.dev20241027'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/backends/cloud_vm_ray_backend.py
CHANGED
@@ -2713,6 +2713,21 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
2713
2713
|
f' Existing:\t{handle.launched_nodes}x '
|
2714
2714
|
f'{handle.launched_resources}\n'
|
2715
2715
|
f'{mismatch_str}')
|
2716
|
+
else:
|
2717
|
+
# For fractional acc count clusters, we round up the number of accs
|
2718
|
+
# to 1 (sky/utils/resources_utils.py::make_ray_custom_resources_str)
|
2719
|
+
# Here we scale the required acc count to (required / launched) * 1
|
2720
|
+
# so the total number of accs is the same as the requested number.
|
2721
|
+
launched_accs = launched_resources.accelerators
|
2722
|
+
if (launched_accs is not None and
|
2723
|
+
valid_resource.accelerators is not None):
|
2724
|
+
for _, count in launched_accs.items():
|
2725
|
+
if isinstance(count, float) and not count.is_integer():
|
2726
|
+
valid_resource = valid_resource.copy(
|
2727
|
+
accelerators={
|
2728
|
+
k: v / count
|
2729
|
+
for k, v in valid_resource.accelerators.items()
|
2730
|
+
})
|
2716
2731
|
return valid_resource
|
2717
2732
|
|
2718
2733
|
def _provision(
|
sky/cli.py
CHANGED
@@ -3519,7 +3519,7 @@ def jobs():
|
|
3519
3519
|
default=None,
|
3520
3520
|
type=str,
|
3521
3521
|
hidden=True,
|
3522
|
-
help=('Alias for --name, the name of the
|
3522
|
+
help=('Alias for --name, the name of the managed job.'))
|
3523
3523
|
@click.option('--job-recovery',
|
3524
3524
|
default=None,
|
3525
3525
|
type=str,
|
sky/clouds/aws.py
CHANGED
@@ -2,13 +2,12 @@
|
|
2
2
|
import enum
|
3
3
|
import fnmatch
|
4
4
|
import functools
|
5
|
-
import json
|
6
5
|
import os
|
7
6
|
import re
|
8
7
|
import subprocess
|
9
8
|
import time
|
10
9
|
import typing
|
11
|
-
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
|
10
|
+
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
|
12
11
|
|
13
12
|
from sky import clouds
|
14
13
|
from sky import exceptions
|
@@ -383,7 +382,7 @@ class AWS(clouds.Cloud):
|
|
383
382
|
def get_accelerators_from_instance_type(
|
384
383
|
cls,
|
385
384
|
instance_type: str,
|
386
|
-
) -> Optional[Dict[str, int]]:
|
385
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
387
386
|
return service_catalog.get_accelerators_from_instance_type(
|
388
387
|
instance_type, clouds='aws')
|
389
388
|
|
@@ -411,10 +410,8 @@ class AWS(clouds.Cloud):
|
|
411
410
|
r = resources
|
412
411
|
# r.accelerators is cleared but .instance_type encodes the info.
|
413
412
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
414
|
-
|
415
|
-
|
416
|
-
else:
|
417
|
-
custom_resources = None
|
413
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
414
|
+
acc_dict)
|
418
415
|
|
419
416
|
if r.extract_docker_image() is not None:
|
420
417
|
image_id_to_use = None
|
sky/clouds/azure.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
"""Azure."""
|
2
2
|
import functools
|
3
|
-
import json
|
4
3
|
import os
|
5
4
|
import re
|
6
5
|
import subprocess
|
7
6
|
import textwrap
|
8
7
|
import typing
|
9
|
-
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
8
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
10
9
|
|
11
10
|
import colorama
|
12
11
|
|
@@ -39,9 +38,9 @@ _DEFAULT_AZURE_UBUNTU_HPC_IMAGE_GB = 30
|
|
39
38
|
_DEFAULT_AZURE_UBUNTU_2004_IMAGE_GB = 150
|
40
39
|
_DEFAULT_SKYPILOT_IMAGE_GB = 30
|
41
40
|
|
42
|
-
_DEFAULT_CPU_IMAGE_ID = 'skypilot:
|
43
|
-
_DEFAULT_GPU_IMAGE_ID = 'skypilot:gpu-ubuntu-
|
44
|
-
_DEFAULT_V1_IMAGE_ID = 'skypilot:
|
41
|
+
_DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-v2'
|
42
|
+
_DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-v2'
|
43
|
+
_DEFAULT_V1_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-v1'
|
45
44
|
_DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-ubuntu-2004'
|
46
45
|
_FALLBACK_IMAGE_ID = 'skypilot:gpu-ubuntu-2204'
|
47
46
|
|
@@ -272,7 +271,7 @@ class Azure(clouds.Cloud):
|
|
272
271
|
def get_accelerators_from_instance_type(
|
273
272
|
cls,
|
274
273
|
instance_type: str,
|
275
|
-
) -> Optional[Dict[str, int]]:
|
274
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
276
275
|
return service_catalog.get_accelerators_from_instance_type(
|
277
276
|
instance_type, clouds='azure')
|
278
277
|
|
@@ -304,10 +303,9 @@ class Azure(clouds.Cloud):
|
|
304
303
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
305
304
|
acc_count = None
|
306
305
|
if acc_dict is not None:
|
307
|
-
custom_resources = json.dumps(acc_dict, separators=(',', ':'))
|
308
306
|
acc_count = str(sum(acc_dict.values()))
|
309
|
-
|
310
|
-
|
307
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
308
|
+
acc_dict)
|
311
309
|
|
312
310
|
if (resources.image_id is None or
|
313
311
|
resources.extract_docker_image() is not None):
|
sky/clouds/cloud.py
CHANGED
@@ -9,8 +9,9 @@ reused across cloud object creation.
|
|
9
9
|
"""
|
10
10
|
import collections
|
11
11
|
import enum
|
12
|
+
import math
|
12
13
|
import typing
|
13
|
-
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
|
14
|
+
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union
|
14
15
|
|
15
16
|
from sky import exceptions
|
16
17
|
from sky import skypilot_config
|
@@ -306,7 +307,7 @@ class Cloud:
|
|
306
307
|
def get_accelerators_from_instance_type(
|
307
308
|
cls,
|
308
309
|
instance_type: str,
|
309
|
-
) -> Optional[Dict[str, int]]:
|
310
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
310
311
|
"""Returns {acc: acc_count} held by 'instance_type', if any."""
|
311
312
|
raise NotImplementedError
|
312
313
|
|
@@ -673,8 +674,9 @@ class Cloud:
|
|
673
674
|
assert resources.is_launchable(), resources
|
674
675
|
|
675
676
|
def _equal_accelerators(
|
676
|
-
|
677
|
-
|
677
|
+
acc_requested: Optional[Dict[str, Union[int, float]]],
|
678
|
+
acc_from_instance_type: Optional[Dict[str, Union[int,
|
679
|
+
float]]]) -> bool:
|
678
680
|
"""Check the requested accelerators equals to the instance type
|
679
681
|
|
680
682
|
Check the requested accelerators equals to the accelerators
|
@@ -689,12 +691,14 @@ class Cloud:
|
|
689
691
|
for acc in acc_requested:
|
690
692
|
if acc not in acc_from_instance_type:
|
691
693
|
return False
|
692
|
-
|
694
|
+
# Avoid float point precision issue.
|
695
|
+
if not math.isclose(acc_requested[acc],
|
696
|
+
acc_from_instance_type[acc]):
|
693
697
|
return False
|
694
698
|
return True
|
695
699
|
|
696
|
-
acc_from_instance_type =
|
697
|
-
resources.instance_type)
|
700
|
+
acc_from_instance_type = cls.get_accelerators_from_instance_type(
|
701
|
+
resources.instance_type)
|
698
702
|
if not _equal_accelerators(resources.accelerators,
|
699
703
|
acc_from_instance_type):
|
700
704
|
with ux_utils.print_exception_no_traceback():
|
sky/clouds/cudo.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
"""Cudo Compute"""
|
2
|
-
import json
|
3
2
|
import subprocess
|
4
3
|
import typing
|
5
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
4
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
6
5
|
|
7
6
|
from sky import clouds
|
8
7
|
from sky.clouds import service_catalog
|
@@ -183,7 +182,7 @@ class Cudo(clouds.Cloud):
|
|
183
182
|
def get_accelerators_from_instance_type(
|
184
183
|
cls,
|
185
184
|
instance_type: str,
|
186
|
-
) -> Optional[Dict[str, int]]:
|
185
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
187
186
|
return service_catalog.get_accelerators_from_instance_type(
|
188
187
|
instance_type, clouds='cudo')
|
189
188
|
|
@@ -202,10 +201,8 @@ class Cudo(clouds.Cloud):
|
|
202
201
|
del zones, cluster_name # unused
|
203
202
|
r = resources
|
204
203
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
205
|
-
|
206
|
-
|
207
|
-
else:
|
208
|
-
custom_resources = None
|
204
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
205
|
+
acc_dict)
|
209
206
|
|
210
207
|
return {
|
211
208
|
'instance_type': resources.instance_type,
|
sky/clouds/fluidstack.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
"""Fluidstack Cloud."""
|
2
|
-
import json
|
3
2
|
import os
|
4
3
|
import typing
|
5
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
4
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
6
5
|
|
7
6
|
import requests
|
8
7
|
|
@@ -155,7 +154,7 @@ class Fluidstack(clouds.Cloud):
|
|
155
154
|
def get_accelerators_from_instance_type(
|
156
155
|
cls,
|
157
156
|
instance_type: str,
|
158
|
-
) -> Optional[Dict[str, int]]:
|
157
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
159
158
|
return service_catalog.get_accelerators_from_instance_type(
|
160
159
|
instance_type, clouds='fluidstack')
|
161
160
|
|
@@ -184,10 +183,8 @@ class Fluidstack(clouds.Cloud):
|
|
184
183
|
|
185
184
|
r = resources
|
186
185
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
187
|
-
|
188
|
-
|
189
|
-
else:
|
190
|
-
custom_resources = None
|
186
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
187
|
+
acc_dict)
|
191
188
|
|
192
189
|
return {
|
193
190
|
'instance_type': resources.instance_type,
|
sky/clouds/gcp.py
CHANGED
@@ -7,7 +7,7 @@ import re
|
|
7
7
|
import subprocess
|
8
8
|
import time
|
9
9
|
import typing
|
10
|
-
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
|
10
|
+
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
|
11
11
|
|
12
12
|
import colorama
|
13
13
|
|
@@ -669,7 +669,7 @@ class GCP(clouds.Cloud):
|
|
669
669
|
def get_accelerators_from_instance_type(
|
670
670
|
cls,
|
671
671
|
instance_type: str,
|
672
|
-
) -> Optional[Dict[str, int]]:
|
672
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
673
673
|
# GCP handles accelerators separately from regular instance types,
|
674
674
|
# hence return none here.
|
675
675
|
return None
|
sky/clouds/ibm.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
"""IBM Web Services."""
|
2
|
-
import json
|
3
2
|
import os
|
4
3
|
import typing
|
5
|
-
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
4
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
6
5
|
|
7
6
|
import colorama
|
8
7
|
|
@@ -206,10 +205,8 @@ class IBM(clouds.Cloud):
|
|
206
205
|
'IBM does not currently support spot instances in this framework'
|
207
206
|
|
208
207
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
209
|
-
|
210
|
-
|
211
|
-
else:
|
212
|
-
custom_resources = None
|
208
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
209
|
+
acc_dict)
|
213
210
|
|
214
211
|
instance_resources = _get_profile_resources(r.instance_type)
|
215
212
|
|
@@ -247,7 +244,7 @@ class IBM(clouds.Cloud):
|
|
247
244
|
def get_accelerators_from_instance_type(
|
248
245
|
cls,
|
249
246
|
instance_type: str,
|
250
|
-
) -> Optional[Dict[str, int]]:
|
247
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
251
248
|
"""Returns {acc: acc_count} held by 'instance_type', if any."""
|
252
249
|
return service_catalog.get_accelerators_from_instance_type(
|
253
250
|
instance_type, clouds='ibm')
|
sky/clouds/kubernetes.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
"""Kubernetes."""
|
2
2
|
import functools
|
3
|
-
import json
|
4
3
|
import os
|
5
4
|
import re
|
6
5
|
import typing
|
7
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
6
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
8
7
|
|
9
8
|
from sky import clouds
|
10
9
|
from sky import sky_logging
|
@@ -271,7 +270,7 @@ class Kubernetes(clouds.Cloud):
|
|
271
270
|
def get_accelerators_from_instance_type(
|
272
271
|
cls,
|
273
272
|
instance_type: str,
|
274
|
-
) -> Optional[Dict[str, int]]:
|
273
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
275
274
|
inst = kubernetes_utils.KubernetesInstanceType.from_instance_type(
|
276
275
|
instance_type)
|
277
276
|
return {
|
@@ -328,10 +327,8 @@ class Kubernetes(clouds.Cloud):
|
|
328
327
|
|
329
328
|
r = resources
|
330
329
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
331
|
-
|
332
|
-
|
333
|
-
else:
|
334
|
-
custom_resources = None
|
330
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
331
|
+
acc_dict)
|
335
332
|
|
336
333
|
# resources.memory and cpus are None if they are not explicitly set.
|
337
334
|
# We fetch the default values for the instance type in that case.
|
sky/clouds/lambda_cloud.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Lambda Cloud."""
|
2
|
-
import json
|
3
2
|
import typing
|
4
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
3
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
5
4
|
|
6
5
|
import requests
|
7
6
|
|
@@ -136,7 +135,7 @@ class Lambda(clouds.Cloud):
|
|
136
135
|
def get_accelerators_from_instance_type(
|
137
136
|
cls,
|
138
137
|
instance_type: str,
|
139
|
-
) -> Optional[Dict[str, int]]:
|
138
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
140
139
|
return service_catalog.get_accelerators_from_instance_type(
|
141
140
|
instance_type, clouds='lambda')
|
142
141
|
|
@@ -164,10 +163,8 @@ class Lambda(clouds.Cloud):
|
|
164
163
|
|
165
164
|
r = resources
|
166
165
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
167
|
-
|
168
|
-
|
169
|
-
else:
|
170
|
-
custom_resources = None
|
166
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
167
|
+
acc_dict)
|
171
168
|
|
172
169
|
resources_vars = {
|
173
170
|
'instance_type': resources.instance_type,
|
sky/clouds/oci.py
CHANGED
@@ -20,11 +20,10 @@ History:
|
|
20
20
|
- Hysun He (hysun.he@oracle.com) @ Oct 13, 2024:
|
21
21
|
Support more OS types additional to ubuntu for OCI resources.
|
22
22
|
"""
|
23
|
-
import json
|
24
23
|
import logging
|
25
24
|
import os
|
26
25
|
import typing
|
27
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
26
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
28
27
|
|
29
28
|
from sky import clouds
|
30
29
|
from sky import exceptions
|
@@ -193,7 +192,7 @@ class OCI(clouds.Cloud):
|
|
193
192
|
def get_accelerators_from_instance_type(
|
194
193
|
cls,
|
195
194
|
instance_type: str,
|
196
|
-
) -> Optional[Dict[str, int]]:
|
195
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
197
196
|
return service_catalog.get_accelerators_from_instance_type(
|
198
197
|
instance_type, clouds='oci')
|
199
198
|
|
@@ -213,10 +212,8 @@ class OCI(clouds.Cloud):
|
|
213
212
|
|
214
213
|
acc_dict = self.get_accelerators_from_instance_type(
|
215
214
|
resources.instance_type)
|
216
|
-
|
217
|
-
|
218
|
-
else:
|
219
|
-
custom_resources = None
|
215
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
216
|
+
acc_dict)
|
220
217
|
|
221
218
|
image_str = self._get_image_id(resources.image_id, region.name,
|
222
219
|
resources.instance_type)
|
@@ -468,7 +465,11 @@ class OCI(clouds.Cloud):
|
|
468
465
|
api_key_file = oci_cfg[
|
469
466
|
'key_file'] if 'key_file' in oci_cfg else 'BadConf'
|
470
467
|
sky_cfg_file = oci_utils.oci_config.get_sky_user_config_file()
|
471
|
-
|
468
|
+
# Must catch ImportError before any oci_adaptor.oci.exceptions
|
469
|
+
# because oci_adaptor.oci.exceptions can throw ImportError.
|
470
|
+
except ImportError:
|
471
|
+
return {}
|
472
|
+
except oci_adaptor.oci.exceptions.ConfigFileNotFound:
|
472
473
|
return {}
|
473
474
|
|
474
475
|
# OCI config and API key file are mandatory
|
sky/clouds/paperspace.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
""" Paperspace Cloud. """
|
2
2
|
|
3
|
-
import json
|
4
3
|
import typing
|
5
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
4
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
6
5
|
|
7
6
|
import requests
|
8
7
|
|
@@ -162,7 +161,7 @@ class Paperspace(clouds.Cloud):
|
|
162
161
|
|
163
162
|
@classmethod
|
164
163
|
def get_accelerators_from_instance_type(
|
165
|
-
cls, instance_type: str) -> Optional[Dict[str, int]]:
|
164
|
+
cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
166
165
|
return service_catalog.get_accelerators_from_instance_type(
|
167
166
|
instance_type, clouds='paperspace')
|
168
167
|
|
@@ -181,10 +180,8 @@ class Paperspace(clouds.Cloud):
|
|
181
180
|
|
182
181
|
r = resources
|
183
182
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
184
|
-
|
185
|
-
|
186
|
-
else:
|
187
|
-
custom_resources = None
|
183
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
184
|
+
acc_dict)
|
188
185
|
|
189
186
|
return {
|
190
187
|
'instance_type': resources.instance_type,
|
sky/clouds/runpod.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
""" RunPod Cloud. """
|
2
2
|
|
3
|
-
import json
|
4
3
|
import typing
|
5
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
4
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
6
5
|
|
7
6
|
from sky import clouds
|
8
7
|
from sky.clouds import service_catalog
|
@@ -147,7 +146,7 @@ class RunPod(clouds.Cloud):
|
|
147
146
|
|
148
147
|
@classmethod
|
149
148
|
def get_accelerators_from_instance_type(
|
150
|
-
cls, instance_type: str) -> Optional[Dict[str, int]]:
|
149
|
+
cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
151
150
|
return service_catalog.get_accelerators_from_instance_type(
|
152
151
|
instance_type, clouds='runpod')
|
153
152
|
|
@@ -166,10 +165,8 @@ class RunPod(clouds.Cloud):
|
|
166
165
|
|
167
166
|
r = resources
|
168
167
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
169
|
-
|
170
|
-
|
171
|
-
else:
|
172
|
-
custom_resources = None
|
168
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
169
|
+
acc_dict)
|
173
170
|
|
174
171
|
if r.image_id is None:
|
175
172
|
image_id = 'runpod/base:0.0.2'
|
sky/clouds/scp.py
CHANGED
@@ -4,9 +4,8 @@ This module includes the set of functions
|
|
4
4
|
to access the SCP catalog and check credentials for the SCP access.
|
5
5
|
"""
|
6
6
|
|
7
|
-
import json
|
8
7
|
import typing
|
9
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
8
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
10
9
|
|
11
10
|
from sky import clouds
|
12
11
|
from sky import exceptions
|
@@ -160,7 +159,7 @@ class SCP(clouds.Cloud):
|
|
160
159
|
def get_accelerators_from_instance_type(
|
161
160
|
cls,
|
162
161
|
instance_type: str,
|
163
|
-
) -> Optional[Dict[str, int]]:
|
162
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
164
163
|
return service_catalog.get_accelerators_from_instance_type(
|
165
164
|
instance_type, clouds='scp')
|
166
165
|
|
@@ -188,11 +187,9 @@ class SCP(clouds.Cloud):
|
|
188
187
|
|
189
188
|
r = resources
|
190
189
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
190
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
191
|
+
acc_dict)
|
191
192
|
|
192
|
-
if acc_dict is not None:
|
193
|
-
custom_resources = json.dumps(acc_dict, separators=(',', ':'))
|
194
|
-
else:
|
195
|
-
custom_resources = None
|
196
193
|
image_id = self._get_image_id(r.image_id, region.name, r.instance_type)
|
197
194
|
return {
|
198
195
|
'instance_type': resources.instance_type,
|
sky/clouds/service_catalog/__init__.py
CHANGED
@@ -238,7 +238,7 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
238
238
|
|
239
239
|
def get_accelerators_from_instance_type(
|
240
240
|
instance_type: str,
|
241
|
-
clouds: CloudFilter = None) -> Optional[Dict[str, int]]:
|
241
|
+
clouds: CloudFilter = None) -> Optional[Dict[str, Union[int, float]]]:
|
242
242
|
"""Returns the accelerators from a instance type."""
|
243
243
|
return _map_clouds_catalog(clouds, 'get_accelerators_from_instance_type',
|
244
244
|
instance_type)
|
sky/clouds/service_catalog/aws_catalog.py
CHANGED
@@ -8,7 +8,7 @@ import hashlib
|
|
8
8
|
import os
|
9
9
|
import threading
|
10
10
|
import typing
|
11
|
-
from typing import Dict, List, Optional, Tuple
|
11
|
+
from typing import Dict, List, Optional, Tuple, Union
|
12
12
|
|
13
13
|
from sky import exceptions
|
14
14
|
from sky import sky_logging
|
@@ -243,7 +243,7 @@ def get_default_instance_type(
|
|
243
243
|
|
244
244
|
|
245
245
|
def get_accelerators_from_instance_type(
|
246
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
246
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
247
247
|
return common.get_accelerators_from_instance_type_impl(
|
248
248
|
_get_df(), instance_type)
|
249
249
|
|
sky/clouds/service_catalog/azure_catalog.py
CHANGED
@@ -4,14 +4,17 @@ This module loads the service catalog file and can be used to query
|
|
4
4
|
instance types and pricing information for Azure.
|
5
5
|
"""
|
6
6
|
import re
|
7
|
-
from typing import Dict, List, Optional, Tuple
|
7
|
+
from typing import Dict, List, Optional, Tuple, Union
|
8
8
|
|
9
9
|
from sky import clouds as cloud_lib
|
10
|
+
from sky import sky_logging
|
10
11
|
from sky.clouds import Azure
|
11
12
|
from sky.clouds.service_catalog import common
|
12
13
|
from sky.utils import resources_utils
|
13
14
|
from sky.utils import ux_utils
|
14
15
|
|
16
|
+
logger = sky_logging.init_logger(__name__)
|
17
|
+
|
15
18
|
# This list should match the list of regions in
|
16
19
|
# skypilot image generation Packer script's replication_regions
|
17
20
|
# sky/clouds/service_catalog/images/skypilot-azure-cpu-ubuntu.pkr.hcl
|
@@ -134,7 +137,7 @@ def get_default_instance_type(
|
|
134
137
|
|
135
138
|
|
136
139
|
def get_accelerators_from_instance_type(
|
137
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
140
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
138
141
|
return common.get_accelerators_from_instance_type_impl(_df, instance_type)
|
139
142
|
|
140
143
|
|
@@ -154,6 +157,7 @@ def get_instance_type_for_accelerator(
|
|
154
157
|
if zone is not None:
|
155
158
|
with ux_utils.print_exception_no_traceback():
|
156
159
|
raise ValueError('Azure does not support zones.')
|
160
|
+
|
157
161
|
return common.get_instance_type_for_accelerator_impl(df=_df,
|
158
162
|
acc_name=acc_name,
|
159
163
|
acc_count=acc_count,
|
@@ -191,9 +195,16 @@ def list_accelerators(
|
|
191
195
|
|
192
196
|
def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
|
193
197
|
"""Returns the image id from the tag."""
|
194
|
-
|
195
|
-
|
196
|
-
|
198
|
+
global _image_df
|
199
|
+
image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
|
200
|
+
if image_id is None:
|
201
|
+
# Refresh the image catalog and try again, if the image tag is not
|
202
|
+
# found.
|
203
|
+
logger.debug('Refreshing the image catalog and trying again.')
|
204
|
+
_image_df = common.read_catalog('azure/images.csv',
|
205
|
+
pull_frequency_hours=0)
|
206
|
+
image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
|
207
|
+
return image_id
|
197
208
|
|
198
209
|
|
199
210
|
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
|
sky/clouds/service_catalog/common.py
CHANGED
@@ -5,7 +5,7 @@ import hashlib
|
|
5
5
|
import os
|
6
6
|
import time
|
7
7
|
import typing
|
8
|
-
from typing import Callable, Dict, List, NamedTuple, Optional, Tuple
|
8
|
+
from typing import Callable, Dict, List, NamedTuple, Optional, Tuple, Union
|
9
9
|
|
10
10
|
import filelock
|
11
11
|
import requests
|
@@ -481,7 +481,7 @@ def get_instance_type_for_cpus_mem_impl(
|
|
481
481
|
def get_accelerators_from_instance_type_impl(
|
482
482
|
df: 'pd.DataFrame',
|
483
483
|
instance_type: str,
|
484
|
-
) -> Optional[Dict[str, int]]:
|
484
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
485
485
|
df = _get_instance_type(df, instance_type, None)
|
486
486
|
if len(df) == 0:
|
487
487
|
with ux_utils.print_exception_no_traceback():
|
@@ -490,13 +490,19 @@ def get_accelerators_from_instance_type_impl(
|
|
490
490
|
acc_name, acc_count = row['AcceleratorName'], row['AcceleratorCount']
|
491
491
|
if pd.isnull(acc_name):
|
492
492
|
return None
|
493
|
-
|
493
|
+
|
494
|
+
def _convert(value):
|
495
|
+
if int(value) == value:
|
496
|
+
return int(value)
|
497
|
+
return float(value)
|
498
|
+
|
499
|
+
return {acc_name: _convert(acc_count)}
|
494
500
|
|
495
501
|
|
496
502
|
def get_instance_type_for_accelerator_impl(
|
497
503
|
df: 'pd.DataFrame',
|
498
504
|
acc_name: str,
|
499
|
-
acc_count: int,
|
505
|
+
acc_count: Union[int, float],
|
500
506
|
cpus: Optional[str] = None,
|
501
507
|
memory: Optional[str] = None,
|
502
508
|
use_spot: bool = False,
|
@@ -509,7 +515,7 @@ def get_instance_type_for_accelerator_impl(
|
|
509
515
|
accelerators with sorted prices and a list of candidates with fuzzy search.
|
510
516
|
"""
|
511
517
|
result = df[(df['AcceleratorName'].str.fullmatch(acc_name, case=False)) &
|
512
|
-
(df['AcceleratorCount']
|
518
|
+
(abs(df['AcceleratorCount'] - acc_count) <= 0.01)]
|
513
519
|
result = _filter_region_zone(result, region, zone)
|
514
520
|
if len(result) == 0:
|
515
521
|
fuzzy_result = df[
|
@@ -522,8 +528,11 @@ def get_instance_type_for_accelerator_impl(
|
|
522
528
|
fuzzy_candidate_list = []
|
523
529
|
if len(fuzzy_result) > 0:
|
524
530
|
for _, row in fuzzy_result.iterrows():
|
531
|
+
acc_cnt = float(row['AcceleratorCount'])
|
532
|
+
acc_count_display = (int(acc_cnt) if acc_cnt.is_integer() else
|
533
|
+
f'{acc_cnt:.2f}')
|
525
534
|
fuzzy_candidate_list.append(f'{row["AcceleratorName"]}:'
|
526
|
-
f'{
|
535
|
+
f'{acc_count_display}')
|
527
536
|
return (None, fuzzy_candidate_list)
|
528
537
|
|
529
538
|
result = _filter_with_cpus(result, cpus)
|