skypilot-nightly 1.0.0.dev20241222__py3-none-any.whl → 1.0.0.dev20241224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +0 -4
- sky/backends/cloud_vm_ray_backend.py +2 -2
- sky/benchmark/benchmark_utils.py +1 -1
- sky/check.py +1 -1
- sky/cli.py +28 -30
- sky/cloud_stores.py +1 -1
- sky/clouds/gcp.py +1 -1
- sky/clouds/kubernetes.py +1 -1
- sky/clouds/service_catalog/common.py +11 -10
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +1 -1
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/utils/scp_utils.py +3 -3
- sky/core.py +3 -3
- sky/data/data_utils.py +25 -32
- sky/data/storage.py +58 -15
- sky/jobs/core.py +2 -2
- sky/jobs/state.py +2 -2
- sky/jobs/utils.py +6 -6
- sky/optimizer.py +3 -3
- sky/provision/aws/config.py +2 -2
- sky/provision/gcp/config.py +3 -3
- sky/provision/kubernetes/config.py +7 -7
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +2 -2
- sky/provision/lambda_cloud/lambda_utils.py +3 -3
- sky/provision/oci/query_utils.py +3 -3
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +6 -7
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +4 -4
- sky/serve/autoscalers.py +2 -2
- sky/serve/core.py +4 -4
- sky/serve/replica_managers.py +1 -1
- sky/serve/serve_state.py +1 -1
- sky/serve/serve_utils.py +11 -10
- sky/serve/service_spec.py +8 -5
- sky/sky_logging.py +17 -1
- sky/skylet/job_lib.py +1 -1
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +1 -1
- sky/skylet/providers/scp/node_provider.py +7 -7
- sky/task.py +1 -1
- sky/utils/accelerator_registry.py +1 -1
- sky/utils/common_utils.py +1 -1
- sky/utils/dag_utils.py +1 -1
- sky/utils/kubernetes/gpu_labeler.py +1 -1
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241224.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241224.dist-info}/RECORD +54 -54
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241224.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241224.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241224.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241224.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '6b62570e8b29bb682ae46157ccc757d70fbb975c'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20241222'
|
38
|
+
__version__ = '1.0.0.dev20241224'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/backends/backend_utils.py
CHANGED
@@ -1019,10 +1019,6 @@ def _add_auth_to_cluster_config(cloud: clouds.Cloud, cluster_config_file: str):
|
|
1019
1019
|
common_utils.dump_yaml(cluster_config_file, config)
|
1020
1020
|
|
1021
1021
|
|
1022
|
-
def get_run_timestamp() -> str:
|
1023
|
-
return 'sky-' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
|
1024
|
-
|
1025
|
-
|
1026
1022
|
def get_timestamp_from_run_timestamp(run_timestamp: str) -> float:
|
1027
1023
|
return datetime.strptime(
|
1028
1024
|
run_timestamp.partition('-')[2], '%Y-%m-%d-%H-%M-%S-%f').timestamp()
|
sky/backends/cloud_vm_ray_backend.py
CHANGED
@@ -2599,7 +2599,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
2599
2599
|
ResourceHandle = CloudVmRayResourceHandle # pylint: disable=invalid-name
|
2600
2600
|
|
2601
2601
|
def __init__(self):
|
2602
|
-
self.run_timestamp =
|
2602
|
+
self.run_timestamp = sky_logging.get_run_timestamp()
|
2603
2603
|
# NOTE: do not expanduser() here, as this '~/...' path is used for
|
2604
2604
|
# remote as well to be expanded on the remote side.
|
2605
2605
|
self.log_dir = os.path.join(constants.SKY_LOGS_DIRECTORY,
|
@@ -2626,7 +2626,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
2626
2626
|
self._optimize_target) or optimizer.OptimizeTarget.COST
|
2627
2627
|
self._requested_features = kwargs.pop('requested_features',
|
2628
2628
|
self._requested_features)
|
2629
|
-
assert
|
2629
|
+
assert not kwargs, f'Unexpected kwargs: {kwargs}'
|
2630
2630
|
|
2631
2631
|
def check_resources_fit_cluster(
|
2632
2632
|
self,
|
sky/benchmark/benchmark_utils.py
CHANGED
@@ -535,7 +535,7 @@ def launch_benchmark_clusters(benchmark: str, clusters: List[str],
|
|
535
535
|
for yaml_fd, cluster in zip(yaml_fds, clusters)]
|
536
536
|
|
537
537
|
# Save stdout/stderr from cluster launches.
|
538
|
-
run_timestamp =
|
538
|
+
run_timestamp = sky_logging.get_run_timestamp()
|
539
539
|
log_dir = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp)
|
540
540
|
log_dir = os.path.expanduser(log_dir)
|
541
541
|
logger.info(
|
sky/check.py
CHANGED
@@ -127,7 +127,7 @@ def check(
|
|
127
127
|
'\nNote: The following clouds were disabled because they were not '
|
128
128
|
'included in allowed_clouds in ~/.sky/config.yaml: '
|
129
129
|
f'{", ".join([c for c in disallowed_cloud_names])}')
|
130
|
-
if
|
130
|
+
if not all_enabled_clouds:
|
131
131
|
echo(
|
132
132
|
click.style(
|
133
133
|
'No cloud is enabled. SkyPilot will not be able to run any '
|
sky/cli.py
CHANGED
@@ -114,7 +114,7 @@ def _get_glob_clusters(clusters: List[str], silent: bool = False) -> List[str]:
|
|
114
114
|
glob_clusters = []
|
115
115
|
for cluster in clusters:
|
116
116
|
glob_cluster = global_user_state.get_glob_cluster_names(cluster)
|
117
|
-
if
|
117
|
+
if not glob_cluster and not silent:
|
118
118
|
click.echo(f'Cluster {cluster} not found.')
|
119
119
|
glob_clusters.extend(glob_cluster)
|
120
120
|
return list(set(glob_clusters))
|
@@ -125,7 +125,7 @@ def _get_glob_storages(storages: List[str]) -> List[str]:
|
|
125
125
|
glob_storages = []
|
126
126
|
for storage_object in storages:
|
127
127
|
glob_storage = global_user_state.get_glob_storage_name(storage_object)
|
128
|
-
if
|
128
|
+
if not glob_storage:
|
129
129
|
click.echo(f'Storage {storage_object} not found.')
|
130
130
|
glob_storages.extend(glob_storage)
|
131
131
|
return list(set(glob_storages))
|
@@ -830,7 +830,7 @@ class _NaturalOrderGroup(click.Group):
|
|
830
830
|
Reference: https://github.com/pallets/click/issues/513
|
831
831
|
"""
|
832
832
|
|
833
|
-
def list_commands(self, ctx):
|
833
|
+
def list_commands(self, ctx): # pylint: disable=unused-argument
|
834
834
|
return self.commands.keys()
|
835
835
|
|
836
836
|
@usage_lib.entrypoint('sky.cli', fallback=True)
|
@@ -1473,7 +1473,7 @@ def _get_services(service_names: Optional[List[str]],
|
|
1473
1473
|
if len(service_records) != 1:
|
1474
1474
|
plural = 's' if len(service_records) > 1 else ''
|
1475
1475
|
service_num = (str(len(service_records))
|
1476
|
-
if
|
1476
|
+
if service_records else 'No')
|
1477
1477
|
raise click.UsageError(
|
1478
1478
|
f'{service_num} service{plural} found. Please specify '
|
1479
1479
|
'an existing service to show its endpoint. Usage: '
|
@@ -1696,8 +1696,7 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1696
1696
|
if len(clusters) != 1:
|
1697
1697
|
with ux_utils.print_exception_no_traceback():
|
1698
1698
|
plural = 's' if len(clusters) > 1 else ''
|
1699
|
-
cluster_num = (str(len(clusters))
|
1700
|
-
if len(clusters) > 0 else 'No')
|
1699
|
+
cluster_num = (str(len(clusters)) if clusters else 'No')
|
1701
1700
|
cause = 'a single' if len(clusters) > 1 else 'an existing'
|
1702
1701
|
raise ValueError(
|
1703
1702
|
_STATUS_PROPERTY_CLUSTER_NUM_ERROR_MESSAGE.format(
|
@@ -1722,9 +1721,8 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1722
1721
|
with ux_utils.print_exception_no_traceback():
|
1723
1722
|
plural = 's' if len(cluster_records) > 1 else ''
|
1724
1723
|
cluster_num = (str(len(cluster_records))
|
1725
|
-
if
|
1726
|
-
|
1727
|
-
verb = 'found' if len(cluster_records) > 0 else 'not found'
|
1724
|
+
if cluster_records else f'{clusters[0]!r}')
|
1725
|
+
verb = 'found' if cluster_records else 'not found'
|
1728
1726
|
cause = 'a single' if len(clusters) > 1 else 'an existing'
|
1729
1727
|
raise ValueError(
|
1730
1728
|
_STATUS_PROPERTY_CLUSTER_NUM_ERROR_MESSAGE.format(
|
@@ -2470,7 +2468,7 @@ def start(
|
|
2470
2468
|
'(see `sky status`), or the -a/--all flag.')
|
2471
2469
|
|
2472
2470
|
if all:
|
2473
|
-
if
|
2471
|
+
if clusters:
|
2474
2472
|
click.echo('Both --all and cluster(s) specified for sky start. '
|
2475
2473
|
'Letting --all take effect.')
|
2476
2474
|
|
@@ -2800,7 +2798,7 @@ def _down_or_stop_clusters(
|
|
2800
2798
|
option_str = '{stop,down}'
|
2801
2799
|
operation = f'{verb} auto{option_str} on'
|
2802
2800
|
|
2803
|
-
if
|
2801
|
+
if names:
|
2804
2802
|
controllers = [
|
2805
2803
|
name for name in names
|
2806
2804
|
if controller_utils.Controllers.from_name(name) is not None
|
@@ -2814,7 +2812,7 @@ def _down_or_stop_clusters(
|
|
2814
2812
|
# Make sure the controllers are explicitly specified without other
|
2815
2813
|
# normal clusters.
|
2816
2814
|
if controllers:
|
2817
|
-
if
|
2815
|
+
if names:
|
2818
2816
|
names_str = ', '.join(map(repr, names))
|
2819
2817
|
raise click.UsageError(
|
2820
2818
|
f'{operation} controller(s) '
|
@@ -2867,7 +2865,7 @@ def _down_or_stop_clusters(
|
|
2867
2865
|
|
2868
2866
|
if apply_to_all:
|
2869
2867
|
all_clusters = global_user_state.get_clusters()
|
2870
|
-
if
|
2868
|
+
if names:
|
2871
2869
|
click.echo(
|
2872
2870
|
f'Both --all and cluster(s) specified for `sky {command}`. '
|
2873
2871
|
'Letting --all take effect.')
|
@@ -2894,7 +2892,7 @@ def _down_or_stop_clusters(
|
|
2894
2892
|
click.echo('Cluster(s) not found (tip: see `sky status`).')
|
2895
2893
|
return
|
2896
2894
|
|
2897
|
-
if not no_confirm and
|
2895
|
+
if not no_confirm and clusters:
|
2898
2896
|
cluster_str = 'clusters' if len(clusters) > 1 else 'cluster'
|
2899
2897
|
cluster_list = ', '.join(clusters)
|
2900
2898
|
click.confirm(
|
@@ -3003,7 +3001,7 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3003
3001
|
# Check only specific clouds - AWS and GCP.
|
3004
3002
|
sky check aws gcp
|
3005
3003
|
"""
|
3006
|
-
clouds_arg = clouds if
|
3004
|
+
clouds_arg = clouds if clouds else None
|
3007
3005
|
sky_check.check(verbose=verbose, clouds=clouds_arg)
|
3008
3006
|
|
3009
3007
|
|
@@ -3138,7 +3136,7 @@ def show_gpus(
|
|
3138
3136
|
f'capacity ({list(capacity.keys())}), '
|
3139
3137
|
f'and available ({list(available.keys())}) '
|
3140
3138
|
'must be same.')
|
3141
|
-
if
|
3139
|
+
if not counts:
|
3142
3140
|
err_msg = 'No GPUs found in Kubernetes cluster. '
|
3143
3141
|
debug_msg = 'To further debug, run: sky check '
|
3144
3142
|
if name_filter is not None:
|
@@ -3282,7 +3280,7 @@ def show_gpus(
|
|
3282
3280
|
for tpu in service_catalog.get_tpus():
|
3283
3281
|
if tpu in result:
|
3284
3282
|
tpu_table.add_row([tpu, _list_to_str(result.pop(tpu))])
|
3285
|
-
if
|
3283
|
+
if tpu_table.get_string():
|
3286
3284
|
yield '\n\n'
|
3287
3285
|
yield from tpu_table.get_string()
|
3288
3286
|
|
@@ -3393,7 +3391,7 @@ def show_gpus(
|
|
3393
3391
|
yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3394
3392
|
f'Cloud GPUs{colorama.Style.RESET_ALL}\n')
|
3395
3393
|
|
3396
|
-
if
|
3394
|
+
if not result:
|
3397
3395
|
quantity_str = (f' with requested quantity {quantity}'
|
3398
3396
|
if quantity else '')
|
3399
3397
|
cloud_str = f' on {cloud_obj}.' if cloud_name else ' in cloud catalogs.'
|
@@ -3522,7 +3520,7 @@ def storage_delete(names: List[str], all: bool, yes: bool): # pylint: disable=r
|
|
3522
3520
|
# Delete all storage objects.
|
3523
3521
|
sky storage delete -a
|
3524
3522
|
"""
|
3525
|
-
if sum([
|
3523
|
+
if sum([bool(names), all]) != 1:
|
3526
3524
|
raise click.UsageError('Either --all or a name must be specified.')
|
3527
3525
|
if all:
|
3528
3526
|
storages = sky.storage_ls()
|
@@ -3881,8 +3879,8 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
|
|
3881
3879
|
exit_if_not_accessible=True)
|
3882
3880
|
|
3883
3881
|
job_id_str = ','.join(map(str, job_ids))
|
3884
|
-
if sum([
|
3885
|
-
argument_str = f'--job-ids {job_id_str}' if
|
3882
|
+
if sum([bool(job_ids), name is not None, all]) != 1:
|
3883
|
+
argument_str = f'--job-ids {job_id_str}' if job_ids else ''
|
3886
3884
|
argument_str += f' --name {name}' if name is not None else ''
|
3887
3885
|
argument_str += ' --all' if all else ''
|
3888
3886
|
raise click.UsageError(
|
@@ -4523,9 +4521,9 @@ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool,
|
|
4523
4521
|
# Forcefully tear down a specific replica, even in failed status.
|
4524
4522
|
sky serve down my-service --replica-id 1 --purge
|
4525
4523
|
"""
|
4526
|
-
if sum([
|
4527
|
-
argument_str = f'SERVICE_NAMES={",".join(service_names)}'
|
4528
|
-
|
4524
|
+
if sum([bool(service_names), all]) != 1:
|
4525
|
+
argument_str = (f'SERVICE_NAMES={",".join(service_names)}'
|
4526
|
+
if service_names else '')
|
4529
4527
|
argument_str += ' --all' if all else ''
|
4530
4528
|
raise click.UsageError(
|
4531
4529
|
'Can only specify one of SERVICE_NAMES or --all. '
|
@@ -4898,7 +4896,7 @@ def benchmark_launch(
|
|
4898
4896
|
if idle_minutes_to_autostop is None:
|
4899
4897
|
idle_minutes_to_autostop = 5
|
4900
4898
|
commandline_args['idle-minutes-to-autostop'] = idle_minutes_to_autostop
|
4901
|
-
if
|
4899
|
+
if env:
|
4902
4900
|
commandline_args['env'] = [f'{k}={v}' for k, v in env]
|
4903
4901
|
|
4904
4902
|
# Launch the benchmarking clusters in detach mode in parallel.
|
@@ -5177,7 +5175,7 @@ def benchmark_delete(benchmarks: Tuple[str], all: Optional[bool],
|
|
5177
5175
|
raise click.BadParameter(
|
5178
5176
|
'Either specify benchmarks or use --all to delete all benchmarks.')
|
5179
5177
|
to_delete = []
|
5180
|
-
if
|
5178
|
+
if benchmarks:
|
5181
5179
|
for benchmark in benchmarks:
|
5182
5180
|
record = benchmark_state.get_benchmark_from_name(benchmark)
|
5183
5181
|
if record is None:
|
@@ -5186,7 +5184,7 @@ def benchmark_delete(benchmarks: Tuple[str], all: Optional[bool],
|
|
5186
5184
|
to_delete.append(record)
|
5187
5185
|
if all:
|
5188
5186
|
to_delete = benchmark_state.get_benchmarks()
|
5189
|
-
if
|
5187
|
+
if benchmarks:
|
5190
5188
|
print('Both --all and benchmark(s) specified '
|
5191
5189
|
'for sky bench delete. Letting --all take effect.')
|
5192
5190
|
|
@@ -5288,7 +5286,7 @@ def _deploy_local_cluster(gpus: bool):
|
|
5288
5286
|
run_command = shlex.split(run_command)
|
5289
5287
|
|
5290
5288
|
# Setup logging paths
|
5291
|
-
run_timestamp =
|
5289
|
+
run_timestamp = sky_logging.get_run_timestamp()
|
5292
5290
|
log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
|
5293
5291
|
'local_up.log')
|
5294
5292
|
tail_cmd = 'tail -n100 -f ' + log_path
|
@@ -5402,7 +5400,7 @@ def _deploy_remote_cluster(ip_file: str, ssh_user: str, ssh_key_path: str,
|
|
5402
5400
|
deploy_command = shlex.split(deploy_command)
|
5403
5401
|
|
5404
5402
|
# Setup logging paths
|
5405
|
-
run_timestamp =
|
5403
|
+
run_timestamp = sky_logging.get_run_timestamp()
|
5406
5404
|
log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
|
5407
5405
|
'local_up.log')
|
5408
5406
|
tail_cmd = 'tail -n100 -f ' + log_path
|
@@ -5517,7 +5515,7 @@ def local_down():
|
|
5517
5515
|
run_command = shlex.split(down_script_path)
|
5518
5516
|
|
5519
5517
|
# Setup logging paths
|
5520
|
-
run_timestamp =
|
5518
|
+
run_timestamp = sky_logging.get_run_timestamp()
|
5521
5519
|
log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
|
5522
5520
|
'local_down.log')
|
5523
5521
|
tail_cmd = 'tail -n100 -f ' + log_path
|
sky/cloud_stores.py
CHANGED
@@ -133,7 +133,7 @@ class GcsCloudStorage(CloudStorage):
|
|
133
133
|
# If <url> is a bucket root, then we only need `gsutil` to succeed
|
134
134
|
# to make sure the bucket exists. It is already a directory.
|
135
135
|
_, key = data_utils.split_gcs_path(url)
|
136
|
-
if
|
136
|
+
if not key:
|
137
137
|
return True
|
138
138
|
# Otherwise, gsutil ls -d url will return:
|
139
139
|
# --> url.rstrip('/') if url is not a directory
|
sky/clouds/gcp.py
CHANGED
@@ -830,7 +830,7 @@ class GCP(clouds.Cloud):
|
|
830
830
|
ret_permissions = request.execute().get('permissions', [])
|
831
831
|
|
832
832
|
diffs = set(gcp_minimal_permissions).difference(set(ret_permissions))
|
833
|
-
if
|
833
|
+
if diffs:
|
834
834
|
identity_str = identity[0] if identity else None
|
835
835
|
return False, (
|
836
836
|
'The following permissions are not enabled for the current '
|
sky/clouds/kubernetes.py
CHANGED
@@ -139,7 +139,7 @@ class Kubernetes(clouds.Cloud):
|
|
139
139
|
use the service account mounted in the pod.
|
140
140
|
"""
|
141
141
|
all_contexts = kubernetes_utils.get_all_kube_context_names()
|
142
|
-
if
|
142
|
+
if not all_contexts:
|
143
143
|
return []
|
144
144
|
|
145
145
|
all_contexts = set(all_contexts)
|
sky/clouds/service_catalog/common.py
CHANGED
@@ -270,9 +270,10 @@ def validate_region_zone_impl(
|
|
270
270
|
candidate_loc = difflib.get_close_matches(loc, all_loc, n=5, cutoff=0.9)
|
271
271
|
candidate_loc = sorted(candidate_loc)
|
272
272
|
candidate_strs = ''
|
273
|
-
if
|
273
|
+
if candidate_loc:
|
274
274
|
candidate_strs = ', '.join(candidate_loc)
|
275
275
|
candidate_strs = f'\nDid you mean one of these: {candidate_strs!r}?'
|
276
|
+
|
276
277
|
return candidate_strs
|
277
278
|
|
278
279
|
def _get_all_supported_regions_str() -> str:
|
@@ -286,7 +287,7 @@ def validate_region_zone_impl(
|
|
286
287
|
filter_df = df
|
287
288
|
if region is not None:
|
288
289
|
filter_df = _filter_region_zone(filter_df, region, zone=None)
|
289
|
-
if
|
290
|
+
if filter_df.empty:
|
290
291
|
with ux_utils.print_exception_no_traceback():
|
291
292
|
error_msg = (f'Invalid region {region!r}')
|
292
293
|
candidate_strs = _get_candidate_str(
|
@@ -310,7 +311,7 @@ def validate_region_zone_impl(
|
|
310
311
|
if zone is not None:
|
311
312
|
maybe_region_df = filter_df
|
312
313
|
filter_df = filter_df[filter_df['AvailabilityZone'] == zone]
|
313
|
-
if
|
314
|
+
if filter_df.empty:
|
314
315
|
region_str = f' for region {region!r}' if region else ''
|
315
316
|
df = maybe_region_df if region else df
|
316
317
|
with ux_utils.print_exception_no_traceback():
|
@@ -378,7 +379,7 @@ def get_vcpus_mem_from_instance_type_impl(
|
|
378
379
|
instance_type: str,
|
379
380
|
) -> Tuple[Optional[float], Optional[float]]:
|
380
381
|
df = _get_instance_type(df, instance_type, None)
|
381
|
-
if
|
382
|
+
if df.empty:
|
382
383
|
with ux_utils.print_exception_no_traceback():
|
383
384
|
raise ValueError(f'No instance type {instance_type} found.')
|
384
385
|
assert len(set(df['vCPUs'])) == 1, ('Cannot determine the number of vCPUs '
|
@@ -484,7 +485,7 @@ def get_accelerators_from_instance_type_impl(
|
|
484
485
|
instance_type: str,
|
485
486
|
) -> Optional[Dict[str, Union[int, float]]]:
|
486
487
|
df = _get_instance_type(df, instance_type, None)
|
487
|
-
if
|
488
|
+
if df.empty:
|
488
489
|
with ux_utils.print_exception_no_traceback():
|
489
490
|
raise ValueError(f'No instance type {instance_type} found.')
|
490
491
|
row = df.iloc[0]
|
@@ -518,7 +519,7 @@ def get_instance_type_for_accelerator_impl(
|
|
518
519
|
result = df[(df['AcceleratorName'].str.fullmatch(acc_name, case=False)) &
|
519
520
|
(abs(df['AcceleratorCount'] - acc_count) <= 0.01)]
|
520
521
|
result = _filter_region_zone(result, region, zone)
|
521
|
-
if
|
522
|
+
if result.empty:
|
522
523
|
fuzzy_result = df[
|
523
524
|
(df['AcceleratorName'].str.contains(acc_name, case=False)) &
|
524
525
|
(df['AcceleratorCount'] >= acc_count)]
|
@@ -527,7 +528,7 @@ def get_instance_type_for_accelerator_impl(
|
|
527
528
|
fuzzy_result = fuzzy_result[['AcceleratorName',
|
528
529
|
'AcceleratorCount']].drop_duplicates()
|
529
530
|
fuzzy_candidate_list = []
|
530
|
-
if
|
531
|
+
if not fuzzy_result.empty:
|
531
532
|
for _, row in fuzzy_result.iterrows():
|
532
533
|
acc_cnt = float(row['AcceleratorCount'])
|
533
534
|
acc_count_display = (int(acc_cnt) if acc_cnt.is_integer() else
|
@@ -539,7 +540,7 @@ def get_instance_type_for_accelerator_impl(
|
|
539
540
|
result = _filter_with_cpus(result, cpus)
|
540
541
|
result = _filter_with_mem(result, memory)
|
541
542
|
result = _filter_region_zone(result, region, zone)
|
542
|
-
if
|
543
|
+
if result.empty:
|
543
544
|
return ([], [])
|
544
545
|
|
545
546
|
# Current strategy: choose the cheapest instance
|
@@ -680,7 +681,7 @@ def get_image_id_from_tag_impl(df: 'pd.DataFrame', tag: str,
|
|
680
681
|
df = _filter_region_zone(df, region, zone=None)
|
681
682
|
assert len(df) <= 1, ('Multiple images found for tag '
|
682
683
|
f'{tag} in region {region}')
|
683
|
-
if
|
684
|
+
if df.empty:
|
684
685
|
return None
|
685
686
|
image_id = df['ImageId'].iloc[0]
|
686
687
|
if pd.isna(image_id):
|
@@ -694,4 +695,4 @@ def is_image_tag_valid_impl(df: 'pd.DataFrame', tag: str,
|
|
694
695
|
df = df[df['Tag'] == tag]
|
695
696
|
df = _filter_region_zone(df, region, zone=None)
|
696
697
|
df = df.dropna(subset=['ImageId'])
|
697
|
-
return
|
698
|
+
return not df.empty
|
sky/clouds/service_catalog/data_fetchers/fetch_azure.py
CHANGED
@@ -134,7 +134,7 @@ def get_pricing_df(region: Optional[str] = None) -> 'pd.DataFrame':
|
|
134
134
|
content_str = r.content.decode('ascii')
|
135
135
|
content = json.loads(content_str)
|
136
136
|
items = content.get('Items', [])
|
137
|
-
if
|
137
|
+
if not items:
|
138
138
|
break
|
139
139
|
all_items += items
|
140
140
|
url = content.get('NextPageLink')
|
sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py
CHANGED
@@ -534,7 +534,7 @@ def initialize_images_csv(csv_saving_path: str, vc_object,
|
|
534
534
|
gpu_name = tag_name.split('-')[1]
|
535
535
|
if gpu_name not in gpu_tags:
|
536
536
|
gpu_tags.append(gpu_name)
|
537
|
-
if
|
537
|
+
if gpu_tags:
|
538
538
|
gpu_tags_str = str(gpu_tags).replace('\'', '\"')
|
539
539
|
f.write(f'{item.id},{vcenter_name},{item_cpu},{item_memory}'
|
540
540
|
f',,,\'{gpu_tags_str}\'\n')
|
sky/clouds/utils/scp_utils.py
CHANGED
@@ -65,7 +65,7 @@ class Metadata:
|
|
65
65
|
if value is None:
|
66
66
|
if instance_id in metadata:
|
67
67
|
metadata.pop(instance_id) # del entry
|
68
|
-
if
|
68
|
+
if not metadata:
|
69
69
|
if os.path.exists(self.path):
|
70
70
|
os.remove(self.path)
|
71
71
|
return
|
@@ -84,7 +84,7 @@ class Metadata:
|
|
84
84
|
for instance_id in list(metadata.keys()):
|
85
85
|
if instance_id not in instance_ids:
|
86
86
|
del metadata[instance_id]
|
87
|
-
if
|
87
|
+
if not metadata:
|
88
88
|
os.remove(self.path)
|
89
89
|
return
|
90
90
|
with open(self.path, 'w', encoding='utf-8') as f:
|
@@ -410,7 +410,7 @@ class SCPClient:
|
|
410
410
|
parameter.append('vpcId=' + vpc_id)
|
411
411
|
if sg_name is not None:
|
412
412
|
parameter.append('securityGroupName=' + sg_name)
|
413
|
-
if
|
413
|
+
if parameter:
|
414
414
|
url = url + '?' + '&'.join(parameter)
|
415
415
|
return self._get(url)
|
416
416
|
|
sky/core.py
CHANGED
@@ -732,7 +732,7 @@ def cancel(
|
|
732
732
|
f'{colorama.Fore.YELLOW}'
|
733
733
|
f'Cancelling latest running job on cluster {cluster_name!r}...'
|
734
734
|
f'{colorama.Style.RESET_ALL}')
|
735
|
-
elif
|
735
|
+
elif job_ids:
|
736
736
|
# all = False, len(job_ids) > 0 => cancel the specified jobs.
|
737
737
|
jobs_str = ', '.join(map(str, job_ids))
|
738
738
|
sky_logging.print(
|
@@ -817,7 +817,7 @@ def download_logs(
|
|
817
817
|
backend = backend_utils.get_backend_from_handle(handle)
|
818
818
|
assert isinstance(backend, backends.CloudVmRayBackend), backend
|
819
819
|
|
820
|
-
if job_ids is not None and
|
820
|
+
if job_ids is not None and not job_ids:
|
821
821
|
return {}
|
822
822
|
|
823
823
|
usage_lib.record_cluster_name_for_current_operation(cluster_name)
|
@@ -866,7 +866,7 @@ def job_status(cluster_name: str,
|
|
866
866
|
f'of type {backend.__class__.__name__!r}.')
|
867
867
|
assert isinstance(handle, backends.CloudVmRayResourceHandle), handle
|
868
868
|
|
869
|
-
if job_ids is not None and
|
869
|
+
if job_ids is not None and not job_ids:
|
870
870
|
return {}
|
871
871
|
|
872
872
|
sky_logging.print(f'{colorama.Fore.YELLOW}'
|
sky/data/data_utils.py
CHANGED
@@ -20,6 +20,7 @@ from sky.adaptors import azure
|
|
20
20
|
from sky.adaptors import cloudflare
|
21
21
|
from sky.adaptors import gcp
|
22
22
|
from sky.adaptors import ibm
|
23
|
+
from sky.skylet import log_lib
|
23
24
|
from sky.utils import common_utils
|
24
25
|
from sky.utils import ux_utils
|
25
26
|
|
@@ -430,6 +431,7 @@ def _group_files_by_dir(
|
|
430
431
|
def parallel_upload(source_path_list: List[str],
|
431
432
|
filesync_command_generator: Callable[[str, List[str]], str],
|
432
433
|
dirsync_command_generator: Callable[[str, str], str],
|
434
|
+
log_path: str,
|
433
435
|
bucket_name: str,
|
434
436
|
access_denied_message: str,
|
435
437
|
create_dirs: bool = False,
|
@@ -445,6 +447,7 @@ def parallel_upload(source_path_list: List[str],
|
|
445
447
|
for a list of files belonging to the same dir.
|
446
448
|
dirsync_command_generator: Callable that generates rsync command
|
447
449
|
for a directory.
|
450
|
+
log_path: Path to the log file.
|
448
451
|
access_denied_message: Message to intercept from the underlying
|
449
452
|
upload utility when permissions are insufficient. Used in
|
450
453
|
exception handling.
|
@@ -477,7 +480,7 @@ def parallel_upload(source_path_list: List[str],
|
|
477
480
|
p.starmap(
|
478
481
|
run_upload_cli,
|
479
482
|
zip(commands, [access_denied_message] * len(commands),
|
480
|
-
[bucket_name] * len(commands)))
|
483
|
+
[bucket_name] * len(commands), [log_path] * len(commands)))
|
481
484
|
|
482
485
|
|
483
486
|
def get_gsutil_command() -> Tuple[str, str]:
|
@@ -518,37 +521,27 @@ def get_gsutil_command() -> Tuple[str, str]:
|
|
518
521
|
return gsutil_alias, alias_gen
|
519
522
|
|
520
523
|
|
521
|
-
def run_upload_cli(command: str, access_denied_message: str, bucket_name: str
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
'write permissions. It is possible that '
|
543
|
-
'the bucket is public.')
|
544
|
-
returncode = process.wait()
|
545
|
-
if returncode != 0:
|
546
|
-
stderr_str = '\n'.join(stderr)
|
547
|
-
with ux_utils.print_exception_no_traceback():
|
548
|
-
logger.error(stderr_str)
|
549
|
-
raise exceptions.StorageUploadError(
|
550
|
-
f'Upload to bucket failed for store {bucket_name}. '
|
551
|
-
'Please check the logs.')
|
524
|
+
def run_upload_cli(command: str, access_denied_message: str, bucket_name: str,
|
525
|
+
log_path: str):
|
526
|
+
returncode, stdout, stderr = log_lib.run_with_log(command,
|
527
|
+
log_path,
|
528
|
+
shell=True,
|
529
|
+
require_outputs=True)
|
530
|
+
if access_denied_message in stderr:
|
531
|
+
with ux_utils.print_exception_no_traceback():
|
532
|
+
raise PermissionError('Failed to upload files to '
|
533
|
+
'the remote bucket. The bucket does not have '
|
534
|
+
'write permissions. It is possible that '
|
535
|
+
'the bucket is public.')
|
536
|
+
if returncode != 0:
|
537
|
+
with ux_utils.print_exception_no_traceback():
|
538
|
+
logger.error(stderr)
|
539
|
+
raise exceptions.StorageUploadError(
|
540
|
+
f'Upload to bucket failed for store {bucket_name}. '
|
541
|
+
f'Please check the logs: {log_path}')
|
542
|
+
if not stdout:
|
543
|
+
logger.debug('No file uploaded. This could be due to an error or '
|
544
|
+
'because all files already exist on the cloud.')
|
552
545
|
|
553
546
|
|
554
547
|
def get_cos_regions() -> List[str]:
|