skypilot-nightly 1.0.0.dev20241221__py3-none-any.whl → 1.0.0.dev20241223__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +1 -1
- sky/check.py +1 -1
- sky/cli.py +24 -26
- sky/cloud_stores.py +1 -1
- sky/clouds/gcp.py +1 -1
- sky/clouds/kubernetes.py +1 -1
- sky/clouds/service_catalog/common.py +11 -10
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +1 -1
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/utils/scp_utils.py +3 -3
- sky/core.py +3 -3
- sky/data/storage.py +1 -1
- sky/jobs/core.py +2 -2
- sky/jobs/state.py +2 -2
- sky/jobs/utils.py +6 -6
- sky/optimizer.py +3 -3
- sky/provision/aws/config.py +2 -2
- sky/provision/gcp/config.py +3 -3
- sky/provision/kubernetes/config.py +7 -7
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +2 -2
- sky/provision/lambda_cloud/lambda_utils.py +3 -3
- sky/provision/oci/query_utils.py +3 -3
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +6 -7
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +4 -4
- sky/serve/autoscalers.py +2 -2
- sky/serve/core.py +4 -4
- sky/serve/replica_managers.py +1 -1
- sky/serve/serve_state.py +1 -1
- sky/serve/serve_utils.py +11 -10
- sky/serve/service_spec.py +8 -5
- sky/skylet/job_lib.py +1 -1
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +1 -1
- sky/skylet/providers/scp/node_provider.py +7 -7
- sky/task.py +1 -1
- sky/utils/accelerator_registry.py +1 -1
- sky/utils/common_utils.py +1 -1
- sky/utils/dag_utils.py +1 -1
- sky/utils/kubernetes/gpu_labeler.py +1 -1
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- {skypilot_nightly-1.0.0.dev20241221.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241221.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/RECORD +50 -50
- {skypilot_nightly-1.0.0.dev20241221.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241221.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241221.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241221.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '2bd7c3ed35ce27d5ffeb9010e23da8d9ebb3ffa7'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20241223'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -2626,7 +2626,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
2626
2626
|
self._optimize_target) or optimizer.OptimizeTarget.COST
|
2627
2627
|
self._requested_features = kwargs.pop('requested_features',
|
2628
2628
|
self._requested_features)
|
2629
|
-
assert
|
2629
|
+
assert not kwargs, f'Unexpected kwargs: {kwargs}'
|
2630
2630
|
|
2631
2631
|
def check_resources_fit_cluster(
|
2632
2632
|
self,
|
sky/check.py
CHANGED
@@ -127,7 +127,7 @@ def check(
|
|
127
127
|
'\nNote: The following clouds were disabled because they were not '
|
128
128
|
'included in allowed_clouds in ~/.sky/config.yaml: '
|
129
129
|
f'{", ".join([c for c in disallowed_cloud_names])}')
|
130
|
-
if
|
130
|
+
if not all_enabled_clouds:
|
131
131
|
echo(
|
132
132
|
click.style(
|
133
133
|
'No cloud is enabled. SkyPilot will not be able to run any '
|
sky/cli.py
CHANGED
@@ -114,7 +114,7 @@ def _get_glob_clusters(clusters: List[str], silent: bool = False) -> List[str]:
|
|
114
114
|
glob_clusters = []
|
115
115
|
for cluster in clusters:
|
116
116
|
glob_cluster = global_user_state.get_glob_cluster_names(cluster)
|
117
|
-
if
|
117
|
+
if not glob_cluster and not silent:
|
118
118
|
click.echo(f'Cluster {cluster} not found.')
|
119
119
|
glob_clusters.extend(glob_cluster)
|
120
120
|
return list(set(glob_clusters))
|
@@ -125,7 +125,7 @@ def _get_glob_storages(storages: List[str]) -> List[str]:
|
|
125
125
|
glob_storages = []
|
126
126
|
for storage_object in storages:
|
127
127
|
glob_storage = global_user_state.get_glob_storage_name(storage_object)
|
128
|
-
if
|
128
|
+
if not glob_storage:
|
129
129
|
click.echo(f'Storage {storage_object} not found.')
|
130
130
|
glob_storages.extend(glob_storage)
|
131
131
|
return list(set(glob_storages))
|
@@ -1473,7 +1473,7 @@ def _get_services(service_names: Optional[List[str]],
|
|
1473
1473
|
if len(service_records) != 1:
|
1474
1474
|
plural = 's' if len(service_records) > 1 else ''
|
1475
1475
|
service_num = (str(len(service_records))
|
1476
|
-
if
|
1476
|
+
if service_records else 'No')
|
1477
1477
|
raise click.UsageError(
|
1478
1478
|
f'{service_num} service{plural} found. Please specify '
|
1479
1479
|
'an existing service to show its endpoint. Usage: '
|
@@ -1696,8 +1696,7 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1696
1696
|
if len(clusters) != 1:
|
1697
1697
|
with ux_utils.print_exception_no_traceback():
|
1698
1698
|
plural = 's' if len(clusters) > 1 else ''
|
1699
|
-
cluster_num = (str(len(clusters))
|
1700
|
-
if len(clusters) > 0 else 'No')
|
1699
|
+
cluster_num = (str(len(clusters)) if clusters else 'No')
|
1701
1700
|
cause = 'a single' if len(clusters) > 1 else 'an existing'
|
1702
1701
|
raise ValueError(
|
1703
1702
|
_STATUS_PROPERTY_CLUSTER_NUM_ERROR_MESSAGE.format(
|
@@ -1722,9 +1721,8 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1722
1721
|
with ux_utils.print_exception_no_traceback():
|
1723
1722
|
plural = 's' if len(cluster_records) > 1 else ''
|
1724
1723
|
cluster_num = (str(len(cluster_records))
|
1725
|
-
if
|
1726
|
-
|
1727
|
-
verb = 'found' if len(cluster_records) > 0 else 'not found'
|
1724
|
+
if cluster_records else f'{clusters[0]!r}')
|
1725
|
+
verb = 'found' if cluster_records else 'not found'
|
1728
1726
|
cause = 'a single' if len(clusters) > 1 else 'an existing'
|
1729
1727
|
raise ValueError(
|
1730
1728
|
_STATUS_PROPERTY_CLUSTER_NUM_ERROR_MESSAGE.format(
|
@@ -2470,7 +2468,7 @@ def start(
|
|
2470
2468
|
'(see `sky status`), or the -a/--all flag.')
|
2471
2469
|
|
2472
2470
|
if all:
|
2473
|
-
if
|
2471
|
+
if clusters:
|
2474
2472
|
click.echo('Both --all and cluster(s) specified for sky start. '
|
2475
2473
|
'Letting --all take effect.')
|
2476
2474
|
|
@@ -2800,7 +2798,7 @@ def _down_or_stop_clusters(
|
|
2800
2798
|
option_str = '{stop,down}'
|
2801
2799
|
operation = f'{verb} auto{option_str} on'
|
2802
2800
|
|
2803
|
-
if
|
2801
|
+
if names:
|
2804
2802
|
controllers = [
|
2805
2803
|
name for name in names
|
2806
2804
|
if controller_utils.Controllers.from_name(name) is not None
|
@@ -2814,7 +2812,7 @@ def _down_or_stop_clusters(
|
|
2814
2812
|
# Make sure the controllers are explicitly specified without other
|
2815
2813
|
# normal clusters.
|
2816
2814
|
if controllers:
|
2817
|
-
if
|
2815
|
+
if names:
|
2818
2816
|
names_str = ', '.join(map(repr, names))
|
2819
2817
|
raise click.UsageError(
|
2820
2818
|
f'{operation} controller(s) '
|
@@ -2867,7 +2865,7 @@ def _down_or_stop_clusters(
|
|
2867
2865
|
|
2868
2866
|
if apply_to_all:
|
2869
2867
|
all_clusters = global_user_state.get_clusters()
|
2870
|
-
if
|
2868
|
+
if names:
|
2871
2869
|
click.echo(
|
2872
2870
|
f'Both --all and cluster(s) specified for `sky {command}`. '
|
2873
2871
|
'Letting --all take effect.')
|
@@ -2894,7 +2892,7 @@ def _down_or_stop_clusters(
|
|
2894
2892
|
click.echo('Cluster(s) not found (tip: see `sky status`).')
|
2895
2893
|
return
|
2896
2894
|
|
2897
|
-
if not no_confirm and
|
2895
|
+
if not no_confirm and clusters:
|
2898
2896
|
cluster_str = 'clusters' if len(clusters) > 1 else 'cluster'
|
2899
2897
|
cluster_list = ', '.join(clusters)
|
2900
2898
|
click.confirm(
|
@@ -3003,7 +3001,7 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3003
3001
|
# Check only specific clouds - AWS and GCP.
|
3004
3002
|
sky check aws gcp
|
3005
3003
|
"""
|
3006
|
-
clouds_arg = clouds if
|
3004
|
+
clouds_arg = clouds if clouds else None
|
3007
3005
|
sky_check.check(verbose=verbose, clouds=clouds_arg)
|
3008
3006
|
|
3009
3007
|
|
@@ -3138,7 +3136,7 @@ def show_gpus(
|
|
3138
3136
|
f'capacity ({list(capacity.keys())}), '
|
3139
3137
|
f'and available ({list(available.keys())}) '
|
3140
3138
|
'must be same.')
|
3141
|
-
if
|
3139
|
+
if not counts:
|
3142
3140
|
err_msg = 'No GPUs found in Kubernetes cluster. '
|
3143
3141
|
debug_msg = 'To further debug, run: sky check '
|
3144
3142
|
if name_filter is not None:
|
@@ -3282,7 +3280,7 @@ def show_gpus(
|
|
3282
3280
|
for tpu in service_catalog.get_tpus():
|
3283
3281
|
if tpu in result:
|
3284
3282
|
tpu_table.add_row([tpu, _list_to_str(result.pop(tpu))])
|
3285
|
-
if
|
3283
|
+
if tpu_table.get_string():
|
3286
3284
|
yield '\n\n'
|
3287
3285
|
yield from tpu_table.get_string()
|
3288
3286
|
|
@@ -3393,7 +3391,7 @@ def show_gpus(
|
|
3393
3391
|
yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3394
3392
|
f'Cloud GPUs{colorama.Style.RESET_ALL}\n')
|
3395
3393
|
|
3396
|
-
if
|
3394
|
+
if not result:
|
3397
3395
|
quantity_str = (f' with requested quantity {quantity}'
|
3398
3396
|
if quantity else '')
|
3399
3397
|
cloud_str = f' on {cloud_obj}.' if cloud_name else ' in cloud catalogs.'
|
@@ -3522,7 +3520,7 @@ def storage_delete(names: List[str], all: bool, yes: bool): # pylint: disable=r
|
|
3522
3520
|
# Delete all storage objects.
|
3523
3521
|
sky storage delete -a
|
3524
3522
|
"""
|
3525
|
-
if sum([
|
3523
|
+
if sum([bool(names), all]) != 1:
|
3526
3524
|
raise click.UsageError('Either --all or a name must be specified.')
|
3527
3525
|
if all:
|
3528
3526
|
storages = sky.storage_ls()
|
@@ -3881,8 +3879,8 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
|
|
3881
3879
|
exit_if_not_accessible=True)
|
3882
3880
|
|
3883
3881
|
job_id_str = ','.join(map(str, job_ids))
|
3884
|
-
if sum([
|
3885
|
-
argument_str = f'--job-ids {job_id_str}' if
|
3882
|
+
if sum([bool(job_ids), name is not None, all]) != 1:
|
3883
|
+
argument_str = f'--job-ids {job_id_str}' if job_ids else ''
|
3886
3884
|
argument_str += f' --name {name}' if name is not None else ''
|
3887
3885
|
argument_str += ' --all' if all else ''
|
3888
3886
|
raise click.UsageError(
|
@@ -4523,9 +4521,9 @@ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool,
|
|
4523
4521
|
# Forcefully tear down a specific replica, even in failed status.
|
4524
4522
|
sky serve down my-service --replica-id 1 --purge
|
4525
4523
|
"""
|
4526
|
-
if sum([
|
4527
|
-
argument_str = f'SERVICE_NAMES={",".join(service_names)}'
|
4528
|
-
|
4524
|
+
if sum([bool(service_names), all]) != 1:
|
4525
|
+
argument_str = (f'SERVICE_NAMES={",".join(service_names)}'
|
4526
|
+
if service_names else '')
|
4529
4527
|
argument_str += ' --all' if all else ''
|
4530
4528
|
raise click.UsageError(
|
4531
4529
|
'Can only specify one of SERVICE_NAMES or --all. '
|
@@ -4898,7 +4896,7 @@ def benchmark_launch(
|
|
4898
4896
|
if idle_minutes_to_autostop is None:
|
4899
4897
|
idle_minutes_to_autostop = 5
|
4900
4898
|
commandline_args['idle-minutes-to-autostop'] = idle_minutes_to_autostop
|
4901
|
-
if
|
4899
|
+
if env:
|
4902
4900
|
commandline_args['env'] = [f'{k}={v}' for k, v in env]
|
4903
4901
|
|
4904
4902
|
# Launch the benchmarking clusters in detach mode in parallel.
|
@@ -5177,7 +5175,7 @@ def benchmark_delete(benchmarks: Tuple[str], all: Optional[bool],
|
|
5177
5175
|
raise click.BadParameter(
|
5178
5176
|
'Either specify benchmarks or use --all to delete all benchmarks.')
|
5179
5177
|
to_delete = []
|
5180
|
-
if
|
5178
|
+
if benchmarks:
|
5181
5179
|
for benchmark in benchmarks:
|
5182
5180
|
record = benchmark_state.get_benchmark_from_name(benchmark)
|
5183
5181
|
if record is None:
|
@@ -5186,7 +5184,7 @@ def benchmark_delete(benchmarks: Tuple[str], all: Optional[bool],
|
|
5186
5184
|
to_delete.append(record)
|
5187
5185
|
if all:
|
5188
5186
|
to_delete = benchmark_state.get_benchmarks()
|
5189
|
-
if
|
5187
|
+
if benchmarks:
|
5190
5188
|
print('Both --all and benchmark(s) specified '
|
5191
5189
|
'for sky bench delete. Letting --all take effect.')
|
5192
5190
|
|
sky/cloud_stores.py
CHANGED
@@ -133,7 +133,7 @@ class GcsCloudStorage(CloudStorage):
|
|
133
133
|
# If <url> is a bucket root, then we only need `gsutil` to succeed
|
134
134
|
# to make sure the bucket exists. It is already a directory.
|
135
135
|
_, key = data_utils.split_gcs_path(url)
|
136
|
-
if
|
136
|
+
if not key:
|
137
137
|
return True
|
138
138
|
# Otherwise, gsutil ls -d url will return:
|
139
139
|
# --> url.rstrip('/') if url is not a directory
|
sky/clouds/gcp.py
CHANGED
@@ -830,7 +830,7 @@ class GCP(clouds.Cloud):
|
|
830
830
|
ret_permissions = request.execute().get('permissions', [])
|
831
831
|
|
832
832
|
diffs = set(gcp_minimal_permissions).difference(set(ret_permissions))
|
833
|
-
if
|
833
|
+
if diffs:
|
834
834
|
identity_str = identity[0] if identity else None
|
835
835
|
return False, (
|
836
836
|
'The following permissions are not enabled for the current '
|
sky/clouds/kubernetes.py
CHANGED
@@ -139,7 +139,7 @@ class Kubernetes(clouds.Cloud):
|
|
139
139
|
use the service account mounted in the pod.
|
140
140
|
"""
|
141
141
|
all_contexts = kubernetes_utils.get_all_kube_context_names()
|
142
|
-
if
|
142
|
+
if not all_contexts:
|
143
143
|
return []
|
144
144
|
|
145
145
|
all_contexts = set(all_contexts)
|
@@ -270,9 +270,10 @@ def validate_region_zone_impl(
|
|
270
270
|
candidate_loc = difflib.get_close_matches(loc, all_loc, n=5, cutoff=0.9)
|
271
271
|
candidate_loc = sorted(candidate_loc)
|
272
272
|
candidate_strs = ''
|
273
|
-
if
|
273
|
+
if candidate_loc:
|
274
274
|
candidate_strs = ', '.join(candidate_loc)
|
275
275
|
candidate_strs = f'\nDid you mean one of these: {candidate_strs!r}?'
|
276
|
+
|
276
277
|
return candidate_strs
|
277
278
|
|
278
279
|
def _get_all_supported_regions_str() -> str:
|
@@ -286,7 +287,7 @@ def validate_region_zone_impl(
|
|
286
287
|
filter_df = df
|
287
288
|
if region is not None:
|
288
289
|
filter_df = _filter_region_zone(filter_df, region, zone=None)
|
289
|
-
if
|
290
|
+
if filter_df.empty:
|
290
291
|
with ux_utils.print_exception_no_traceback():
|
291
292
|
error_msg = (f'Invalid region {region!r}')
|
292
293
|
candidate_strs = _get_candidate_str(
|
@@ -310,7 +311,7 @@ def validate_region_zone_impl(
|
|
310
311
|
if zone is not None:
|
311
312
|
maybe_region_df = filter_df
|
312
313
|
filter_df = filter_df[filter_df['AvailabilityZone'] == zone]
|
313
|
-
if
|
314
|
+
if filter_df.empty:
|
314
315
|
region_str = f' for region {region!r}' if region else ''
|
315
316
|
df = maybe_region_df if region else df
|
316
317
|
with ux_utils.print_exception_no_traceback():
|
@@ -378,7 +379,7 @@ def get_vcpus_mem_from_instance_type_impl(
|
|
378
379
|
instance_type: str,
|
379
380
|
) -> Tuple[Optional[float], Optional[float]]:
|
380
381
|
df = _get_instance_type(df, instance_type, None)
|
381
|
-
if
|
382
|
+
if df.empty:
|
382
383
|
with ux_utils.print_exception_no_traceback():
|
383
384
|
raise ValueError(f'No instance type {instance_type} found.')
|
384
385
|
assert len(set(df['vCPUs'])) == 1, ('Cannot determine the number of vCPUs '
|
@@ -484,7 +485,7 @@ def get_accelerators_from_instance_type_impl(
|
|
484
485
|
instance_type: str,
|
485
486
|
) -> Optional[Dict[str, Union[int, float]]]:
|
486
487
|
df = _get_instance_type(df, instance_type, None)
|
487
|
-
if
|
488
|
+
if df.empty:
|
488
489
|
with ux_utils.print_exception_no_traceback():
|
489
490
|
raise ValueError(f'No instance type {instance_type} found.')
|
490
491
|
row = df.iloc[0]
|
@@ -518,7 +519,7 @@ def get_instance_type_for_accelerator_impl(
|
|
518
519
|
result = df[(df['AcceleratorName'].str.fullmatch(acc_name, case=False)) &
|
519
520
|
(abs(df['AcceleratorCount'] - acc_count) <= 0.01)]
|
520
521
|
result = _filter_region_zone(result, region, zone)
|
521
|
-
if
|
522
|
+
if result.empty:
|
522
523
|
fuzzy_result = df[
|
523
524
|
(df['AcceleratorName'].str.contains(acc_name, case=False)) &
|
524
525
|
(df['AcceleratorCount'] >= acc_count)]
|
@@ -527,7 +528,7 @@ def get_instance_type_for_accelerator_impl(
|
|
527
528
|
fuzzy_result = fuzzy_result[['AcceleratorName',
|
528
529
|
'AcceleratorCount']].drop_duplicates()
|
529
530
|
fuzzy_candidate_list = []
|
530
|
-
if
|
531
|
+
if not fuzzy_result.empty:
|
531
532
|
for _, row in fuzzy_result.iterrows():
|
532
533
|
acc_cnt = float(row['AcceleratorCount'])
|
533
534
|
acc_count_display = (int(acc_cnt) if acc_cnt.is_integer() else
|
@@ -539,7 +540,7 @@ def get_instance_type_for_accelerator_impl(
|
|
539
540
|
result = _filter_with_cpus(result, cpus)
|
540
541
|
result = _filter_with_mem(result, memory)
|
541
542
|
result = _filter_region_zone(result, region, zone)
|
542
|
-
if
|
543
|
+
if result.empty:
|
543
544
|
return ([], [])
|
544
545
|
|
545
546
|
# Current strategy: choose the cheapest instance
|
@@ -680,7 +681,7 @@ def get_image_id_from_tag_impl(df: 'pd.DataFrame', tag: str,
|
|
680
681
|
df = _filter_region_zone(df, region, zone=None)
|
681
682
|
assert len(df) <= 1, ('Multiple images found for tag '
|
682
683
|
f'{tag} in region {region}')
|
683
|
-
if
|
684
|
+
if df.empty:
|
684
685
|
return None
|
685
686
|
image_id = df['ImageId'].iloc[0]
|
686
687
|
if pd.isna(image_id):
|
@@ -694,4 +695,4 @@ def is_image_tag_valid_impl(df: 'pd.DataFrame', tag: str,
|
|
694
695
|
df = df[df['Tag'] == tag]
|
695
696
|
df = _filter_region_zone(df, region, zone=None)
|
696
697
|
df = df.dropna(subset=['ImageId'])
|
697
|
-
return
|
698
|
+
return not df.empty
|
@@ -134,7 +134,7 @@ def get_pricing_df(region: Optional[str] = None) -> 'pd.DataFrame':
|
|
134
134
|
content_str = r.content.decode('ascii')
|
135
135
|
content = json.loads(content_str)
|
136
136
|
items = content.get('Items', [])
|
137
|
-
if
|
137
|
+
if not items:
|
138
138
|
break
|
139
139
|
all_items += items
|
140
140
|
url = content.get('NextPageLink')
|
@@ -534,7 +534,7 @@ def initialize_images_csv(csv_saving_path: str, vc_object,
|
|
534
534
|
gpu_name = tag_name.split('-')[1]
|
535
535
|
if gpu_name not in gpu_tags:
|
536
536
|
gpu_tags.append(gpu_name)
|
537
|
-
if
|
537
|
+
if gpu_tags:
|
538
538
|
gpu_tags_str = str(gpu_tags).replace('\'', '\"')
|
539
539
|
f.write(f'{item.id},{vcenter_name},{item_cpu},{item_memory}'
|
540
540
|
f',,,\'{gpu_tags_str}\'\n')
|
sky/clouds/utils/scp_utils.py
CHANGED
@@ -65,7 +65,7 @@ class Metadata:
|
|
65
65
|
if value is None:
|
66
66
|
if instance_id in metadata:
|
67
67
|
metadata.pop(instance_id) # del entry
|
68
|
-
if
|
68
|
+
if not metadata:
|
69
69
|
if os.path.exists(self.path):
|
70
70
|
os.remove(self.path)
|
71
71
|
return
|
@@ -84,7 +84,7 @@ class Metadata:
|
|
84
84
|
for instance_id in list(metadata.keys()):
|
85
85
|
if instance_id not in instance_ids:
|
86
86
|
del metadata[instance_id]
|
87
|
-
if
|
87
|
+
if not metadata:
|
88
88
|
os.remove(self.path)
|
89
89
|
return
|
90
90
|
with open(self.path, 'w', encoding='utf-8') as f:
|
@@ -410,7 +410,7 @@ class SCPClient:
|
|
410
410
|
parameter.append('vpcId=' + vpc_id)
|
411
411
|
if sg_name is not None:
|
412
412
|
parameter.append('securityGroupName=' + sg_name)
|
413
|
-
if
|
413
|
+
if parameter:
|
414
414
|
url = url + '?' + '&'.join(parameter)
|
415
415
|
return self._get(url)
|
416
416
|
|
sky/core.py
CHANGED
@@ -732,7 +732,7 @@ def cancel(
|
|
732
732
|
f'{colorama.Fore.YELLOW}'
|
733
733
|
f'Cancelling latest running job on cluster {cluster_name!r}...'
|
734
734
|
f'{colorama.Style.RESET_ALL}')
|
735
|
-
elif
|
735
|
+
elif job_ids:
|
736
736
|
# all = False, len(job_ids) > 0 => cancel the specified jobs.
|
737
737
|
jobs_str = ', '.join(map(str, job_ids))
|
738
738
|
sky_logging.print(
|
@@ -817,7 +817,7 @@ def download_logs(
|
|
817
817
|
backend = backend_utils.get_backend_from_handle(handle)
|
818
818
|
assert isinstance(backend, backends.CloudVmRayBackend), backend
|
819
819
|
|
820
|
-
if job_ids is not None and
|
820
|
+
if job_ids is not None and not job_ids:
|
821
821
|
return {}
|
822
822
|
|
823
823
|
usage_lib.record_cluster_name_for_current_operation(cluster_name)
|
@@ -866,7 +866,7 @@ def job_status(cluster_name: str,
|
|
866
866
|
f'of type {backend.__class__.__name__!r}.')
|
867
867
|
assert isinstance(handle, backends.CloudVmRayResourceHandle), handle
|
868
868
|
|
869
|
-
if job_ids is not None and
|
869
|
+
if job_ids is not None and not job_ids:
|
870
870
|
return {}
|
871
871
|
|
872
872
|
sky_logging.print(f'{colorama.Fore.YELLOW}'
|
sky/data/storage.py
CHANGED
@@ -1067,7 +1067,7 @@ class Storage(object):
|
|
1067
1067
|
add_if_not_none('source', self.source)
|
1068
1068
|
|
1069
1069
|
stores = None
|
1070
|
-
if
|
1070
|
+
if self.stores:
|
1071
1071
|
stores = ','.join([store.value for store in self.stores])
|
1072
1072
|
add_if_not_none('store', stores)
|
1073
1073
|
add_if_not_none('persistent', self.persistent)
|
sky/jobs/core.py
CHANGED
@@ -347,8 +347,8 @@ def cancel(name: Optional[str] = None,
|
|
347
347
|
stopped_message='All managed jobs should have finished.')
|
348
348
|
|
349
349
|
job_id_str = ','.join(map(str, job_ids))
|
350
|
-
if sum([
|
351
|
-
argument_str = f'job_ids={job_id_str}' if
|
350
|
+
if sum([bool(job_ids), name is not None, all]) != 1:
|
351
|
+
argument_str = f'job_ids={job_id_str}' if job_ids else ''
|
352
352
|
argument_str += f' name={name}' if name is not None else ''
|
353
353
|
argument_str += ' all' if all else ''
|
354
354
|
with ux_utils.print_exception_no_traceback():
|
sky/jobs/state.py
CHANGED
@@ -591,7 +591,7 @@ def get_latest_task_id_status(
|
|
591
591
|
If the job_id does not exist, (None, None) will be returned.
|
592
592
|
"""
|
593
593
|
id_statuses = _get_all_task_ids_statuses(job_id)
|
594
|
-
if
|
594
|
+
if not id_statuses:
|
595
595
|
return None, None
|
596
596
|
task_id, status = id_statuses[-1]
|
597
597
|
for task_id, status in id_statuses:
|
@@ -617,7 +617,7 @@ def get_failure_reason(job_id: int) -> Optional[str]:
|
|
617
617
|
WHERE spot_job_id=(?)
|
618
618
|
ORDER BY task_id ASC""", (job_id,)).fetchall()
|
619
619
|
reason = [r[0] for r in reason if r[0] is not None]
|
620
|
-
if
|
620
|
+
if not reason:
|
621
621
|
return None
|
622
622
|
return reason[0]
|
623
623
|
|
sky/jobs/utils.py
CHANGED
@@ -234,11 +234,11 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]]) -> str:
|
|
234
234
|
if job_ids is None:
|
235
235
|
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(None)
|
236
236
|
job_ids = list(set(job_ids))
|
237
|
-
if
|
237
|
+
if not job_ids:
|
238
238
|
return 'No job to cancel.'
|
239
239
|
job_id_str = ', '.join(map(str, job_ids))
|
240
240
|
logger.info(f'Cancelling jobs {job_id_str}.')
|
241
|
-
cancelled_job_ids = []
|
241
|
+
cancelled_job_ids: List[int] = []
|
242
242
|
for job_id in job_ids:
|
243
243
|
# Check the status of the managed job status. If it is in
|
244
244
|
# terminal state, we can safely skip it.
|
@@ -268,7 +268,7 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]]) -> str:
|
|
268
268
|
shutil.copy(str(signal_file), str(legacy_signal_file))
|
269
269
|
cancelled_job_ids.append(job_id)
|
270
270
|
|
271
|
-
if
|
271
|
+
if not cancelled_job_ids:
|
272
272
|
return 'No job to cancel.'
|
273
273
|
identity_str = f'Job with ID {cancelled_job_ids[0]} is'
|
274
274
|
if len(cancelled_job_ids) > 1:
|
@@ -281,7 +281,7 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]]) -> str:
|
|
281
281
|
def cancel_job_by_name(job_name: str) -> str:
|
282
282
|
"""Cancel a job by name."""
|
283
283
|
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(job_name)
|
284
|
-
if
|
284
|
+
if not job_ids:
|
285
285
|
return f'No running job found with name {job_name!r}.'
|
286
286
|
if len(job_ids) > 1:
|
287
287
|
return (f'{colorama.Fore.RED}Multiple running jobs found '
|
@@ -515,7 +515,7 @@ def stream_logs(job_id: Optional[int],
|
|
515
515
|
for job in managed_jobs
|
516
516
|
if job['job_name'] == job_name
|
517
517
|
}
|
518
|
-
if
|
518
|
+
if not managed_job_ids:
|
519
519
|
return f'No managed job found with name {job_name!r}.'
|
520
520
|
if len(managed_job_ids) > 1:
|
521
521
|
job_ids_str = ', '.join(
|
@@ -541,7 +541,7 @@ def stream_logs(job_id: Optional[int],
|
|
541
541
|
if job_id is None:
|
542
542
|
assert job_name is not None
|
543
543
|
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(job_name)
|
544
|
-
if
|
544
|
+
if not job_ids:
|
545
545
|
return f'No running managed job found with name {job_name!r}.'
|
546
546
|
if len(job_ids) > 1:
|
547
547
|
raise ValueError(
|
sky/optimizer.py
CHANGED
@@ -188,7 +188,7 @@ class Optimizer:
|
|
188
188
|
"""Removes special Source and Sink nodes."""
|
189
189
|
source = [t for t in dag.tasks if t.name == _DUMMY_SOURCE_NAME]
|
190
190
|
sink = [t for t in dag.tasks if t.name == _DUMMY_SINK_NAME]
|
191
|
-
if
|
191
|
+
if not source and not sink:
|
192
192
|
return
|
193
193
|
assert len(source) == len(sink) == 1, dag.tasks
|
194
194
|
dag.remove(source[0])
|
@@ -1298,7 +1298,7 @@ def _fill_in_launchable_resources(
|
|
1298
1298
|
resources, num_nodes=task.num_nodes)
|
1299
1299
|
if feasible_resources.hint is not None:
|
1300
1300
|
hints[cloud] = feasible_resources.hint
|
1301
|
-
if
|
1301
|
+
if feasible_resources.resources_list:
|
1302
1302
|
# Assume feasible_resources is sorted by prices. Guaranteed by
|
1303
1303
|
# the implementation of get_feasible_launchable_resources and
|
1304
1304
|
# the underlying service_catalog filtering
|
@@ -1310,7 +1310,7 @@ def _fill_in_launchable_resources(
|
|
1310
1310
|
else:
|
1311
1311
|
all_fuzzy_candidates.update(
|
1312
1312
|
feasible_resources.fuzzy_candidate_list)
|
1313
|
-
if
|
1313
|
+
if not launchable[resources]:
|
1314
1314
|
clouds_str = str(clouds_list) if len(clouds_list) > 1 else str(
|
1315
1315
|
clouds_list[0])
|
1316
1316
|
num_node_str = ''
|
sky/provision/aws/config.py
CHANGED
@@ -279,7 +279,7 @@ def _is_subnet_public(ec2, subnet_id, vpc_id: Optional[str]) -> bool:
|
|
279
279
|
logger.debug(f'subnet {subnet_id} route tables: {route_tables}')
|
280
280
|
if _has_igw_route(route_tables):
|
281
281
|
return True
|
282
|
-
if
|
282
|
+
if route_tables:
|
283
283
|
return False
|
284
284
|
|
285
285
|
# Handle the case that a "main" route table is implicitly associated with
|
@@ -454,7 +454,7 @@ def _vpc_id_from_security_group_ids(ec2, sg_ids: List[str]) -> Any:
|
|
454
454
|
|
455
455
|
no_sg_msg = ('Failed to detect a security group with id equal to any of '
|
456
456
|
'the configured SecurityGroupIds.')
|
457
|
-
assert
|
457
|
+
assert vpc_ids, no_sg_msg
|
458
458
|
|
459
459
|
return vpc_ids[0]
|
460
460
|
|
sky/provision/gcp/config.py
CHANGED
@@ -397,7 +397,7 @@ def _check_firewall_rules(cluster_name: str, vpc_name: str, project_id: str,
|
|
397
397
|
operation = compute.networks().getEffectiveFirewalls(project=project_id,
|
398
398
|
network=vpc_name)
|
399
399
|
response = operation.execute()
|
400
|
-
if
|
400
|
+
if not response:
|
401
401
|
return False
|
402
402
|
effective_rules = response['firewalls']
|
403
403
|
|
@@ -515,7 +515,7 @@ def _create_rules(project_id: str, compute, rules, vpc_name):
|
|
515
515
|
rule_list = _list_firewall_rules(project_id,
|
516
516
|
compute,
|
517
517
|
filter=f'(name={rule_name})')
|
518
|
-
if
|
518
|
+
if rule_list:
|
519
519
|
_delete_firewall_rule(project_id, compute, rule_name)
|
520
520
|
|
521
521
|
body = rule.copy()
|
@@ -624,7 +624,7 @@ def get_usable_vpc_and_subnet(
|
|
624
624
|
vpc_list = _list_vpcnets(project_id,
|
625
625
|
compute,
|
626
626
|
filter=f'name={constants.SKYPILOT_VPC_NAME}')
|
627
|
-
if
|
627
|
+
if not vpc_list:
|
628
628
|
body = constants.VPC_TEMPLATE.copy()
|
629
629
|
body['name'] = body['name'].format(VPC_NAME=constants.SKYPILOT_VPC_NAME)
|
630
630
|
body['selfLink'] = body['selfLink'].format(
|
@@ -232,7 +232,7 @@ def _get_resource(container_resources: Dict[str, Any], resource_name: str,
|
|
232
232
|
# Look for keys containing the resource_name. For example,
|
233
233
|
# the key 'nvidia.com/gpu' contains the key 'gpu'.
|
234
234
|
matching_keys = [key for key in resources if resource_name in key.lower()]
|
235
|
-
if
|
235
|
+
if not matching_keys:
|
236
236
|
return float('inf')
|
237
237
|
if len(matching_keys) > 1:
|
238
238
|
# Should have only one match -- mostly relevant for gpu.
|
@@ -265,7 +265,7 @@ def _configure_autoscaler_service_account(
|
|
265
265
|
field_selector = f'metadata.name={name}'
|
266
266
|
accounts = (kubernetes.core_api(context).list_namespaced_service_account(
|
267
267
|
namespace, field_selector=field_selector).items)
|
268
|
-
if
|
268
|
+
if accounts:
|
269
269
|
assert len(accounts) == 1
|
270
270
|
# Nothing to check for equality and patch here,
|
271
271
|
# since the service_account.metadata.name is the only important
|
@@ -308,7 +308,7 @@ def _configure_autoscaler_role(namespace: str, context: Optional[str],
|
|
308
308
|
field_selector = f'metadata.name={name}'
|
309
309
|
roles = (kubernetes.auth_api(context).list_namespaced_role(
|
310
310
|
namespace, field_selector=field_selector).items)
|
311
|
-
if
|
311
|
+
if roles:
|
312
312
|
assert len(roles) == 1
|
313
313
|
existing_role = roles[0]
|
314
314
|
# Convert to k8s object to compare
|
@@ -374,7 +374,7 @@ def _configure_autoscaler_role_binding(
|
|
374
374
|
field_selector = f'metadata.name={name}'
|
375
375
|
role_bindings = (kubernetes.auth_api(context).list_namespaced_role_binding(
|
376
376
|
rb_namespace, field_selector=field_selector).items)
|
377
|
-
if
|
377
|
+
if role_bindings:
|
378
378
|
assert len(role_bindings) == 1
|
379
379
|
existing_binding = role_bindings[0]
|
380
380
|
new_rb = kubernetes_utils.dict_to_k8s_object(binding, 'V1RoleBinding')
|
@@ -415,7 +415,7 @@ def _configure_autoscaler_cluster_role(namespace, context,
|
|
415
415
|
field_selector = f'metadata.name={name}'
|
416
416
|
cluster_roles = (kubernetes.auth_api(context).list_cluster_role(
|
417
417
|
field_selector=field_selector).items)
|
418
|
-
if
|
418
|
+
if cluster_roles:
|
419
419
|
assert len(cluster_roles) == 1
|
420
420
|
existing_cr = cluster_roles[0]
|
421
421
|
new_cr = kubernetes_utils.dict_to_k8s_object(role, 'V1ClusterRole')
|
@@ -460,7 +460,7 @@ def _configure_autoscaler_cluster_role_binding(
|
|
460
460
|
field_selector = f'metadata.name={name}'
|
461
461
|
cr_bindings = (kubernetes.auth_api(context).list_cluster_role_binding(
|
462
462
|
field_selector=field_selector).items)
|
463
|
-
if
|
463
|
+
if cr_bindings:
|
464
464
|
assert len(cr_bindings) == 1
|
465
465
|
existing_binding = cr_bindings[0]
|
466
466
|
new_binding = kubernetes_utils.dict_to_k8s_object(
|
@@ -639,7 +639,7 @@ def _configure_services(namespace: str, context: Optional[str],
|
|
639
639
|
field_selector = f'metadata.name={name}'
|
640
640
|
services = (kubernetes.core_api(context).list_namespaced_service(
|
641
641
|
namespace, field_selector=field_selector).items)
|
642
|
-
if
|
642
|
+
if services:
|
643
643
|
assert len(services) == 1
|
644
644
|
existing_service = services[0]
|
645
645
|
# Convert to k8s object to compare
|
@@ -230,7 +230,7 @@ def get_ingress_external_ip_and_ports(
|
|
230
230
|
namespace, _request_timeout=kubernetes.API_TIMEOUT).items
|
231
231
|
if item.metadata.name == 'ingress-nginx-controller'
|
232
232
|
]
|
233
|
-
if
|
233
|
+
if not ingress_services:
|
234
234
|
return (None, None)
|
235
235
|
|
236
236
|
ingress_service = ingress_services[0]
|