skypilot-nightly 1.0.0.dev20241222__py3-none-any.whl → 1.0.0.dev20241223__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +1 -1
- sky/check.py +1 -1
- sky/cli.py +24 -26
- sky/cloud_stores.py +1 -1
- sky/clouds/gcp.py +1 -1
- sky/clouds/kubernetes.py +1 -1
- sky/clouds/service_catalog/common.py +11 -10
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +1 -1
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/utils/scp_utils.py +3 -3
- sky/core.py +3 -3
- sky/data/storage.py +1 -1
- sky/jobs/core.py +2 -2
- sky/jobs/state.py +2 -2
- sky/jobs/utils.py +6 -6
- sky/optimizer.py +3 -3
- sky/provision/aws/config.py +2 -2
- sky/provision/gcp/config.py +3 -3
- sky/provision/kubernetes/config.py +7 -7
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +2 -2
- sky/provision/lambda_cloud/lambda_utils.py +3 -3
- sky/provision/oci/query_utils.py +3 -3
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +6 -7
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +4 -4
- sky/serve/autoscalers.py +2 -2
- sky/serve/core.py +4 -4
- sky/serve/replica_managers.py +1 -1
- sky/serve/serve_state.py +1 -1
- sky/serve/serve_utils.py +11 -10
- sky/serve/service_spec.py +8 -5
- sky/skylet/job_lib.py +1 -1
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +1 -1
- sky/skylet/providers/scp/node_provider.py +7 -7
- sky/task.py +1 -1
- sky/utils/accelerator_registry.py +1 -1
- sky/utils/common_utils.py +1 -1
- sky/utils/dag_utils.py +1 -1
- sky/utils/kubernetes/gpu_labeler.py +1 -1
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/RECORD +50 -50
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241222.dist-info → skypilot_nightly-1.0.0.dev20241223.dist-info}/top_level.txt +0 -0
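Nearly every hunk below applies the same cleanup: explicit emptiness tests are replaced with Python's implicit truthiness ('if not metadata:', 'if available_regions:', 'assert gpu_nodes, ...'). A minimal sketch of the idiom, using hypothetical names rather than code from the package:

    from typing import Dict, List

    def remove_instance(metadata: Dict[str, dict], instance_id: str) -> None:
        metadata.pop(instance_id, None)
        # Empty containers are falsy, so no len() comparison is needed.
        if not metadata:
            print('metadata is empty; the real code would delete its backing file')

    gpu_nodes: List[str] = ['node-a']
    # Same idiom in assert form: fails with the message only when the list is empty.
    assert gpu_nodes, 'GPU nodes not found'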
sky/provision/kubernetes/utils.py
CHANGED
@@ -583,7 +583,7 @@ def check_instance_fits(context: Optional[str],
             node for node in nodes if gpu_label_key in node.metadata.labels and
             node.metadata.labels[gpu_label_key] == gpu_label_val
         ]
-        assert
+        assert gpu_nodes, 'GPU nodes not found'
         if is_tpu_on_gke(acc_type):
             # If requested accelerator is a TPU type, check if the cluster
             # has sufficient TPU resource to meet the requirement.
@@ -1526,7 +1526,7 @@ def clean_zombie_ssh_jump_pod(namespace: str, context: Optional[str],
     def find(l, predicate):
         """Utility function to find element in given list"""
         results = [x for x in l if predicate(x)]
-        return results[0] if
+        return results[0] if results else None

     # Get the SSH jump pod name from the head pod
     try:
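find() now returns the first match or None. As a design note only: the same behavior can be written lazily with next() and a default, avoiding the intermediate list; a sketch under that assumption:

    def find(l, predicate):
        """Return the first element satisfying predicate, or None."""
        # Equivalent to building `results` and returning
        # `results[0] if results else None`, but stops at the first match.
        return next((x for x in l if predicate(x)), None)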
sky/provision/lambda_cloud/lambda_utils.py
CHANGED
@@ -50,7 +50,7 @@ class Metadata:
         if value is None:
             if instance_id in metadata:
                 metadata.pop(instance_id)  # del entry
-            if
+            if not metadata:
                 if os.path.exists(self.path):
                     os.remove(self.path)
                 return
@@ -69,7 +69,7 @@ class Metadata:
         for instance_id in list(metadata.keys()):
             if instance_id not in instance_ids:
                 del metadata[instance_id]
-        if
+        if not metadata:
             os.remove(self.path)
             return
         with open(self.path, 'w', encoding='utf-8') as f:
@@ -150,7 +150,7 @@ class LambdaCloudClient:
                 ['regions_with_capacity_available'])
             available_regions = [reg['name'] for reg in available_regions]
             if region not in available_regions:
-                if
+                if available_regions:
                     aval_reg = ' '.join(available_regions)
                 else:
                     aval_reg = 'None'
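The available_regions branch could equally be a single conditional expression; a purely illustrative alternative, not what the package ships:

    available_regions = ['us-east-1', 'us-west-2']
    aval_reg = ' '.join(available_regions) if available_regions else 'None'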
sky/provision/oci/query_utils.py
CHANGED
@@ -248,7 +248,7 @@ class QueryHelper:
             limit=1)

         compartments = list_compartments_response.data
-        if
+        if compartments:
             skypilot_compartment = compartments[0].id
             return skypilot_compartment

@@ -274,7 +274,7 @@ class QueryHelper:
             display_name=oci_utils.oci_config.VCN_NAME,
             lifecycle_state='AVAILABLE')
         vcns = list_vcns_response.data
-        if
+        if vcns:
             # Found the VCN.
             skypilot_vcn = vcns[0].id
             list_subnets_response = net_client.list_subnets(
@@ -359,7 +359,7 @@ class QueryHelper:
             if str(s.cidr_block).startswith('all-') and str(s.cidr_block).
             endswith('-services-in-oracle-services-network')
         ]
-        if
+        if services:
             # Create service gateway for regional services.
             create_sg_response = net_client.create_service_gateway(
                 create_service_gateway_details=oci_adaptor.oci.core.models.
sky/provision/vsphere/common/vim_utils.py
CHANGED
@@ -56,7 +56,7 @@ def get_hosts_by_cluster_names(content, vcenter_name, cluster_name_dicts=None):
             'name': cluster.name
         } for cluster in cluster_view.view]
     cluster_view.Destroy()
-    if
+    if not cluster_name_dicts:
         logger.warning(f'vCenter \'{vcenter_name}\' has no clusters')

     # Retrieve all cluster names from the cluster_name_dicts
sky/provision/vsphere/instance.py
CHANGED
@@ -162,7 +162,7 @@ def _create_instances(
     if not gpu_instance:
         # Find an image for CPU
         images_df = images_df[images_df['GpuTags'] == '\'[]\'']
-        if
+        if not images_df:
            logger.error(
                f'Can not find an image for instance type: {instance_type}.')
            raise Exception(
@@ -185,7 +185,7 @@ def _create_instances(
     image_instance_mapping_df = image_instance_mapping_df[
         image_instance_mapping_df['InstanceType'] == instance_type]

-    if
+    if not image_instance_mapping_df:
         raise Exception(f"""There is no image can match instance type named
         {instance_type}
         If you are using CPU-only instance, assign an image with tag
@@ -218,10 +218,9 @@ def _create_instances(
     hosts_df = hosts_df[(hosts_df['AvailableCPUs'] /
                          hosts_df['cpuMhz']) >= cpus_needed]
     hosts_df = hosts_df[hosts_df['AvailableMemory(MB)'] >= memory_needed]
-    assert
-
-
-        f'cpus and {memory_needed}MB memory are required.')
+    assert hosts_df, (f'There is no host available to create the instance '
+                      f'{vms_item["InstanceType"]}, at least {cpus_needed} '
+                      f'cpus and {memory_needed}MB memory are required.')

     # Sort the hosts df by AvailableCPUs to get the compatible host with the
     # least resource
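The rewritten assert spans three lines because its message is a parenthesized group of adjacent f-strings, which Python concatenates implicitly. Only the message may be wrapped this way; parenthesizing the whole statement asserts a tuple, which is always truthy. A minimal illustration with made-up values:

    hosts = ['esxi-01']

    # Correct: parentheses group the implicitly concatenated message strings.
    assert hosts, ('There is no host available: at least 4 '
                   'cpus and 8192MB memory are required.')

    # Pitfall: `assert (hosts, 'message')` asserts a 2-tuple, which is always
    # truthy and can never fail; CPython emits a SyntaxWarning for it.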
@@ -365,7 +364,7 @@ def _choose_vsphere_cluster_name(config: common.ProvisionConfig, region: str,
     skypilot framework-optimized availability_zones"""
     vsphere_cluster_name = None
     vsphere_cluster_name_str = config.provider_config['availability_zone']
-    if
+    if vc_object.clusters:
         for optimized_cluster_name in vsphere_cluster_name_str.split(','):
             if optimized_cluster_name in [
                     item['name'] for item in vc_object.clusters
sky/provision/vsphere/vsphere_utils.py
CHANGED
@@ -257,7 +257,7 @@ class VsphereClient:
         # hard code here. should support configure later.
         profile_name = 'skypilot_policy'
         storage_profile_id = None
-        if
+        if profile_ids:
             profiles = pm.PbmRetrieveContent(profileIds=profile_ids)
             for profile in profiles:
                 if profile_name in profile.name:
sky/resources.py
CHANGED
@@ -661,7 +661,7 @@ class Resources:
                 continue
             valid_clouds.append(cloud)

-        if
+        if not valid_clouds:
             if len(enabled_clouds) == 1:
                 cloud_str = f'for cloud {enabled_clouds[0]}'
             else:
@@ -773,7 +773,7 @@ class Resources:
         for cloud in enabled_clouds:
             if cloud.instance_type_exists(self._instance_type):
                 valid_clouds.append(cloud)
-        if
+        if not valid_clouds:
             if len(enabled_clouds) == 1:
                 cloud_str = f'for cloud {enabled_clouds[0]}'
             else:
@@ -1008,7 +1008,7 @@ class Resources:
                     f'Label rejected due to {cloud}: {err_msg}'
                 ])
                 break
-        if
+        if invalid_table.rows:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(
                     'The following labels are invalid:'
@@ -1283,7 +1283,7 @@ class Resources:
             _cluster_config_overrides=override.pop(
                 '_cluster_config_overrides', self._cluster_config_overrides),
         )
-        assert
+        assert not override
        return resources

    def valid_on_region_zones(self, region: str, zones: List[str]) -> bool:
sky/serve/autoscalers.py
CHANGED
@@ -320,8 +320,8 @@ class RequestRateAutoscaler(Autoscaler):
         """Select outdated replicas to scale down."""

         if self.update_mode == serve_utils.UpdateMode.ROLLING:
-            latest_ready_replicas = []
-            old_nonterminal_replicas = []
+            latest_ready_replicas: List['replica_managers.ReplicaInfo'] = []
+            old_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
             for info in replica_infos:
                 if info.version == self.latest_version:
                     if info.is_ready:
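This hunk and several below annotate empty-literal assignments. That is a standard accommodation for static type checkers: 'x = []' gives the checker no element type to infer, and mypy typically reports a "Need type annotation" error for it. A small sketch of the failure mode, with hypothetical names:

    from typing import List

    def ready_names(ready: bool) -> List[str]:
        # Without the annotation, mypy reports:
        #   Need type annotation for "names" (hint: "names: List[<type>] = ...")
        names: List[str] = []
        if ready:
            names.append('replica-1')
        return names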
sky/serve/core.py
CHANGED
@@ -360,7 +360,7 @@ def update(
         raise RuntimeError(e.error_msg) from e

     service_statuses = serve_utils.load_service_status(serve_status_payload)
-    if
+    if not service_statuses:
         with ux_utils.print_exception_no_traceback():
             raise RuntimeError(f'Cannot find service {service_name!r}.'
                                f'To spin up a service, use {ux_utils.BOLD}'
@@ -491,9 +491,9 @@ def down(
         stopped_message='All services should have terminated.')

     service_names_str = ','.join(service_names)
-    if sum([
-        argument_str = f'service_names={service_names_str}'
-
+    if sum([bool(service_names), all]) != 1:
+        argument_str = (f'service_names={service_names_str}'
+                        if service_names else '')
         argument_str += ' all' if all else ''
         raise ValueError('Can only specify one of service_names or all. '
                          f'Provided {argument_str!r}.')
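The sum([...]) != 1 test enforces that exactly one of the two arguments was supplied: each bool contributes 0 or 1, so the sum is 1 only in the valid cases. A sketch of the pattern with simplified arguments:

    from typing import List, Optional

    def down(service_names: Optional[List[str]] = None, all: bool = False) -> None:
        # Exactly one of service_names / all must be given.
        if sum([bool(service_names), bool(all)]) != 1:
            raise ValueError('Can only specify one of service_names or all.')

    down(service_names=['svc-a'])  # ok
    down(all=True)                 # ok
    # down() or down(['svc-a'], all=True) would raise ValueError.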
sky/serve/replica_managers.py
CHANGED
@@ -172,7 +172,7 @@ def _get_resources_ports(task_yaml: str) -> str:
     """Get the resources ports used by the task."""
     task = sky.Task.from_yaml(task_yaml)
     # Already checked all ports are the same in sky.serve.core.up
-    assert
+    assert task.resources, task
     task_resources: 'resources.Resources' = list(task.resources)[0]
     # Already checked the resources have and only have one port
     # before upload the task yaml.
sky/serve/serve_state.py
CHANGED
@@ -226,7 +226,7 @@ class ServiceStatus(enum.Enum):
                for status in ReplicaStatus.failed_statuses()) > 0:
             return cls.FAILED
         # When min_replicas = 0, there is no (provisioning) replica.
-        if
+        if not replica_statuses:
             return cls.NO_REPLICA
         return cls.REPLICA_INIT

sky/serve/serve_utils.py
CHANGED
@@ -110,7 +110,7 @@ ValueType = TypeVar('ValueType')
 class ThreadSafeDict(Generic[KeyType, ValueType]):
     """A thread-safe dict."""

-    def __init__(self, *args, **kwargs) -> None:
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         self._dict: Dict[KeyType, ValueType] = dict(*args, **kwargs)
         self._lock = threading.Lock()

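Annotating *args: Any, **kwargs: Any types each positional and keyword value individually (not the tuple or dict as a whole), which is what strict mypy expects. A self-contained sketch of a wrapper in the same shape; only __init__ is taken from the hunk, the rest is assumed:

    import threading
    from typing import Any, Dict, Generic, TypeVar

    KeyType = TypeVar('KeyType')
    ValueType = TypeVar('ValueType')

    class ThreadSafeDict(Generic[KeyType, ValueType]):
        """A dict guarded by a lock."""

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            self._dict: Dict[KeyType, ValueType] = dict(*args, **kwargs)
            self._lock = threading.Lock()

        def __setitem__(self, key: KeyType, value: ValueType) -> None:
            # Hypothetical accessor to show the lock in use.
            with self._lock:
                self._dict[key] = value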
@@ -383,7 +383,7 @@ def _get_service_status(


 def get_service_status_encoded(service_names: Optional[List[str]]) -> str:
-    service_statuses = []
+    service_statuses: List[Dict[str, str]] = []
     if service_names is None:
         # Get all service names
         service_names = serve_state.get_glob_service_names(None)
@@ -400,7 +400,7 @@ def get_service_status_encoded(service_names: Optional[List[str]]) -> str:

 def load_service_status(payload: str) -> List[Dict[str, Any]]:
     service_statuses_encoded = common_utils.decode_payload(payload)
-    service_statuses = []
+    service_statuses: List[Dict[str, Any]] = []
     for service_status in service_statuses_encoded:
         service_statuses.append({
             k: pickle.loads(base64.b64decode(v))
@@ -432,7 +432,7 @@ def _terminate_failed_services(
         A message indicating potential resource leak (if any). If no
         resource leak is detected, return None.
     """
-    remaining_replica_clusters = []
+    remaining_replica_clusters: List[str] = []
     # The controller should have already attempted to terminate those
     # replicas, so we don't need to try again here.
     for replica_info in serve_state.get_replica_infos(service_name):
@@ -459,8 +459,8 @@ def _terminate_failed_services(
|
|
459
459
|
|
460
460
|
def terminate_services(service_names: Optional[List[str]], purge: bool) -> str:
|
461
461
|
service_names = serve_state.get_glob_service_names(service_names)
|
462
|
-
terminated_service_names = []
|
463
|
-
messages = []
|
462
|
+
terminated_service_names: List[str] = []
|
463
|
+
messages: List[str] = []
|
464
464
|
for service_name in service_names:
|
465
465
|
service_status = _get_service_status(service_name,
|
466
466
|
with_replica_info=False)
|
@@ -506,7 +506,7 @@ def terminate_services(service_names: Optional[List[str]], purge: bool) -> str:
             f.write(UserSignal.TERMINATE.value)
             f.flush()
         terminated_service_names.append(f'{service_name!r}')
-    if
+    if not terminated_service_names:
         messages.append('No service to terminate.')
     else:
         identity_str = f'Service {terminated_service_names[0]} is'
@@ -784,9 +784,9 @@ def get_endpoint(service_record: Dict[str, Any]) -> str:
     # Don't use backend_utils.is_controller_accessible since it is too slow.
     handle = global_user_state.get_handle_from_cluster_name(
         SKY_SERVE_CONTROLLER_NAME)
-    assert isinstance(handle, backends.CloudVmRayResourceHandle)
     if handle is None:
         return '-'
+    assert isinstance(handle, backends.CloudVmRayResourceHandle)
     load_balancer_port = service_record['load_balancer_port']
     if load_balancer_port is None:
         return '-'
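Reordering matters here: the old placement asserted on the handle before the None check could short-circuit, so a missing controller raised AssertionError instead of returning '-'. In miniature, with a hypothetical handle type:

    from typing import Optional

    def endpoint(handle: Optional[str]) -> str:
        if handle is None:  # must run before any assertion about the value
            return '-'
        assert isinstance(handle, str)
        return f'http://{handle}'

    print(endpoint(None))  # '-' rather than an AssertionError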
@@ -816,7 +816,7 @@ def format_service_table(service_records: List[Dict[str, Any]],
     ])
     service_table = log_utils.create_table(service_columns)

-    replica_infos = []
+    replica_infos: List[Dict[str, Any]] = []
     for record in service_records:
         for replica in record['replica_info']:
             replica['service_name'] = record['name']
@@ -888,7 +888,8 @@ def _format_replica_table(replica_records: List[Dict[str, Any]],
             region = '-'
             zone = '-'

-            replica_handle: 'backends.CloudVmRayResourceHandle' = record[
+            replica_handle: Optional['backends.CloudVmRayResourceHandle'] = record[
+                'handle']
             if replica_handle is not None:
                 resources_str = resources_utils.get_readable_resources_repr(
                     replica_handle, simplify=not show_all)
sky/serve/service_spec.py
CHANGED
@@ -2,7 +2,7 @@
 import json
 import os
 import textwrap
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional

 import yaml

@@ -186,9 +186,12 @@ class SkyServiceSpec:
         return SkyServiceSpec.from_yaml_config(config['service'])

     def to_yaml_config(self) -> Dict[str, Any]:
-        config =
+        config: Dict[str, Any] = {}

-        def add_if_not_none(section
+        def add_if_not_none(section: str,
+                            key: Optional[str],
+                            value: Any,
+                            no_empty: bool = False):
             if no_empty and not value:
                 return
             if value is not None:
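add_if_not_none gains a full signature here, but its body is only partly visible. A hypothetical completion, showing how such a helper typically populates the nested config dict (the setdefault logic is assumed, not shown in the diff):

    from typing import Any, Dict, Optional

    config: Dict[str, Any] = {}

    def add_if_not_none(section: str,
                        key: Optional[str],
                        value: Any,
                        no_empty: bool = False) -> None:
        if no_empty and not value:
            return
        if value is not None:
            if key is None:
                config[section] = value  # assumed behavior
            else:
                config.setdefault(section, {})[key] = value  # assumed behavior

    add_if_not_none('readiness_probe', 'path', '/health')
    add_if_not_none('replica_policy', 'min_replicas', 2)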
@@ -231,8 +234,8 @@ class SkyServiceSpec:
                              ' with custom headers')
         return f'{method}{headers}'

-    def spot_policy_str(self):
-        policy_strs = []
+    def spot_policy_str(self) -> str:
+        policy_strs: List[str] = []
         if (self.dynamic_ondemand_fallback is not None and
                 self.dynamic_ondemand_fallback):
             policy_strs.append('Dynamic on-demand fallback')
sky/skylet/job_lib.py
CHANGED
@@ -586,7 +586,7 @@ def update_job_status(job_ids: List[int],
     This function should only be run on the remote instance with ray>=2.4.0.
     """
     echo = logger.info if not silent else logger.debug
-    if
+    if not job_ids:
         return []

     statuses = []
sky/skylet/providers/ibm/node_provider.py
CHANGED
@@ -377,7 +377,7 @@ class IBMVPCNodeProvider(NodeProvider):
                     node["id"], nic_id
                 ).get_result()
                 floating_ips = res["floating_ips"]
-                if
+                if not floating_ips:
                     # not adding a node that's yet/failed to
                     # to get a floating ip provisioned
                     continue
@@ -485,7 +485,7 @@ class IBMVPCNodeProvider(NodeProvider):
         """Returns instance (node) information matching the specified name"""

         instances_data = self.ibm_vpc_client.list_instances(name=name).get_result()
-        if
+        if instances_data["instances"]:
             return instances_data["instances"][0]
         return None

sky/skylet/providers/scp/node_provider.py
CHANGED
@@ -259,7 +259,7 @@ class SCPNodeProvider(NodeProvider):
                 for sg in sg_contents
                 if sg["securityGroupId"] == sg_id
             ]
-            if
+            if sg and sg[0] == "ACTIVE":
                 break
             time.sleep(5)

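This hunk and the two that follow are polling loops: re-query the API, test a condition, then break or sleep and retry. A generic sketch of the shape with a hypothetical fetch callable:

    import time
    from typing import Callable, TypeVar

    T = TypeVar('T')

    def wait_until(fetch: Callable[[], T],
                   done: Callable[[T], bool],
                   interval: float = 5.0,
                   attempts: int = 60) -> T:
        """Poll fetch() until done(result) is true; a sketch, not library code."""
        for _ in range(attempts):
            result = fetch()
            if done(result):
                return result
            time.sleep(interval)
        raise TimeoutError('condition not met in time')

    # e.g. wait_until(lambda: get_sg_state(sg_id), lambda s: s == 'ACTIVE'),
    # where get_sg_state is a stand-in for the security-group query above.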
@@ -282,16 +282,16 @@ class SCPNodeProvider(NodeProvider):
                 for sg in sg_contents
                 if sg["securityGroupId"] == sg_id
             ]
-            if
+            if not sg:
                 break

     def _refresh_security_group(self, vms):
-        if
+        if vms:
             return
         # remove security group if vm does not exist
         keys = self.metadata.keys()
         security_group_id = self.metadata[
-            keys[0]]['creation']['securityGroupId'] if
+            keys[0]]['creation']['securityGroupId'] if keys else None
         if security_group_id:
             try:
                 self._del_security_group(security_group_id)
@@ -308,7 +308,7 @@ class SCPNodeProvider(NodeProvider):
                 for vm in vm_contents
                 if vm["virtualServerId"] == vm_id
             ]
-            if
+            if not vms:
                 break

     def _del_firwall_rules(self, firewall_id, rule_ids):
@@ -391,7 +391,7 @@ class SCPNodeProvider(NodeProvider):
             return None, None, None, None

     def _undo_funcs(self, undo_func_list):
-        while
+        while undo_func_list:
             func = undo_func_list.pop()
             func()

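_undo_funcs drains a LIFO undo stack: cleanup callbacks registered during provisioning are popped and run most-recent-first until the list is empty, so teardown mirrors setup in reverse. A standalone sketch:

    from typing import Callable, List

    undo_func_list: List[Callable[[], None]] = []
    undo_func_list.append(lambda: print('delete security group'))
    undo_func_list.append(lambda: print('terminate VM'))

    # `while undo_func_list:` loops until the stack is empty.
    while undo_func_list:
        func = undo_func_list.pop()
        func()
    # prints 'terminate VM', then 'delete security group'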
@@ -468,7 +468,7 @@ class SCPNodeProvider(NodeProvider):

         zone_config = ZoneConfig(self.scp_client, node_config)
         vpc_subnets = zone_config.get_vcp_subnets()
-        if
+        if not vpc_subnets:
             raise SCPError("This region/zone does not have available VPCs.")

         instance_config = zone_config.bootstrap_instance_config(node_config)
sky/task.py
CHANGED
@@ -956,7 +956,7 @@ class Task:
         }``.
         """
         for storage in self.storage_mounts.values():
-            if
+            if not storage.stores:
                 store_type, store_region = self._get_preferred_store()
                 self.storage_plans[storage] = store_type
                 storage.add_store(store_type, store_region)
sky/utils/common_utils.py
CHANGED
@@ -633,7 +633,7 @@ def get_cleaned_username(username: str = '') -> str:
     return username


-def fill_template(template_name: str, variables: Dict,
+def fill_template(template_name: str, variables: Dict[str, Any],
                   output_path: str) -> None:
     """Create a file from a Jinja template and return the filename."""
     assert template_name.endswith('.j2'), template_name
sky/utils/kubernetes/gpu_labeler.py
CHANGED
@@ -139,7 +139,7 @@ def label():
         # Create the job for this node`
         batch_v1.create_namespaced_job(namespace, job_manifest)
         print(f'Created GPU labeler job for node {node_name}')
-    if
+    if not gpu_nodes:
         print('No GPU nodes found in the cluster. If you have GPU nodes, '
               'please ensure that they have the label '
               f'`{kubernetes_utils.get_gpu_resource_key()}: <number of GPUs>`')