skypilot-nightly 1.0.0.dev20251012__py3-none-any.whl → 1.0.0.dev20251014__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/shadeform.py +89 -0
- sky/authentication.py +52 -2
- sky/backends/backend_utils.py +35 -25
- sky/backends/cloud_vm_ray_backend.py +5 -5
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +19 -25
- sky/catalog/shadeform_catalog.py +165 -0
- sky/client/cli/command.py +53 -19
- sky/client/sdk.py +13 -1
- sky/clouds/__init__.py +2 -0
- sky/clouds/shadeform.py +393 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/jobs/controller.py +122 -145
- sky/jobs/recovery_strategy.py +59 -82
- sky/jobs/scheduler.py +5 -5
- sky/jobs/state.py +65 -21
- sky/jobs/utils.py +58 -22
- sky/metrics/utils.py +27 -6
- sky/provision/__init__.py +1 -0
- sky/provision/kubernetes/utils.py +44 -39
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/server/common.py +4 -2
- sky/server/requests/executor.py +25 -3
- sky/server/server.py +9 -3
- sky/setup_files/dependencies.py +1 -0
- sky/sky_logging.py +0 -2
- sky/skylet/constants.py +23 -6
- sky/skylet/log_lib.py +0 -1
- sky/skylet/log_lib.pyi +1 -1
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/utils/common.py +2 -0
- sky/utils/context.py +57 -51
- sky/utils/context_utils.py +15 -11
- sky/utils/controller_utils.py +35 -8
- sky/utils/locks.py +20 -5
- sky/utils/subprocess_utils.py +4 -3
- {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251014.dist-info}/METADATA +39 -38
- {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251014.dist-info}/RECORD +63 -54
- /sky/dashboard/out/_next/static/{yOfMelBaFp8uL5F9atyAK → 9Fek73R28lDp1A5J4N7g7}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{yOfMelBaFp8uL5F9atyAK → 9Fek73R28lDp1A5J4N7g7}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251014.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251014.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251014.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251014.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
""" Shadeform | Catalog
|
|
2
|
+
|
|
3
|
+
This module loads pricing and instance information from the Shadeform API
|
|
4
|
+
and can be used to query instance types and pricing information for Shadeform.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typing
|
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from sky.catalog import common
|
|
13
|
+
|
|
14
|
+
if typing.TYPE_CHECKING:
|
|
15
|
+
from sky.clouds import cloud
|
|
16
|
+
|
|
17
|
+
# We'll use dynamic fetching, so no static CSV file to load
|
|
18
|
+
_df = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_df():
    """Return the cached catalog dataframe, loading it on first use.

    The catalog is read lazily from the bundled ``shadeform/vms.csv``. When
    no static catalog file exists, an empty frame with the expected columns
    is cached instead (a placeholder until dynamic fetching from the
    Shadeform API is implemented).
    """
    global _df
    if _df is not None:
        return _df
    try:
        catalog = common.read_catalog('shadeform/vms.csv')
    except FileNotFoundError:
        # No static catalog shipped: cache an empty frame carrying the
        # schema callers expect. This would be replaced with dynamic API
        # fetching in a full implementation.
        _df = pd.DataFrame(columns=[
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])
    else:
        # Drop rows without an instance type, then normalize accelerator
        # names (stringified and whitespace-stripped) when present.
        catalog = catalog[catalog['InstanceType'].notna()]
        if 'AcceleratorName' in catalog.columns:
            catalog = catalog[catalog['AcceleratorName'].notna()]
            catalog = catalog.assign(
                AcceleratorName=catalog['AcceleratorName'].astype(
                    str).str.strip())
        _df = catalog.reset_index(drop=True)
    return _df
|
|
47
|
+
def _is_not_found_error(err: ValueError) -> bool:
|
|
48
|
+
msg = str(err).lower()
|
|
49
|
+
return 'not found' in msg or 'not supported' in msg
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _call_or_default(func, default):
|
|
53
|
+
try:
|
|
54
|
+
return func()
|
|
55
|
+
except ValueError as err:
|
|
56
|
+
if _is_not_found_error(err):
|
|
57
|
+
return default
|
|
58
|
+
raise
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def instance_type_exists(instance_type: str) -> bool:
    """Whether *instance_type* is present in the Shadeform catalog."""
    catalog = _get_df()
    return common.instance_type_exists_impl(catalog, instance_type)
|
66
|
+
def validate_region_zone(
        region: Optional[str],
        zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Validate (and normalize) a region/zone pair against the catalog."""
    catalog = _get_df()
    return common.validate_region_zone_impl('shadeform', catalog, region,
                                            zone)
|
|
74
|
+
def get_hourly_cost(instance_type: str,
                    use_spot: bool = False,
                    region: Optional[str] = None,
                    zone: Optional[str] = None) -> float:
    """Return the hourly price for *instance_type* in the given location.

    Raises:
        ValueError: if *use_spot* is True — Shadeform has no spot offering.
    """
    if use_spot:
        raise ValueError('Spot instances are not supported on Shadeform')
    catalog = _get_df()
    return common.get_hourly_cost_impl(catalog, instance_type, use_spot,
                                       region, zone)
|
|
87
|
+
def get_vcpus_mem_from_instance_type(
        instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Return (vCPUs, memory GiB) for *instance_type*; (None, None) if absent."""

    def _lookup():
        return common.get_vcpus_mem_from_instance_type_impl(
            _get_df(), instance_type)

    return _call_or_default(_lookup, (None, None))
+
|
|
95
|
+
def get_default_instance_type(cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[str] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Pick a default instance type satisfying the CPU/memory request.

    *disk_tier* is accepted for interface parity but ignored: Shadeform
    doesn't support custom disk tiers yet.
    """
    del disk_tier

    def _lookup():
        return common.get_instance_type_for_cpus_mem_impl(
            _get_df(), cpus, memory, region, zone)

    return _call_or_default(_lookup, None)
+
|
|
107
|
+
def get_accelerators_from_instance_type(
        instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
    """Return the accelerator name/count mapping for *instance_type*, if any."""

    def _lookup():
        return common.get_accelerators_from_instance_type_impl(
            _get_df(), instance_type)

    return _call_or_default(_lookup, None)
|
+
|
|
115
|
+
def get_instance_type_for_accelerator(
        acc_name: str,
        acc_count: int,
        cpus: Optional[str] = None,
        memory: Optional[str] = None,
        use_spot: bool = False,
        region: Optional[str] = None,
        zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
    """Return (matching instance types, fuzzy candidates) for an accelerator."""
    if use_spot:
        # Shadeform has no spot market; no match, with the reason as the
        # fuzzy-candidate message.
        return None, ['Spot instances are not supported on Shadeform']

    def _lookup():
        return common.get_instance_type_for_accelerator_impl(
            df=_get_df(),
            acc_name=acc_name,
            acc_count=acc_count,
            cpus=cpus,
            memory=memory,
            use_spot=use_spot,
            region=region,
            zone=zone)

    return _call_or_default(_lookup, (None, []))
|
139
|
+
|
|
140
|
+
def get_region_zones_for_instance_type(instance_type: str,
                                       use_spot: bool) -> List['cloud.Region']:
    """List regions (with zones) where *instance_type* is offered."""
    if use_spot:
        # Shadeform has no spot offering anywhere.
        return []

    subset = _get_df()
    subset = subset[subset['InstanceType'] == instance_type]
    return _call_or_default(lambda: common.get_region_zones(subset, use_spot),
                            [])
151
|
+
|
|
152
|
+
def list_accelerators(
    gpus_only: bool,
    name_filter: Optional[str],
    region_filter: Optional[str],
    quantity_filter: Optional[int],
    case_sensitive: bool = True,
    all_regions: bool = False,
    require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
    """Enumerate Shadeform GPU offerings, keyed by accelerator name."""
    del require_price  # Unused.
    catalog = _get_df()
    return common.list_accelerators_impl('Shadeform', catalog, gpus_only,
                                         name_filter, region_filter,
                                         quantity_filter, case_sensitive,
                                         all_regions)
sky/client/cli/command.py
CHANGED
|
@@ -158,12 +158,17 @@ def _get_cluster_records_and_set_ssh_config(
|
|
|
158
158
|
# Update the SSH config for all clusters
|
|
159
159
|
for record in cluster_records:
|
|
160
160
|
handle = record['handle']
|
|
161
|
-
|
|
161
|
+
name = record['name']
|
|
162
162
|
if not (handle is not None and handle.cached_external_ips is not None
|
|
163
163
|
and 'credentials' in record):
|
|
164
164
|
# If the cluster is not UP or does not have credentials available,
|
|
165
165
|
# we need to remove the cluster from the SSH config.
|
|
166
|
-
cluster_utils.SSHConfigHelper.remove_cluster(
|
|
166
|
+
cluster_utils.SSHConfigHelper.remove_cluster(name)
|
|
167
|
+
continue
|
|
168
|
+
if not record['credentials']:
|
|
169
|
+
# The credential is missing for some reason, continue.
|
|
170
|
+
logger.debug(
|
|
171
|
+
f'Client did not receive SSH credential for cluster {name}')
|
|
167
172
|
continue
|
|
168
173
|
|
|
169
174
|
# During the failover, even though a cluster does not exist, the handle
|
|
@@ -1868,7 +1873,8 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
|
1868
1873
|
controllers = []
|
|
1869
1874
|
for cluster_record in cluster_records:
|
|
1870
1875
|
cluster_name = cluster_record['name']
|
|
1871
|
-
controller = controller_utils.Controllers.from_name(
|
|
1876
|
+
controller = controller_utils.Controllers.from_name(
|
|
1877
|
+
cluster_name, expect_exact_match=False)
|
|
1872
1878
|
if controller is not None:
|
|
1873
1879
|
controllers.append(cluster_record)
|
|
1874
1880
|
else:
|
|
@@ -2034,7 +2040,8 @@ def cost_report(all: bool, days: int): # pylint: disable=redefined-builtin
|
|
|
2034
2040
|
for cluster_record in cluster_records:
|
|
2035
2041
|
cluster_name = cluster_record['name']
|
|
2036
2042
|
try:
|
|
2037
|
-
controller = controller_utils.Controllers.from_name(
|
|
2043
|
+
controller = controller_utils.Controllers.from_name(
|
|
2044
|
+
cluster_name, expect_exact_match=False)
|
|
2038
2045
|
except AssertionError:
|
|
2039
2046
|
# There could be some old controller clusters from previous
|
|
2040
2047
|
# versions that we should not show in the cost report.
|
|
@@ -2406,7 +2413,8 @@ def cancel(
|
|
|
2406
2413
|
job_ids=job_ids_to_cancel)
|
|
2407
2414
|
_async_call_or_wait(request_id, async_call, 'sky.cancel')
|
|
2408
2415
|
except exceptions.NotSupportedError as e:
|
|
2409
|
-
controller = controller_utils.Controllers.from_name(
|
|
2416
|
+
controller = controller_utils.Controllers.from_name(
|
|
2417
|
+
cluster, expect_exact_match=False)
|
|
2410
2418
|
assert controller is not None, cluster
|
|
2411
2419
|
with ux_utils.print_exception_no_traceback():
|
|
2412
2420
|
raise click.UsageError(
|
|
@@ -2707,7 +2715,8 @@ def start(
|
|
|
2707
2715
|
# Get all clusters that are not controllers.
|
|
2708
2716
|
cluster_records = [
|
|
2709
2717
|
cluster for cluster in all_clusters
|
|
2710
|
-
if controller_utils.Controllers.from_name(
|
|
2718
|
+
if controller_utils.Controllers.from_name(
|
|
2719
|
+
cluster['name'], expect_exact_match=False) is None
|
|
2711
2720
|
]
|
|
2712
2721
|
if cluster_records is None:
|
|
2713
2722
|
# Get GLOB cluster names
|
|
@@ -2769,7 +2778,8 @@ def start(
|
|
|
2769
2778
|
# Checks for controller clusters (jobs controller / sky serve controller).
|
|
2770
2779
|
controllers, normal_clusters = [], []
|
|
2771
2780
|
for name in to_start:
|
|
2772
|
-
if controller_utils.Controllers.from_name(
|
|
2781
|
+
if controller_utils.Controllers.from_name(
|
|
2782
|
+
name, expect_exact_match=False) is not None:
|
|
2773
2783
|
controllers.append(name)
|
|
2774
2784
|
else:
|
|
2775
2785
|
normal_clusters.append(name)
|
|
@@ -2905,7 +2915,8 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
|
|
|
2905
2915
|
to be torn down (e.g., because it has jobs running or
|
|
2906
2916
|
it is in init state)
|
|
2907
2917
|
"""
|
|
2908
|
-
controller = controller_utils.Controllers.from_name(
|
|
2918
|
+
controller = controller_utils.Controllers.from_name(
|
|
2919
|
+
controller_name, expect_exact_match=False)
|
|
2909
2920
|
assert controller is not None, controller_name
|
|
2910
2921
|
|
|
2911
2922
|
with rich_utils.client_status(
|
|
@@ -3004,7 +3015,8 @@ def _hint_or_raise_for_down_sky_serve_controller(controller_name: str,
|
|
|
3004
3015
|
to be torn down (e.g., because it has services running or
|
|
3005
3016
|
it is in init state)
|
|
3006
3017
|
"""
|
|
3007
|
-
controller = controller_utils.Controllers.from_name(
|
|
3018
|
+
controller = controller_utils.Controllers.from_name(
|
|
3019
|
+
controller_name, expect_exact_match=False)
|
|
3008
3020
|
assert controller is not None, controller_name
|
|
3009
3021
|
with rich_utils.client_status('[bold cyan]Checking for live services[/]'):
|
|
3010
3022
|
try:
|
|
@@ -3115,14 +3127,15 @@ def _down_or_stop_clusters(
|
|
|
3115
3127
|
names = list(names)
|
|
3116
3128
|
if names:
|
|
3117
3129
|
controllers = [
|
|
3118
|
-
name for name in names
|
|
3119
|
-
|
|
3130
|
+
name for name in names if controller_utils.Controllers.from_name(
|
|
3131
|
+
name, expect_exact_match=False) is not None
|
|
3120
3132
|
]
|
|
3121
3133
|
controllers_str = ', '.join(map(repr, controllers))
|
|
3122
3134
|
names = [
|
|
3123
3135
|
cluster['name']
|
|
3124
3136
|
for cluster in _get_cluster_records_and_set_ssh_config(names)
|
|
3125
|
-
if controller_utils.Controllers.from_name(
|
|
3137
|
+
if controller_utils.Controllers.from_name(
|
|
3138
|
+
cluster['name'], expect_exact_match=False) is None
|
|
3126
3139
|
]
|
|
3127
3140
|
|
|
3128
3141
|
# Make sure the controllers are explicitly specified without other
|
|
@@ -3147,7 +3160,7 @@ def _down_or_stop_clusters(
|
|
|
3147
3160
|
f'{controllers_str} is currently not supported.')
|
|
3148
3161
|
else:
|
|
3149
3162
|
controller = controller_utils.Controllers.from_name(
|
|
3150
|
-
controller_name)
|
|
3163
|
+
controller_name, expect_exact_match=False)
|
|
3151
3164
|
assert controller is not None
|
|
3152
3165
|
hint_or_raise = _controller_to_hint_or_raise(controller)
|
|
3153
3166
|
try:
|
|
@@ -3195,9 +3208,10 @@ def _down_or_stop_clusters(
|
|
|
3195
3208
|
names = [
|
|
3196
3209
|
record['name']
|
|
3197
3210
|
for record in all_clusters
|
|
3198
|
-
if controller_utils.Controllers.from_name(
|
|
3199
|
-
|
|
3200
|
-
|
|
3211
|
+
if controller_utils.Controllers.from_name(
|
|
3212
|
+
record['name'], expect_exact_match=False) is None and
|
|
3213
|
+
(down or idle_minutes_to_autostop is not None or
|
|
3214
|
+
record['status'] != status_lib.ClusterStatus.STOPPED)
|
|
3201
3215
|
]
|
|
3202
3216
|
|
|
3203
3217
|
clusters = names
|
|
@@ -3227,6 +3241,9 @@ def _down_or_stop_clusters(
|
|
|
3227
3241
|
|
|
3228
3242
|
request_ids = []
|
|
3229
3243
|
|
|
3244
|
+
successes: List[str] = []
|
|
3245
|
+
failures: List[Tuple[str, str]] = []
|
|
3246
|
+
|
|
3230
3247
|
def _down_or_stop(name: str):
|
|
3231
3248
|
success_progress = False
|
|
3232
3249
|
if idle_minutes_to_autostop is not None:
|
|
@@ -3237,9 +3254,10 @@ def _down_or_stop_clusters(
|
|
|
3237
3254
|
_async_call_or_wait(
|
|
3238
3255
|
request_id, async_call,
|
|
3239
3256
|
server_constants.REQUEST_NAME_PREFIX + operation)
|
|
3240
|
-
except (exceptions.NotSupportedError,
|
|
3241
|
-
exceptions.
|
|
3257
|
+
except (exceptions.NotSupportedError, exceptions.ClusterNotUpError,
|
|
3258
|
+
exceptions.CloudError) as e:
|
|
3242
3259
|
message = str(e)
|
|
3260
|
+
failures.append((name, str(e)))
|
|
3243
3261
|
else: # no exception raised
|
|
3244
3262
|
success_progress = True
|
|
3245
3263
|
message = (f'{colorama.Fore.GREEN}{operation} '
|
|
@@ -3275,13 +3293,17 @@ def _down_or_stop_clusters(
|
|
|
3275
3293
|
f'{colorama.Fore.RED}{operation} cluster {name}...failed. '
|
|
3276
3294
|
f'{colorama.Style.RESET_ALL}'
|
|
3277
3295
|
f'\nReason: {common_utils.format_exception(e)}.')
|
|
3296
|
+
failures.append((name, str(e)))
|
|
3278
3297
|
except (exceptions.NotSupportedError,
|
|
3279
|
-
exceptions.ClusterOwnerIdentityMismatchError
|
|
3298
|
+
exceptions.ClusterOwnerIdentityMismatchError,
|
|
3299
|
+
exceptions.CloudError) as e:
|
|
3280
3300
|
message = str(e)
|
|
3301
|
+
failures.append((name, str(e)))
|
|
3281
3302
|
else: # no exception raised
|
|
3282
3303
|
message = (
|
|
3283
3304
|
f'{colorama.Fore.GREEN}{operation} cluster {name}...done.'
|
|
3284
3305
|
f'{colorama.Style.RESET_ALL}')
|
|
3306
|
+
successes.append(name)
|
|
3285
3307
|
if not down:
|
|
3286
3308
|
message += ('\n To restart the cluster, run: '
|
|
3287
3309
|
f'{colorama.Style.BRIGHT}sky start {name}'
|
|
@@ -3304,6 +3326,18 @@ def _down_or_stop_clusters(
|
|
|
3304
3326
|
click.secho(f'{operation} requests are sent. Check the requests\' '
|
|
3305
3327
|
'status with `sky request get <request_id>`.')
|
|
3306
3328
|
|
|
3329
|
+
click.echo('\nSummary:')
|
|
3330
|
+
if successes:
|
|
3331
|
+
click.echo(' ✓ Succeeded: ' + ', '.join(successes))
|
|
3332
|
+
if failures:
|
|
3333
|
+
failed_pretty = []
|
|
3334
|
+
for name, reason in failures:
|
|
3335
|
+
first = reason.strip().splitlines()[0]
|
|
3336
|
+
first = first if len(first) <= 120 else first[:120] + '…'
|
|
3337
|
+
failed_pretty.append(f'{name} ({first})')
|
|
3338
|
+
click.echo(' ✗ Failed: ' + ', '.join(failed_pretty))
|
|
3339
|
+
raise click.ClickException('Some clusters failed. See summary above.')
|
|
3340
|
+
|
|
3307
3341
|
|
|
3308
3342
|
@cli.command(cls=_DocumentedCodeCommand)
|
|
3309
3343
|
@flags.config_option(expose_value=False)
|
sky/client/sdk.py
CHANGED
|
@@ -98,6 +98,9 @@ def reload_config() -> None:
|
|
|
98
98
|
skypilot_config.safe_reload_config()
|
|
99
99
|
|
|
100
100
|
|
|
101
|
+
# The overloads are not comprehensive - e.g. get_result Literal[False] could be
|
|
102
|
+
# specified to return None. We can add more overloads if needed. To do that see
|
|
103
|
+
# https://github.com/python/mypy/issues/8634#issuecomment-609411104
|
|
101
104
|
@typing.overload
|
|
102
105
|
def stream_response(request_id: None,
|
|
103
106
|
response: 'requests.Response',
|
|
@@ -112,7 +115,16 @@ def stream_response(request_id: server_common.RequestId[T],
|
|
|
112
115
|
response: 'requests.Response',
|
|
113
116
|
output_stream: Optional['io.TextIOBase'] = None,
|
|
114
117
|
resumable: bool = False,
|
|
115
|
-
get_result:
|
|
118
|
+
get_result: Literal[True] = True) -> T:
|
|
119
|
+
...
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@typing.overload
|
|
123
|
+
def stream_response(request_id: server_common.RequestId[T],
|
|
124
|
+
response: 'requests.Response',
|
|
125
|
+
output_stream: Optional['io.TextIOBase'] = None,
|
|
126
|
+
resumable: bool = False,
|
|
127
|
+
get_result: bool = True) -> Optional[T]:
|
|
116
128
|
...
|
|
117
129
|
|
|
118
130
|
|
sky/clouds/__init__.py
CHANGED
|
@@ -30,6 +30,7 @@ from sky.clouds.primeintellect import PrimeIntellect
|
|
|
30
30
|
from sky.clouds.runpod import RunPod
|
|
31
31
|
from sky.clouds.scp import SCP
|
|
32
32
|
from sky.clouds.seeweb import Seeweb
|
|
33
|
+
from sky.clouds.shadeform import Shadeform
|
|
33
34
|
from sky.clouds.ssh import SSH
|
|
34
35
|
from sky.clouds.vast import Vast
|
|
35
36
|
from sky.clouds.vsphere import Vsphere
|
|
@@ -48,6 +49,7 @@ __all__ = [
|
|
|
48
49
|
'PrimeIntellect',
|
|
49
50
|
'SCP',
|
|
50
51
|
'RunPod',
|
|
52
|
+
'Shadeform',
|
|
51
53
|
'Vast',
|
|
52
54
|
'OCI',
|
|
53
55
|
'Vsphere',
|