skypilot-nightly 1.0.0.dev20250522__py3-none-any.whl → 1.0.0.dev20250524__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +46 -16
- sky/backends/backend_utils.py +62 -45
- sky/backends/cloud_vm_ray_backend.py +19 -5
- sky/check.py +398 -171
- sky/cli.py +302 -98
- sky/client/cli.py +302 -98
- sky/client/sdk.py +104 -12
- sky/clouds/__init__.py +3 -0
- sky/clouds/aws.py +4 -2
- sky/clouds/azure.py +4 -2
- sky/clouds/cloud.py +24 -6
- sky/clouds/cudo.py +2 -1
- sky/clouds/do.py +2 -1
- sky/clouds/fluidstack.py +2 -1
- sky/clouds/gcp.py +23 -5
- sky/clouds/ibm.py +4 -2
- sky/clouds/kubernetes.py +66 -22
- sky/clouds/lambda_cloud.py +2 -1
- sky/clouds/nebius.py +18 -2
- sky/clouds/oci.py +4 -2
- sky/clouds/paperspace.py +2 -1
- sky/clouds/runpod.py +2 -1
- sky/clouds/scp.py +2 -1
- sky/clouds/service_catalog/constants.py +1 -1
- sky/clouds/service_catalog/ssh_catalog.py +167 -0
- sky/clouds/ssh.py +203 -0
- sky/clouds/vast.py +2 -1
- sky/clouds/vsphere.py +2 -1
- sky/core.py +58 -11
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/aHej19bZyl4hoHgrzPCn7/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/480-ee58038f1a4afd5c.js +1 -0
- sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
- sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
- sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
- sky/dashboard/out/_next/static/chunks/578-7a4795009a56430c.js +6 -0
- sky/dashboard/out/_next/static/chunks/734-5f5ce8f347b7f417.js +1 -0
- sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
- sky/dashboard/out/_next/static/chunks/938-f347f6144075b0c8.js +1 -0
- sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-dec800f9ef1b10f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-37c042a356f8e608.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-e690d864aa00e2ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-db6558a5ec687011.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/users-2d319455c3f1c3e2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-02a7b60f2ead275f.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
- sky/dashboard/out/_next/static/css/d2cdba64c9202dd7.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/storage.py +1 -1
- sky/global_user_state.py +42 -19
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +72 -56
- sky/jobs/state.py +26 -5
- sky/jobs/utils.py +65 -13
- sky/optimizer.py +29 -7
- sky/provision/__init__.py +1 -0
- sky/provision/aws/instance.py +17 -1
- sky/provision/fluidstack/instance.py +1 -0
- sky/provision/kubernetes/instance.py +16 -5
- sky/provision/kubernetes/utils.py +37 -19
- sky/provision/nebius/instance.py +3 -1
- sky/provision/nebius/utils.py +14 -2
- sky/provision/ssh/__init__.py +18 -0
- sky/resources.py +4 -1
- sky/serve/server/core.py +9 -6
- sky/server/html/token_page.html +6 -1
- sky/server/requests/executor.py +1 -0
- sky/server/requests/payloads.py +18 -0
- sky/server/server.py +108 -5
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +4 -1
- sky/skypilot_config.py +83 -9
- sky/templates/nebius-ray.yml.j2 +12 -0
- sky/utils/cli_utils/status_utils.py +18 -8
- sky/utils/infra_utils.py +21 -1
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/create_cluster.sh +1 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +1440 -0
- sky/utils/kubernetes/kubernetes_deploy_utils.py +117 -10
- sky/utils/kubernetes/ssh-tunnel.sh +387 -0
- sky/utils/log_utils.py +218 -1
- sky/utils/schemas.py +75 -0
- sky/utils/ux_utils.py +2 -1
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/METADATA +6 -1
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/RECORD +103 -91
- sky/dashboard/out/_next/static/CzOVV6JpRQBRt5GhZuhyK/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
- sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- /sky/dashboard/out/_next/static/{CzOVV6JpRQBRt5GhZuhyK → aHej19bZyl4hoHgrzPCn7}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/top_level.txt +0 -0
sky/client/sdk.py
CHANGED
@@ -94,12 +94,15 @@ def stream_response(request_id: Optional[str],
|
|
94
94
|
@server_common.check_server_healthy_or_start
|
95
95
|
@annotations.client_api
|
96
96
|
def check(infra_list: Optional[Tuple[str, ...]],
|
97
|
-
verbose: bool) -> server_common.RequestId:
|
97
|
+
verbose: bool,
|
98
|
+
workspace: Optional[str] = None) -> server_common.RequestId:
|
98
99
|
"""Checks the credentials to enable clouds.
|
99
100
|
|
100
101
|
Args:
|
101
102
|
infra: The infra to check.
|
102
103
|
verbose: Whether to show verbose output.
|
104
|
+
workspace: The workspace to check. If None, all workspaces will be
|
105
|
+
checked.
|
103
106
|
|
104
107
|
Returns:
|
105
108
|
The request ID of the check request.
|
@@ -123,7 +126,9 @@ def check(infra_list: Optional[Tuple[str, ...]],
|
|
123
126
|
f'ignoring {region_zone}')
|
124
127
|
specified_clouds.append(infra.cloud)
|
125
128
|
clouds = tuple(specified_clouds)
|
126
|
-
body = payloads.CheckBody(clouds=clouds, verbose=verbose)
|
129
|
+
body = payloads.CheckBody(clouds=clouds,
|
130
|
+
verbose=verbose,
|
131
|
+
workspace=workspace)
|
127
132
|
response = requests.post(f'{server_common.get_server_url()}/check',
|
128
133
|
json=json.loads(body.model_dump_json()),
|
129
134
|
cookies=server_common.get_api_cookie_jar())
|
@@ -133,16 +138,23 @@ def check(infra_list: Optional[Tuple[str, ...]],
|
|
133
138
|
@usage_lib.entrypoint
|
134
139
|
@server_common.check_server_healthy_or_start
|
135
140
|
@annotations.client_api
|
136
|
-
def enabled_clouds() -> server_common.RequestId:
|
141
|
+
def enabled_clouds(workspace: Optional[str] = None) -> server_common.RequestId:
|
137
142
|
"""Gets the enabled clouds.
|
138
143
|
|
144
|
+
Args:
|
145
|
+
workspace: The workspace to get the enabled clouds for. If None, the
|
146
|
+
active workspace will be used.
|
147
|
+
|
139
148
|
Returns:
|
140
149
|
The request ID of the enabled clouds request.
|
141
150
|
|
142
151
|
Request Returns:
|
143
152
|
A list of enabled clouds in string format.
|
144
153
|
"""
|
145
|
-
|
154
|
+
if workspace is None:
|
155
|
+
workspace = skypilot_config.get_active_workspace()
|
156
|
+
response = requests.get((f'{server_common.get_server_url()}/enabled_clouds?'
|
157
|
+
f'workspace={workspace}'),
|
146
158
|
cookies=server_common.get_api_cookie_jar())
|
147
159
|
return server_common.get_request_id(response)
|
148
160
|
|
@@ -225,7 +237,7 @@ def list_accelerator_counts(
|
|
225
237
|
accelerator names mapped to a list of available counts. See usage
|
226
238
|
in cli.py.
|
227
239
|
"""
|
228
|
-
body = payloads.
|
240
|
+
body = payloads.ListAcceleratorCountsBody(
|
229
241
|
gpus_only=gpus_only,
|
230
242
|
name_filter=name_filter,
|
231
243
|
region_filter=region_filter,
|
@@ -278,6 +290,13 @@ def optimize(
|
|
278
290
|
return server_common.get_request_id(response)
|
279
291
|
|
280
292
|
|
293
|
+
def workspaces() -> server_common.RequestId:
|
294
|
+
"""Gets the workspaces."""
|
295
|
+
response = requests.get(f'{server_common.get_server_url()}/workspaces',
|
296
|
+
cookies=server_common.get_api_cookie_jar())
|
297
|
+
return server_common.get_request_id(response)
|
298
|
+
|
299
|
+
|
281
300
|
@usage_lib.entrypoint
|
282
301
|
@server_common.check_server_healthy_or_start
|
283
302
|
@annotations.client_api
|
@@ -1396,13 +1415,60 @@ def local_down() -> server_common.RequestId:
|
|
1396
1415
|
return server_common.get_request_id(response)
|
1397
1416
|
|
1398
1417
|
|
1418
|
+
@usage_lib.entrypoint
|
1419
|
+
@server_common.check_server_healthy_or_start
|
1420
|
+
@annotations.client_api
|
1421
|
+
def ssh_up(infra: Optional[str] = None) -> server_common.RequestId:
|
1422
|
+
"""Deploys the SSH Node Pools defined in ~/.sky/ssh_targets.yaml.
|
1423
|
+
|
1424
|
+
Args:
|
1425
|
+
infra: Name of the cluster configuration in ssh_targets.yaml.
|
1426
|
+
If None, the first cluster in the file is used.
|
1427
|
+
|
1428
|
+
Returns:
|
1429
|
+
request_id: The request ID of the SSH cluster deployment request.
|
1430
|
+
"""
|
1431
|
+
body = payloads.SSHUpBody(
|
1432
|
+
infra=infra,
|
1433
|
+
cleanup=False,
|
1434
|
+
)
|
1435
|
+
response = requests.post(f'{server_common.get_server_url()}/ssh_up',
|
1436
|
+
json=json.loads(body.model_dump_json()),
|
1437
|
+
cookies=server_common.get_api_cookie_jar())
|
1438
|
+
return server_common.get_request_id(response)
|
1439
|
+
|
1440
|
+
|
1441
|
+
@usage_lib.entrypoint
|
1442
|
+
@server_common.check_server_healthy_or_start
|
1443
|
+
@annotations.client_api
|
1444
|
+
def ssh_down(infra: Optional[str] = None) -> server_common.RequestId:
|
1445
|
+
"""Tears down a Kubernetes cluster on SSH targets.
|
1446
|
+
|
1447
|
+
Args:
|
1448
|
+
infra: Name of the cluster configuration in ssh_targets.yaml.
|
1449
|
+
If None, the first cluster in the file is used.
|
1450
|
+
|
1451
|
+
Returns:
|
1452
|
+
request_id: The request ID of the SSH cluster teardown request.
|
1453
|
+
"""
|
1454
|
+
body = payloads.SSHUpBody(
|
1455
|
+
infra=infra,
|
1456
|
+
cleanup=True,
|
1457
|
+
)
|
1458
|
+
response = requests.post(f'{server_common.get_server_url()}/ssh_down',
|
1459
|
+
json=json.loads(body.model_dump_json()),
|
1460
|
+
cookies=server_common.get_api_cookie_jar())
|
1461
|
+
return server_common.get_request_id(response)
|
1462
|
+
|
1463
|
+
|
1399
1464
|
@usage_lib.entrypoint
|
1400
1465
|
@server_common.check_server_healthy_or_start
|
1401
1466
|
@annotations.client_api
|
1402
1467
|
def realtime_kubernetes_gpu_availability(
|
1403
1468
|
context: Optional[str] = None,
|
1404
1469
|
name_filter: Optional[str] = None,
|
1405
|
-
quantity_filter: Optional[int] = None) -> server_common.RequestId:
|
1470
|
+
quantity_filter: Optional[int] = None,
|
1471
|
+
is_ssh: Optional[bool] = None) -> server_common.RequestId:
|
1406
1472
|
"""Gets the real-time Kubernetes GPU availability.
|
1407
1473
|
|
1408
1474
|
Returns:
|
@@ -1412,6 +1478,7 @@ def realtime_kubernetes_gpu_availability(
|
|
1412
1478
|
context=context,
|
1413
1479
|
name_filter=name_filter,
|
1414
1480
|
quantity_filter=quantity_filter,
|
1481
|
+
is_ssh=is_ssh,
|
1415
1482
|
)
|
1416
1483
|
response = requests.post(
|
1417
1484
|
f'{server_common.get_server_url()}/'
|
@@ -1683,7 +1750,7 @@ def api_status(
|
|
1683
1750
|
@usage_lib.entrypoint
|
1684
1751
|
@server_common.check_server_healthy_or_start
|
1685
1752
|
@annotations.client_api
|
1686
|
-
def api_info() -> Dict[str, str]:
|
1753
|
+
def api_info() -> Dict[str, Any]:
|
1687
1754
|
"""Gets the server's status, commit and version.
|
1688
1755
|
|
1689
1756
|
Returns:
|
@@ -1696,8 +1763,15 @@ def api_info() -> Dict[str, str]:
|
|
1696
1763
|
'api_version': '1',
|
1697
1764
|
'commit': 'abc1234567890',
|
1698
1765
|
'version': '1.0.0',
|
1766
|
+
'version_on_disk': '1.0.0',
|
1767
|
+
'user': {
|
1768
|
+
'name': 'test@example.com',
|
1769
|
+
'id': '12345abcd',
|
1770
|
+
},
|
1699
1771
|
}
|
1700
1772
|
|
1773
|
+
Note that user may be None if we are not using an auth proxy.
|
1774
|
+
|
1701
1775
|
"""
|
1702
1776
|
response = requests.get(f'{server_common.get_server_url()}/api/health',
|
1703
1777
|
cookies=server_common.get_api_cookie_jar())
|
@@ -1820,7 +1894,7 @@ def api_server_logs(follow: bool = True, tail: Optional[int] = None) -> None:
|
|
1820
1894
|
|
1821
1895
|
@usage_lib.entrypoint
|
1822
1896
|
@annotations.client_api
|
1823
|
-
def api_login(endpoint: Optional[str] = None) -> None:
|
1897
|
+
def api_login(endpoint: Optional[str] = None, get_token: bool = False) -> None:
|
1824
1898
|
"""Logs into a SkyPilot API server.
|
1825
1899
|
|
1826
1900
|
This sets the endpoint globally, i.e., all SkyPilot CLI and SDK calls will
|
@@ -1847,7 +1921,7 @@ def api_login(endpoint: Optional[str] = None) -> None:
|
|
1847
1921
|
raise click.BadParameter('Endpoint must be a valid URL.')
|
1848
1922
|
|
1849
1923
|
server_status = server_common.check_server_healthy(endpoint)
|
1850
|
-
if server_status == server_common.ApiServerStatus.NEEDS_AUTH:
|
1924
|
+
if server_status == server_common.ApiServerStatus.NEEDS_AUTH or get_token:
|
1851
1925
|
# We detected an auth proxy, so go through the auth proxy cookie flow.
|
1852
1926
|
parsed_url = urlparse.urlparse(endpoint)
|
1853
1927
|
token_url = f'{endpoint}/token'
|
@@ -1867,11 +1941,20 @@ def api_login(endpoint: Optional[str] = None) -> None:
|
|
1867
1941
|
raise ValueError(f'Malformed token: {token}') from e
|
1868
1942
|
logger.debug(f'Token data: {data!r}')
|
1869
1943
|
try:
|
1870
|
-
|
1944
|
+
json_data = json.loads(data)
|
1871
1945
|
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
1872
1946
|
raise ValueError(f'Malformed token data: {data!r}') from e
|
1873
|
-
if not isinstance(
|
1874
|
-
raise ValueError(f'Malformed token JSON: {
|
1947
|
+
if not isinstance(json_data, dict):
|
1948
|
+
raise ValueError(f'Malformed token JSON: {json_data}')
|
1949
|
+
|
1950
|
+
if json_data.get('v') == 1:
|
1951
|
+
user_hash = json_data.get('user')
|
1952
|
+
cookie_dict = json_data['cookies']
|
1953
|
+
elif 'v' not in json_data:
|
1954
|
+
user_hash = None
|
1955
|
+
cookie_dict = json_data
|
1956
|
+
else:
|
1957
|
+
raise ValueError(f'Unsupported token version: {json_data.get("v")}')
|
1875
1958
|
|
1876
1959
|
cookie_jar = cookiejar.MozillaCookieJar()
|
1877
1960
|
for (name, value) in cookie_dict.items():
|
@@ -1914,6 +1997,15 @@ def api_login(endpoint: Optional[str] = None) -> None:
|
|
1914
1997
|
server_common.get_api_cookie_jar_path())
|
1915
1998
|
cookie_jar.save(cookie_jar_path)
|
1916
1999
|
|
2000
|
+
# If we have a user_hash, save it to the local file
|
2001
|
+
if user_hash is not None:
|
2002
|
+
if not common_utils.is_valid_user_hash(user_hash):
|
2003
|
+
raise ValueError(f'Invalid user hash: {user_hash}')
|
2004
|
+
with open(os.path.expanduser('~/.sky/user_hash'),
|
2005
|
+
'w',
|
2006
|
+
encoding='utf-8') as f:
|
2007
|
+
f.write(user_hash)
|
2008
|
+
|
1917
2009
|
# Set the endpoint in the config file
|
1918
2010
|
config_path = pathlib.Path(
|
1919
2011
|
skypilot_config.get_user_config_path()).expanduser()
|
sky/clouds/__init__.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
from sky.clouds.cloud import Cloud
|
4
4
|
from sky.clouds.cloud import cloud_in_iterable
|
5
|
+
from sky.clouds.cloud import CloudCapability
|
5
6
|
from sky.clouds.cloud import CloudImplementationFeatures
|
6
7
|
from sky.clouds.cloud import DummyCloud
|
7
8
|
from sky.clouds.cloud import OpenPortsVersion
|
@@ -26,6 +27,7 @@ from sky.clouds.oci import OCI
|
|
26
27
|
from sky.clouds.paperspace import Paperspace
|
27
28
|
from sky.clouds.runpod import RunPod
|
28
29
|
from sky.clouds.scp import SCP
|
30
|
+
from sky.clouds.ssh import SSH
|
29
31
|
from sky.clouds.vast import Vast
|
30
32
|
from sky.clouds.vsphere import Vsphere
|
31
33
|
|
@@ -46,6 +48,7 @@ __all__ = [
|
|
46
48
|
'OCI',
|
47
49
|
'Vsphere',
|
48
50
|
'Kubernetes',
|
51
|
+
'SSH',
|
49
52
|
'CloudImplementationFeatures',
|
50
53
|
'Region',
|
51
54
|
'Zone',
|
sky/clouds/aws.py
CHANGED
@@ -565,12 +565,14 @@ class AWS(clouds.Cloud):
|
|
565
565
|
fuzzy_candidate_list, None)
|
566
566
|
|
567
567
|
@classmethod
|
568
|
-
def _check_compute_credentials(
|
568
|
+
def _check_compute_credentials(
|
569
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
569
570
|
"""Checks if the user has access credentials to this AWS's compute service."""
|
570
571
|
return cls._check_credentials()
|
571
572
|
|
572
573
|
@classmethod
|
573
|
-
def _check_storage_credentials(
|
574
|
+
def _check_storage_credentials(
|
575
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
574
576
|
"""Checks if the user has access credentials to this AWS's storage service."""
|
575
577
|
# TODO(seungjin): Implement separate check for
|
576
578
|
# if the user has access to S3.
|
sky/clouds/azure.py
CHANGED
@@ -518,12 +518,14 @@ class Azure(clouds.Cloud):
|
|
518
518
|
fuzzy_candidate_list, None)
|
519
519
|
|
520
520
|
@classmethod
|
521
|
-
def _check_compute_credentials(
|
521
|
+
def _check_compute_credentials(
|
522
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
522
523
|
"""Checks if the user has access credentials to this cloud's compute service."""
|
523
524
|
return cls._check_credentials()
|
524
525
|
|
525
526
|
@classmethod
|
526
|
-
def _check_storage_credentials(
|
527
|
+
def _check_storage_credentials(
|
528
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
527
529
|
"""Checks if the user has access credentials to this cloud's storage service."""
|
528
530
|
# TODO(seungjin): Implement separate check for
|
529
531
|
# if the user has access to Azure Blob Storage.
|
sky/clouds/cloud.py
CHANGED
@@ -457,12 +457,14 @@ class Cloud:
|
|
457
457
|
|
458
458
|
@classmethod
|
459
459
|
def check_credentials(
|
460
|
-
|
461
|
-
|
460
|
+
cls, cloud_capability: CloudCapability
|
461
|
+
) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
462
462
|
"""Checks if the user has access credentials to this cloud.
|
463
463
|
|
464
|
-
Returns a boolean of whether the user can access this cloud, and
|
465
|
-
|
464
|
+
Returns a boolean of whether the user can access this cloud, and:
|
465
|
+
- For SSH and Kubernetes, a dictionary that maps context names to
|
466
|
+
the status of the context.
|
467
|
+
- For others, a string describing the reason if cannot access.
|
466
468
|
|
467
469
|
Raises NotSupportedError if the capability is
|
468
470
|
not supported by this cloud.
|
@@ -474,19 +476,30 @@ class Cloud:
|
|
474
476
|
assert_never(cloud_capability)
|
475
477
|
|
476
478
|
@classmethod
|
477
|
-
def _check_compute_credentials(
|
479
|
+
def _check_compute_credentials(
|
480
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
478
481
|
"""Checks if the user has access credentials to
|
479
482
|
this cloud's compute service."""
|
480
483
|
raise exceptions.NotSupportedError(
|
481
484
|
f'{cls._REPR} does not support {CloudCapability.COMPUTE.value}.')
|
482
485
|
|
483
486
|
@classmethod
|
484
|
-
def _check_storage_credentials(
|
487
|
+
def _check_storage_credentials(
|
488
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
485
489
|
"""Checks if the user has access credentials to
|
486
490
|
this cloud's storage service."""
|
487
491
|
raise exceptions.NotSupportedError(
|
488
492
|
f'{cls._REPR} does not support {CloudCapability.STORAGE.value}.')
|
489
493
|
|
494
|
+
@classmethod
|
495
|
+
def get_infras(cls) -> List[str]:
|
496
|
+
"""Returns a list of enabled infrastructures for this cloud.
|
497
|
+
|
498
|
+
For Kubernetes and SSH, return a list of resource pools.
|
499
|
+
For all other clouds, return self.
|
500
|
+
"""
|
501
|
+
return [cls._REPR.lower()]
|
502
|
+
|
490
503
|
# TODO(zhwu): Make the return type immutable.
|
491
504
|
@classmethod
|
492
505
|
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
@@ -878,6 +891,11 @@ class Cloud:
|
|
878
891
|
def canonical_name(cls) -> str:
|
879
892
|
return cls.__name__.lower()
|
880
893
|
|
894
|
+
@classmethod
|
895
|
+
def display_name(cls) -> str:
|
896
|
+
"""Name of the cloud used in messages displayed to the user."""
|
897
|
+
return cls.canonical_name()
|
898
|
+
|
881
899
|
def __repr__(self):
|
882
900
|
return self._REPR
|
883
901
|
|
sky/clouds/cudo.py
CHANGED
@@ -270,7 +270,8 @@ class Cudo(clouds.Cloud):
|
|
270
270
|
fuzzy_candidate_list, None)
|
271
271
|
|
272
272
|
@classmethod
|
273
|
-
def _check_compute_credentials(
|
273
|
+
def _check_compute_credentials(
|
274
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
274
275
|
"""Checks if the user has access credentials to
|
275
276
|
Cudo's compute service."""
|
276
277
|
try:
|
sky/clouds/do.py
CHANGED
@@ -264,7 +264,8 @@ class DO(clouds.Cloud):
|
|
264
264
|
fuzzy_candidate_list, None)
|
265
265
|
|
266
266
|
@classmethod
|
267
|
-
def _check_compute_credentials(
|
267
|
+
def _check_compute_credentials(
|
268
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
268
269
|
"""Verify that the user has valid credentials for
|
269
270
|
DO's compute service."""
|
270
271
|
|
sky/clouds/fluidstack.py
CHANGED
@@ -261,7 +261,8 @@ class Fluidstack(clouds.Cloud):
|
|
261
261
|
fuzzy_candidate_list, None)
|
262
262
|
|
263
263
|
@classmethod
|
264
|
-
def _check_compute_credentials(
|
264
|
+
def _check_compute_credentials(
|
265
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
265
266
|
"""Checks if the user has access credentials to
|
266
267
|
FluidStack's compute service."""
|
267
268
|
try:
|
sky/clouds/gcp.py
CHANGED
@@ -791,7 +791,8 @@ class GCP(clouds.Cloud):
|
|
791
791
|
return DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH
|
792
792
|
|
793
793
|
@classmethod
|
794
|
-
def _check_compute_credentials(
|
794
|
+
def _check_compute_credentials(
|
795
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
795
796
|
"""Checks if the user has access credentials to this cloud's compute service."""
|
796
797
|
return cls._check_credentials(
|
797
798
|
[
|
@@ -803,7 +804,8 @@ class GCP(clouds.Cloud):
|
|
803
804
|
gcp_utils.get_minimal_compute_permissions())
|
804
805
|
|
805
806
|
@classmethod
|
806
|
-
def _check_storage_credentials(
|
807
|
+
def _check_storage_credentials(
|
808
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
807
809
|
"""Checks if the user has access credentials to this cloud's storage service."""
|
808
810
|
return cls._check_credentials(
|
809
811
|
[('storage', 'Cloud Storage')],
|
@@ -995,10 +997,21 @@ class GCP(clouds.Cloud):
|
|
995
997
|
return GCPIdentityType.SHARED_CREDENTIALS_FILE
|
996
998
|
|
997
999
|
@classmethod
|
998
|
-
@annotations.lru_cache(scope='request',
|
999
|
-
maxsize=1) # Cache since getting identity is slow.
|
1000
1000
|
def get_user_identities(cls) -> List[List[str]]:
|
1001
1001
|
"""Returns the email address + project id of the active user."""
|
1002
|
+
gcp_workspace_config = json.dumps(
|
1003
|
+
skypilot_config.get_workspace_cloud('gcp'), sort_keys=True)
|
1004
|
+
return cls._get_user_identities(gcp_workspace_config)
|
1005
|
+
|
1006
|
+
@classmethod
|
1007
|
+
@annotations.lru_cache(scope='request', maxsize=5)
|
1008
|
+
def _get_user_identities(
|
1009
|
+
cls, workspace_config: Optional[str]) -> List[List[str]]:
|
1010
|
+
# We add workspace_config in args to avoid caching the GCP identity
|
1011
|
+
# for when different workspace configs are used. Use json.dumps to
|
1012
|
+
# ensure the config is hashable.
|
1013
|
+
del workspace_config # Unused
|
1014
|
+
|
1002
1015
|
try:
|
1003
1016
|
account = _run_output('gcloud auth list --filter=status:ACTIVE '
|
1004
1017
|
'--format="value(account)"')
|
@@ -1029,7 +1042,8 @@ class GCP(clouds.Cloud):
|
|
1029
1042
|
f'{common_utils.format_exception(e, use_bracket=True)}'
|
1030
1043
|
) from e
|
1031
1044
|
# TODO: Return a list of identities in the profile when we support
|
1032
|
-
#
|
1045
|
+
# automatic switching for GCP. Currently we only support one
|
1046
|
+
# identity.
|
1033
1047
|
return [[f'{account} [project_id={project_id}]']]
|
1034
1048
|
|
1035
1049
|
@classmethod
|
@@ -1059,6 +1073,10 @@ class GCP(clouds.Cloud):
|
|
1059
1073
|
return 'dryrun-project-id'
|
1060
1074
|
# pylint: disable=import-outside-toplevel
|
1061
1075
|
from google import auth # type: ignore
|
1076
|
+
config_project_id = skypilot_config.get_workspace_cloud('gcp').get(
|
1077
|
+
'project_id', None)
|
1078
|
+
if config_project_id:
|
1079
|
+
return config_project_id
|
1062
1080
|
_, project_id = auth.default()
|
1063
1081
|
if project_id is None:
|
1064
1082
|
raise exceptions.CloudUserIdentityError(
|
sky/clouds/ibm.py
CHANGED
@@ -399,13 +399,15 @@ class IBM(clouds.Cloud):
|
|
399
399
|
return image_size
|
400
400
|
|
401
401
|
@classmethod
|
402
|
-
def _check_compute_credentials(
|
402
|
+
def _check_compute_credentials(
|
403
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
403
404
|
"""Checks if the user has access credentials to
|
404
405
|
IBM's compute service."""
|
405
406
|
return cls._check_credentials()
|
406
407
|
|
407
408
|
@classmethod
|
408
|
-
def _check_storage_credentials(
|
409
|
+
def _check_storage_credentials(
|
410
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
409
411
|
"""Checks if the user has access credentials to
|
410
412
|
IBM's storage service."""
|
411
413
|
# TODO(seungjin): Implement separate check for
|
sky/clouds/kubernetes.py
CHANGED
@@ -4,6 +4,8 @@ import re
|
|
4
4
|
import typing
|
5
5
|
from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
|
6
6
|
|
7
|
+
import colorama
|
8
|
+
|
7
9
|
from sky import clouds
|
8
10
|
from sky import exceptions
|
9
11
|
from sky import sky_logging
|
@@ -149,7 +151,7 @@ class Kubernetes(clouds.Cloud):
|
|
149
151
|
'Ignoring these contexts.')
|
150
152
|
|
151
153
|
@classmethod
|
152
|
-
def existing_allowed_contexts(cls) -> List[str]:
|
154
|
+
def existing_allowed_contexts(cls, silent: bool = False) -> List[str]:
|
153
155
|
"""Get existing allowed contexts.
|
154
156
|
|
155
157
|
If None is returned in the list, it means that we are running in a pod
|
@@ -162,6 +164,12 @@ class Kubernetes(clouds.Cloud):
|
|
162
164
|
|
163
165
|
all_contexts = set(all_contexts)
|
164
166
|
|
167
|
+
# Exclude contexts starting with `ssh-`
|
168
|
+
# TODO(romilb): Remove when SSH Node Pools use a separate kubeconfig.
|
169
|
+
all_contexts = [
|
170
|
+
ctx for ctx in all_contexts if not ctx.startswith('ssh-')
|
171
|
+
]
|
172
|
+
|
165
173
|
allowed_contexts = skypilot_config.get_nested(
|
166
174
|
('kubernetes', 'allowed_contexts'), None)
|
167
175
|
|
@@ -183,8 +191,12 @@ class Kubernetes(clouds.Cloud):
|
|
183
191
|
if context in all_contexts:
|
184
192
|
existing_contexts.append(context)
|
185
193
|
else:
|
194
|
+
# Skip SSH Node Pool contexts
|
195
|
+
if context.startswith('ssh-'):
|
196
|
+
continue
|
186
197
|
skipped_contexts.append(context)
|
187
|
-
|
198
|
+
if not silent:
|
199
|
+
cls._log_skipped_contexts_once(tuple(skipped_contexts))
|
188
200
|
return existing_contexts
|
189
201
|
|
190
202
|
@classmethod
|
@@ -640,7 +652,7 @@ class Kubernetes(clouds.Cloud):
|
|
640
652
|
resource_list = []
|
641
653
|
for instance_type in instance_list:
|
642
654
|
r = resources.copy(
|
643
|
-
cloud=
|
655
|
+
cloud=self.__class__(),
|
644
656
|
instance_type=instance_type,
|
645
657
|
accelerators=None,
|
646
658
|
)
|
@@ -692,7 +704,43 @@ class Kubernetes(clouds.Cloud):
|
|
692
704
|
[], None)
|
693
705
|
|
694
706
|
@classmethod
|
695
|
-
def
|
707
|
+
def _check_single_context(cls, context: str) -> Tuple[bool, str]:
|
708
|
+
"""Check if the user has access credentials to a single SSH context."""
|
709
|
+
|
710
|
+
def _red_color(str_to_format: str) -> str:
|
711
|
+
return (f'{colorama.Fore.LIGHTRED_EX}'
|
712
|
+
f'{str_to_format}'
|
713
|
+
f'{colorama.Style.RESET_ALL}')
|
714
|
+
|
715
|
+
def _dim_color(str_to_format: str) -> str:
|
716
|
+
return (f'{colorama.Style.DIM}'
|
717
|
+
f'{str_to_format}'
|
718
|
+
f'{colorama.Style.RESET_ALL}')
|
719
|
+
|
720
|
+
def _bright_green_color(str_to_format: str) -> str:
|
721
|
+
return (f'{colorama.Fore.GREEN}'
|
722
|
+
f'{str_to_format}'
|
723
|
+
f'{colorama.Style.RESET_ALL}')
|
724
|
+
|
725
|
+
try:
|
726
|
+
check_result = kubernetes_utils.check_credentials(
|
727
|
+
context, run_optional_checks=True)
|
728
|
+
if check_result[0]:
|
729
|
+
if check_result[1] is not None:
|
730
|
+
return True, (_bright_green_color('enabled.') +
|
731
|
+
_dim_color(f' Note: {check_result[1]}'))
|
732
|
+
else:
|
733
|
+
return True, _bright_green_color('enabled.')
|
734
|
+
else:
|
735
|
+
assert check_result[1] is not None
|
736
|
+
return False, (_red_color('disabled.') +
|
737
|
+
_dim_color(f' Reason: {check_result[1]}'))
|
738
|
+
except Exception as e: # pylint: disable=broad-except
|
739
|
+
return False, _red_color(str(e))
|
740
|
+
|
741
|
+
@classmethod
|
742
|
+
def _check_compute_credentials(
|
743
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
696
744
|
"""Checks if the user has access credentials to
|
697
745
|
Kubernetes."""
|
698
746
|
# Check for port forward dependencies
|
@@ -719,26 +767,15 @@ class Kubernetes(clouds.Cloud):
|
|
719
767
|
return (False, 'No available context found in kubeconfig. '
|
720
768
|
'Check if you have a valid kubeconfig file' +
|
721
769
|
check_skypilot_config_msg)
|
722
|
-
|
723
|
-
|
770
|
+
|
771
|
+
ctx2text = {}
|
724
772
|
success = False
|
725
773
|
for context in existing_allowed_contexts:
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
if check_result[1] is not None:
|
732
|
-
hints.append(f'Context {context}: {check_result[1]}')
|
733
|
-
else:
|
734
|
-
reasons.append(f'Context {context}: {check_result[1]}')
|
735
|
-
except Exception as e: # pylint: disable=broad-except
|
736
|
-
return (False, f'Credential check failed for {context}: '
|
737
|
-
f'{common_utils.format_exception(e)}')
|
738
|
-
if success:
|
739
|
-
return (True, cls._format_credential_check_results(hints, reasons))
|
740
|
-
return (False, 'Failed to find available context with working '
|
741
|
-
'credentials. Details:\n' + '\n'.join(reasons))
|
774
|
+
suc, text = cls._check_single_context(context)
|
775
|
+
success = success or suc
|
776
|
+
ctx2text[context] = text
|
777
|
+
|
778
|
+
return success, ctx2text
|
742
779
|
|
743
780
|
@classmethod
|
744
781
|
def _format_credential_check_results(cls, hints: List[str],
|
@@ -855,3 +892,10 @@ class Kubernetes(clouds.Cloud):
|
|
855
892
|
if not key_valid or not value_valid:
|
856
893
|
return False, error_msg
|
857
894
|
return True, None
|
895
|
+
|
896
|
+
@classmethod
|
897
|
+
def get_infras(cls) -> List[str]:
|
898
|
+
return [
|
899
|
+
f'{cls._REPR.lower()}/{c}'
|
900
|
+
for c in cls.existing_allowed_contexts(silent=True)
|
901
|
+
]
|
sky/clouds/lambda_cloud.py
CHANGED
@@ -244,7 +244,8 @@ class Lambda(clouds.Cloud):
|
|
244
244
|
fuzzy_candidate_list, None)
|
245
245
|
|
246
246
|
@classmethod
|
247
|
-
def _check_compute_credentials(
|
247
|
+
def _check_compute_credentials(
|
248
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
248
249
|
"""Checks if the user has access credentials to
|
249
250
|
Lambda's compute service."""
|
250
251
|
try:
|
sky/clouds/nebius.py
CHANGED
@@ -4,6 +4,7 @@ import typing
|
|
4
4
|
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
5
5
|
|
6
6
|
from sky import clouds
|
7
|
+
from sky import skypilot_config
|
7
8
|
from sky.adaptors import nebius
|
8
9
|
from sky.clouds import service_catalog
|
9
10
|
from sky.utils import annotations
|
@@ -210,6 +211,18 @@ class Nebius(clouds.Cloud):
|
|
210
211
|
raise RuntimeError('Unsupported instance type for Nebius cloud:'
|
211
212
|
f' {resources.instance_type}')
|
212
213
|
|
214
|
+
config_fs = skypilot_config.get_nested(
|
215
|
+
('nebius', region.name, 'filesystems'), [])
|
216
|
+
resources_vars_fs = []
|
217
|
+
for i, fs in enumerate(config_fs):
|
218
|
+
resources_vars_fs.append({
|
219
|
+
'filesystem_id': fs['filesystem_id'],
|
220
|
+
'filesystem_attach_mode': fs.get('attach_mode', 'READ_WRITE'),
|
221
|
+
'filesystem_mount_path': fs.get(
|
222
|
+
'mount_path', f'/mnt/filesystem-skypilot-{i+1}'),
|
223
|
+
'filesystem_mount_tag': f'filesystem-skypilot-{i+1}'
|
224
|
+
})
|
225
|
+
|
213
226
|
resources_vars: Dict[str, Any] = {
|
214
227
|
'instance_type': resources.instance_type,
|
215
228
|
'custom_resources': custom_resources,
|
@@ -217,6 +230,7 @@ class Nebius(clouds.Cloud):
|
|
217
230
|
'image_id': image_family,
|
218
231
|
# Nebius does not support specific zones.
|
219
232
|
'zones': None,
|
233
|
+
'filesystems': resources_vars_fs
|
220
234
|
}
|
221
235
|
|
222
236
|
if acc_dict is not None:
|
@@ -283,7 +297,8 @@ class Nebius(clouds.Cloud):
|
|
283
297
|
|
284
298
|
@classmethod
|
285
299
|
@annotations.lru_cache(scope='request')
|
286
|
-
def _check_compute_credentials(
|
300
|
+
def _check_compute_credentials(
|
301
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
287
302
|
"""Checks if the user has access credentials to
|
288
303
|
Nebius's compute service."""
|
289
304
|
token_cred_msg = (
|
@@ -314,7 +329,8 @@ class Nebius(clouds.Cloud):
|
|
314
329
|
|
315
330
|
@classmethod
|
316
331
|
@annotations.lru_cache(scope='request')
|
317
|
-
def _check_storage_credentials(
|
332
|
+
def _check_storage_credentials(
|
333
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
318
334
|
"""Checks if the user has access credentials to Nebius Object Storage.
|
319
335
|
|
320
336
|
Returns:
|