skypilot-nightly 1.0.0.dev20250522__py3-none-any.whl → 1.0.0.dev20250524__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +46 -16
  3. sky/backends/backend_utils.py +62 -45
  4. sky/backends/cloud_vm_ray_backend.py +19 -5
  5. sky/check.py +398 -171
  6. sky/cli.py +302 -98
  7. sky/client/cli.py +302 -98
  8. sky/client/sdk.py +104 -12
  9. sky/clouds/__init__.py +3 -0
  10. sky/clouds/aws.py +4 -2
  11. sky/clouds/azure.py +4 -2
  12. sky/clouds/cloud.py +24 -6
  13. sky/clouds/cudo.py +2 -1
  14. sky/clouds/do.py +2 -1
  15. sky/clouds/fluidstack.py +2 -1
  16. sky/clouds/gcp.py +23 -5
  17. sky/clouds/ibm.py +4 -2
  18. sky/clouds/kubernetes.py +66 -22
  19. sky/clouds/lambda_cloud.py +2 -1
  20. sky/clouds/nebius.py +18 -2
  21. sky/clouds/oci.py +4 -2
  22. sky/clouds/paperspace.py +2 -1
  23. sky/clouds/runpod.py +2 -1
  24. sky/clouds/scp.py +2 -1
  25. sky/clouds/service_catalog/constants.py +1 -1
  26. sky/clouds/service_catalog/ssh_catalog.py +167 -0
  27. sky/clouds/ssh.py +203 -0
  28. sky/clouds/vast.py +2 -1
  29. sky/clouds/vsphere.py +2 -1
  30. sky/core.py +58 -11
  31. sky/dashboard/out/404.html +1 -1
  32. sky/dashboard/out/_next/static/aHej19bZyl4hoHgrzPCn7/_buildManifest.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/480-ee58038f1a4afd5c.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
  35. sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
  36. sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
  37. sky/dashboard/out/_next/static/chunks/578-7a4795009a56430c.js +6 -0
  38. sky/dashboard/out/_next/static/chunks/734-5f5ce8f347b7f417.js +1 -0
  39. sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/938-f347f6144075b0c8.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/_app-dec800f9ef1b10f4.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-37c042a356f8e608.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
  45. sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/pages/infra-e690d864aa00e2ea.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-db6558a5ec687011.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
  49. sky/dashboard/out/_next/static/chunks/pages/users-2d319455c3f1c3e2.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/pages/workspaces-02a7b60f2ead275f.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
  52. sky/dashboard/out/_next/static/css/d2cdba64c9202dd7.css +3 -0
  53. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  54. sky/dashboard/out/clusters/[cluster].html +1 -1
  55. sky/dashboard/out/clusters.html +1 -1
  56. sky/dashboard/out/index.html +1 -1
  57. sky/dashboard/out/infra.html +1 -1
  58. sky/dashboard/out/jobs/[job].html +1 -1
  59. sky/dashboard/out/jobs.html +1 -1
  60. sky/dashboard/out/users.html +1 -0
  61. sky/dashboard/out/workspaces.html +1 -0
  62. sky/data/storage.py +1 -1
  63. sky/global_user_state.py +42 -19
  64. sky/jobs/constants.py +1 -1
  65. sky/jobs/server/core.py +72 -56
  66. sky/jobs/state.py +26 -5
  67. sky/jobs/utils.py +65 -13
  68. sky/optimizer.py +29 -7
  69. sky/provision/__init__.py +1 -0
  70. sky/provision/aws/instance.py +17 -1
  71. sky/provision/fluidstack/instance.py +1 -0
  72. sky/provision/kubernetes/instance.py +16 -5
  73. sky/provision/kubernetes/utils.py +37 -19
  74. sky/provision/nebius/instance.py +3 -1
  75. sky/provision/nebius/utils.py +14 -2
  76. sky/provision/ssh/__init__.py +18 -0
  77. sky/resources.py +4 -1
  78. sky/serve/server/core.py +9 -6
  79. sky/server/html/token_page.html +6 -1
  80. sky/server/requests/executor.py +1 -0
  81. sky/server/requests/payloads.py +18 -0
  82. sky/server/server.py +108 -5
  83. sky/setup_files/dependencies.py +1 -0
  84. sky/skylet/constants.py +4 -1
  85. sky/skypilot_config.py +83 -9
  86. sky/templates/nebius-ray.yml.j2 +12 -0
  87. sky/utils/cli_utils/status_utils.py +18 -8
  88. sky/utils/infra_utils.py +21 -1
  89. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  90. sky/utils/kubernetes/create_cluster.sh +1 -0
  91. sky/utils/kubernetes/deploy_remote_cluster.py +1440 -0
  92. sky/utils/kubernetes/kubernetes_deploy_utils.py +117 -10
  93. sky/utils/kubernetes/ssh-tunnel.sh +387 -0
  94. sky/utils/log_utils.py +218 -1
  95. sky/utils/schemas.py +75 -0
  96. sky/utils/ux_utils.py +2 -1
  97. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/METADATA +6 -1
  98. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/RECORD +103 -91
  99. sky/dashboard/out/_next/static/CzOVV6JpRQBRt5GhZuhyK/_buildManifest.js +0 -1
  100. sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
  101. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  102. sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
  103. sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
  104. sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
  106. sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
  108. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
  109. sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
  110. sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  114. sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
  115. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  116. /sky/dashboard/out/_next/static/{CzOVV6JpRQBRt5GhZuhyK → aHej19bZyl4hoHgrzPCn7}/_ssgManifest.js +0 -0
  117. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/WHEEL +0 -0
  118. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/entry_points.txt +0 -0
  119. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/licenses/LICENSE +0 -0
  120. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/top_level.txt +0 -0
sky/client/sdk.py CHANGED
@@ -94,12 +94,15 @@ def stream_response(request_id: Optional[str],
94
94
  @server_common.check_server_healthy_or_start
95
95
  @annotations.client_api
96
96
  def check(infra_list: Optional[Tuple[str, ...]],
97
- verbose: bool) -> server_common.RequestId:
97
+ verbose: bool,
98
+ workspace: Optional[str] = None) -> server_common.RequestId:
98
99
  """Checks the credentials to enable clouds.
99
100
 
100
101
  Args:
101
102
  infra: The infra to check.
102
103
  verbose: Whether to show verbose output.
104
+ workspace: The workspace to check. If None, all workspaces will be
105
+ checked.
103
106
 
104
107
  Returns:
105
108
  The request ID of the check request.
@@ -123,7 +126,9 @@ def check(infra_list: Optional[Tuple[str, ...]],
123
126
  f'ignoring {region_zone}')
124
127
  specified_clouds.append(infra.cloud)
125
128
  clouds = tuple(specified_clouds)
126
- body = payloads.CheckBody(clouds=clouds, verbose=verbose)
129
+ body = payloads.CheckBody(clouds=clouds,
130
+ verbose=verbose,
131
+ workspace=workspace)
127
132
  response = requests.post(f'{server_common.get_server_url()}/check',
128
133
  json=json.loads(body.model_dump_json()),
129
134
  cookies=server_common.get_api_cookie_jar())
@@ -133,16 +138,23 @@ def check(infra_list: Optional[Tuple[str, ...]],
133
138
  @usage_lib.entrypoint
134
139
  @server_common.check_server_healthy_or_start
135
140
  @annotations.client_api
136
- def enabled_clouds() -> server_common.RequestId:
141
+ def enabled_clouds(workspace: Optional[str] = None) -> server_common.RequestId:
137
142
  """Gets the enabled clouds.
138
143
 
144
+ Args:
145
+ workspace: The workspace to get the enabled clouds for. If None, the
146
+ active workspace will be used.
147
+
139
148
  Returns:
140
149
  The request ID of the enabled clouds request.
141
150
 
142
151
  Request Returns:
143
152
  A list of enabled clouds in string format.
144
153
  """
145
- response = requests.get(f'{server_common.get_server_url()}/enabled_clouds',
154
+ if workspace is None:
155
+ workspace = skypilot_config.get_active_workspace()
156
+ response = requests.get((f'{server_common.get_server_url()}/enabled_clouds?'
157
+ f'workspace={workspace}'),
146
158
  cookies=server_common.get_api_cookie_jar())
147
159
  return server_common.get_request_id(response)
148
160
 
@@ -225,7 +237,7 @@ def list_accelerator_counts(
225
237
  accelerator names mapped to a list of available counts. See usage
226
238
  in cli.py.
227
239
  """
228
- body = payloads.ListAcceleratorsBody(
240
+ body = payloads.ListAcceleratorCountsBody(
229
241
  gpus_only=gpus_only,
230
242
  name_filter=name_filter,
231
243
  region_filter=region_filter,
@@ -278,6 +290,13 @@ def optimize(
278
290
  return server_common.get_request_id(response)
279
291
 
280
292
 
293
+ def workspaces() -> server_common.RequestId:
294
+ """Gets the workspaces."""
295
+ response = requests.get(f'{server_common.get_server_url()}/workspaces',
296
+ cookies=server_common.get_api_cookie_jar())
297
+ return server_common.get_request_id(response)
298
+
299
+
281
300
  @usage_lib.entrypoint
282
301
  @server_common.check_server_healthy_or_start
283
302
  @annotations.client_api
@@ -1396,13 +1415,60 @@ def local_down() -> server_common.RequestId:
1396
1415
  return server_common.get_request_id(response)
1397
1416
 
1398
1417
 
1418
+ @usage_lib.entrypoint
1419
+ @server_common.check_server_healthy_or_start
1420
+ @annotations.client_api
1421
+ def ssh_up(infra: Optional[str] = None) -> server_common.RequestId:
1422
+ """Deploys the SSH Node Pools defined in ~/.sky/ssh_targets.yaml.
1423
+
1424
+ Args:
1425
+ infra: Name of the cluster configuration in ssh_targets.yaml.
1426
+ If None, the first cluster in the file is used.
1427
+
1428
+ Returns:
1429
+ request_id: The request ID of the SSH cluster deployment request.
1430
+ """
1431
+ body = payloads.SSHUpBody(
1432
+ infra=infra,
1433
+ cleanup=False,
1434
+ )
1435
+ response = requests.post(f'{server_common.get_server_url()}/ssh_up',
1436
+ json=json.loads(body.model_dump_json()),
1437
+ cookies=server_common.get_api_cookie_jar())
1438
+ return server_common.get_request_id(response)
1439
+
1440
+
1441
+ @usage_lib.entrypoint
1442
+ @server_common.check_server_healthy_or_start
1443
+ @annotations.client_api
1444
+ def ssh_down(infra: Optional[str] = None) -> server_common.RequestId:
1445
+ """Tears down a Kubernetes cluster on SSH targets.
1446
+
1447
+ Args:
1448
+ infra: Name of the cluster configuration in ssh_targets.yaml.
1449
+ If None, the first cluster in the file is used.
1450
+
1451
+ Returns:
1452
+ request_id: The request ID of the SSH cluster teardown request.
1453
+ """
1454
+ body = payloads.SSHUpBody(
1455
+ infra=infra,
1456
+ cleanup=True,
1457
+ )
1458
+ response = requests.post(f'{server_common.get_server_url()}/ssh_down',
1459
+ json=json.loads(body.model_dump_json()),
1460
+ cookies=server_common.get_api_cookie_jar())
1461
+ return server_common.get_request_id(response)
1462
+
1463
+
1399
1464
  @usage_lib.entrypoint
1400
1465
  @server_common.check_server_healthy_or_start
1401
1466
  @annotations.client_api
1402
1467
  def realtime_kubernetes_gpu_availability(
1403
1468
  context: Optional[str] = None,
1404
1469
  name_filter: Optional[str] = None,
1405
- quantity_filter: Optional[int] = None) -> server_common.RequestId:
1470
+ quantity_filter: Optional[int] = None,
1471
+ is_ssh: Optional[bool] = None) -> server_common.RequestId:
1406
1472
  """Gets the real-time Kubernetes GPU availability.
1407
1473
 
1408
1474
  Returns:
@@ -1412,6 +1478,7 @@ def realtime_kubernetes_gpu_availability(
1412
1478
  context=context,
1413
1479
  name_filter=name_filter,
1414
1480
  quantity_filter=quantity_filter,
1481
+ is_ssh=is_ssh,
1415
1482
  )
1416
1483
  response = requests.post(
1417
1484
  f'{server_common.get_server_url()}/'
@@ -1683,7 +1750,7 @@ def api_status(
1683
1750
  @usage_lib.entrypoint
1684
1751
  @server_common.check_server_healthy_or_start
1685
1752
  @annotations.client_api
1686
- def api_info() -> Dict[str, str]:
1753
+ def api_info() -> Dict[str, Any]:
1687
1754
  """Gets the server's status, commit and version.
1688
1755
 
1689
1756
  Returns:
@@ -1696,8 +1763,15 @@ def api_info() -> Dict[str, str]:
1696
1763
  'api_version': '1',
1697
1764
  'commit': 'abc1234567890',
1698
1765
  'version': '1.0.0',
1766
+ 'version_on_disk': '1.0.0',
1767
+ 'user': {
1768
+ 'name': 'test@example.com',
1769
+ 'id': '12345abcd',
1770
+ },
1699
1771
  }
1700
1772
 
1773
+ Note that user may be None if we are not using an auth proxy.
1774
+
1701
1775
  """
1702
1776
  response = requests.get(f'{server_common.get_server_url()}/api/health',
1703
1777
  cookies=server_common.get_api_cookie_jar())
@@ -1820,7 +1894,7 @@ def api_server_logs(follow: bool = True, tail: Optional[int] = None) -> None:
1820
1894
 
1821
1895
  @usage_lib.entrypoint
1822
1896
  @annotations.client_api
1823
- def api_login(endpoint: Optional[str] = None) -> None:
1897
+ def api_login(endpoint: Optional[str] = None, get_token: bool = False) -> None:
1824
1898
  """Logs into a SkyPilot API server.
1825
1899
 
1826
1900
  This sets the endpoint globally, i.e., all SkyPilot CLI and SDK calls will
@@ -1847,7 +1921,7 @@ def api_login(endpoint: Optional[str] = None) -> None:
1847
1921
  raise click.BadParameter('Endpoint must be a valid URL.')
1848
1922
 
1849
1923
  server_status = server_common.check_server_healthy(endpoint)
1850
- if server_status == server_common.ApiServerStatus.NEEDS_AUTH:
1924
+ if server_status == server_common.ApiServerStatus.NEEDS_AUTH or get_token:
1851
1925
  # We detected an auth proxy, so go through the auth proxy cookie flow.
1852
1926
  parsed_url = urlparse.urlparse(endpoint)
1853
1927
  token_url = f'{endpoint}/token'
@@ -1867,11 +1941,20 @@ def api_login(endpoint: Optional[str] = None) -> None:
1867
1941
  raise ValueError(f'Malformed token: {token}') from e
1868
1942
  logger.debug(f'Token data: {data!r}')
1869
1943
  try:
1870
- cookie_dict = json.loads(data)
1944
+ json_data = json.loads(data)
1871
1945
  except (json.JSONDecodeError, UnicodeDecodeError) as e:
1872
1946
  raise ValueError(f'Malformed token data: {data!r}') from e
1873
- if not isinstance(cookie_dict, dict):
1874
- raise ValueError(f'Malformed token JSON: {cookie_dict}')
1947
+ if not isinstance(json_data, dict):
1948
+ raise ValueError(f'Malformed token JSON: {json_data}')
1949
+
1950
+ if json_data.get('v') == 1:
1951
+ user_hash = json_data.get('user')
1952
+ cookie_dict = json_data['cookies']
1953
+ elif 'v' not in json_data:
1954
+ user_hash = None
1955
+ cookie_dict = json_data
1956
+ else:
1957
+ raise ValueError(f'Unsupported token version: {json_data.get("v")}')
1875
1958
 
1876
1959
  cookie_jar = cookiejar.MozillaCookieJar()
1877
1960
  for (name, value) in cookie_dict.items():
@@ -1914,6 +1997,15 @@ def api_login(endpoint: Optional[str] = None) -> None:
1914
1997
  server_common.get_api_cookie_jar_path())
1915
1998
  cookie_jar.save(cookie_jar_path)
1916
1999
 
2000
+ # If we have a user_hash, save it to the local file
2001
+ if user_hash is not None:
2002
+ if not common_utils.is_valid_user_hash(user_hash):
2003
+ raise ValueError(f'Invalid user hash: {user_hash}')
2004
+ with open(os.path.expanduser('~/.sky/user_hash'),
2005
+ 'w',
2006
+ encoding='utf-8') as f:
2007
+ f.write(user_hash)
2008
+
1917
2009
  # Set the endpoint in the config file
1918
2010
  config_path = pathlib.Path(
1919
2011
  skypilot_config.get_user_config_path()).expanduser()
sky/clouds/__init__.py CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  from sky.clouds.cloud import Cloud
4
4
  from sky.clouds.cloud import cloud_in_iterable
5
+ from sky.clouds.cloud import CloudCapability
5
6
  from sky.clouds.cloud import CloudImplementationFeatures
6
7
  from sky.clouds.cloud import DummyCloud
7
8
  from sky.clouds.cloud import OpenPortsVersion
@@ -26,6 +27,7 @@ from sky.clouds.oci import OCI
26
27
  from sky.clouds.paperspace import Paperspace
27
28
  from sky.clouds.runpod import RunPod
28
29
  from sky.clouds.scp import SCP
30
+ from sky.clouds.ssh import SSH
29
31
  from sky.clouds.vast import Vast
30
32
  from sky.clouds.vsphere import Vsphere
31
33
 
@@ -46,6 +48,7 @@ __all__ = [
46
48
  'OCI',
47
49
  'Vsphere',
48
50
  'Kubernetes',
51
+ 'SSH',
49
52
  'CloudImplementationFeatures',
50
53
  'Region',
51
54
  'Zone',
sky/clouds/aws.py CHANGED
@@ -565,12 +565,14 @@ class AWS(clouds.Cloud):
565
565
  fuzzy_candidate_list, None)
566
566
 
567
567
  @classmethod
568
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
568
+ def _check_compute_credentials(
569
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
569
570
  """Checks if the user has access credentials to this AWS's compute service."""
570
571
  return cls._check_credentials()
571
572
 
572
573
  @classmethod
573
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
574
+ def _check_storage_credentials(
575
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
574
576
  """Checks if the user has access credentials to this AWS's storage service."""
575
577
  # TODO(seungjin): Implement separate check for
576
578
  # if the user has access to S3.
sky/clouds/azure.py CHANGED
@@ -518,12 +518,14 @@ class Azure(clouds.Cloud):
518
518
  fuzzy_candidate_list, None)
519
519
 
520
520
  @classmethod
521
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
521
+ def _check_compute_credentials(
522
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
522
523
  """Checks if the user has access credentials to this cloud's compute service."""
523
524
  return cls._check_credentials()
524
525
 
525
526
  @classmethod
526
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
527
+ def _check_storage_credentials(
528
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
527
529
  """Checks if the user has access credentials to this cloud's storage service."""
528
530
  # TODO(seungjin): Implement separate check for
529
531
  # if the user has access to Azure Blob Storage.
sky/clouds/cloud.py CHANGED
@@ -457,12 +457,14 @@ class Cloud:
457
457
 
458
458
  @classmethod
459
459
  def check_credentials(
460
- cls,
461
- cloud_capability: CloudCapability) -> Tuple[bool, Optional[str]]:
460
+ cls, cloud_capability: CloudCapability
461
+ ) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
462
462
  """Checks if the user has access credentials to this cloud.
463
463
 
464
- Returns a boolean of whether the user can access this cloud, and a
465
- string describing the reason if the user cannot access.
464
+ Returns a boolean of whether the user can access this cloud, and:
465
+ - For SSH and Kubernetes, a dictionary that maps context names to
466
+ the status of the context.
467
+ - For others, a string describing the reason if cannot access.
466
468
 
467
469
  Raises NotSupportedError if the capability is
468
470
  not supported by this cloud.
@@ -474,19 +476,30 @@ class Cloud:
474
476
  assert_never(cloud_capability)
475
477
 
476
478
  @classmethod
477
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
479
+ def _check_compute_credentials(
480
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
478
481
  """Checks if the user has access credentials to
479
482
  this cloud's compute service."""
480
483
  raise exceptions.NotSupportedError(
481
484
  f'{cls._REPR} does not support {CloudCapability.COMPUTE.value}.')
482
485
 
483
486
  @classmethod
484
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
487
+ def _check_storage_credentials(
488
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
485
489
  """Checks if the user has access credentials to
486
490
  this cloud's storage service."""
487
491
  raise exceptions.NotSupportedError(
488
492
  f'{cls._REPR} does not support {CloudCapability.STORAGE.value}.')
489
493
 
494
+ @classmethod
495
+ def get_infras(cls) -> List[str]:
496
+ """Returns a list of enabled infrastructures for this cloud.
497
+
498
+ For Kubernetes and SSH, return a list of resource pools.
499
+ For all other clouds, return self.
500
+ """
501
+ return [cls._REPR.lower()]
502
+
490
503
  # TODO(zhwu): Make the return type immutable.
491
504
  @classmethod
492
505
  def get_user_identities(cls) -> Optional[List[List[str]]]:
@@ -878,6 +891,11 @@ class Cloud:
878
891
  def canonical_name(cls) -> str:
879
892
  return cls.__name__.lower()
880
893
 
894
+ @classmethod
895
+ def display_name(cls) -> str:
896
+ """Name of the cloud used in messages displayed to the user."""
897
+ return cls.canonical_name()
898
+
881
899
  def __repr__(self):
882
900
  return self._REPR
883
901
 
sky/clouds/cudo.py CHANGED
@@ -270,7 +270,8 @@ class Cudo(clouds.Cloud):
270
270
  fuzzy_candidate_list, None)
271
271
 
272
272
  @classmethod
273
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
273
+ def _check_compute_credentials(
274
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
274
275
  """Checks if the user has access credentials to
275
276
  Cudo's compute service."""
276
277
  try:
sky/clouds/do.py CHANGED
@@ -264,7 +264,8 @@ class DO(clouds.Cloud):
264
264
  fuzzy_candidate_list, None)
265
265
 
266
266
  @classmethod
267
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
267
+ def _check_compute_credentials(
268
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
268
269
  """Verify that the user has valid credentials for
269
270
  DO's compute service."""
270
271
 
sky/clouds/fluidstack.py CHANGED
@@ -261,7 +261,8 @@ class Fluidstack(clouds.Cloud):
261
261
  fuzzy_candidate_list, None)
262
262
 
263
263
  @classmethod
264
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
264
+ def _check_compute_credentials(
265
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
265
266
  """Checks if the user has access credentials to
266
267
  FluidStack's compute service."""
267
268
  try:
sky/clouds/gcp.py CHANGED
@@ -791,7 +791,8 @@ class GCP(clouds.Cloud):
791
791
  return DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH
792
792
 
793
793
  @classmethod
794
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
794
+ def _check_compute_credentials(
795
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
795
796
  """Checks if the user has access credentials to this cloud's compute service."""
796
797
  return cls._check_credentials(
797
798
  [
@@ -803,7 +804,8 @@ class GCP(clouds.Cloud):
803
804
  gcp_utils.get_minimal_compute_permissions())
804
805
 
805
806
  @classmethod
806
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
807
+ def _check_storage_credentials(
808
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
807
809
  """Checks if the user has access credentials to this cloud's storage service."""
808
810
  return cls._check_credentials(
809
811
  [('storage', 'Cloud Storage')],
@@ -995,10 +997,21 @@ class GCP(clouds.Cloud):
995
997
  return GCPIdentityType.SHARED_CREDENTIALS_FILE
996
998
 
997
999
  @classmethod
998
- @annotations.lru_cache(scope='request',
999
- maxsize=1) # Cache since getting identity is slow.
1000
1000
  def get_user_identities(cls) -> List[List[str]]:
1001
1001
  """Returns the email address + project id of the active user."""
1002
+ gcp_workspace_config = json.dumps(
1003
+ skypilot_config.get_workspace_cloud('gcp'), sort_keys=True)
1004
+ return cls._get_user_identities(gcp_workspace_config)
1005
+
1006
+ @classmethod
1007
+ @annotations.lru_cache(scope='request', maxsize=5)
1008
+ def _get_user_identities(
1009
+ cls, workspace_config: Optional[str]) -> List[List[str]]:
1010
+ # We add workspace_config in args to avoid caching the GCP identity
1011
+ # for when different workspace configs are used. Use json.dumps to
1012
+ # ensure the config is hashable.
1013
+ del workspace_config # Unused
1014
+
1002
1015
  try:
1003
1016
  account = _run_output('gcloud auth list --filter=status:ACTIVE '
1004
1017
  '--format="value(account)"')
@@ -1029,7 +1042,8 @@ class GCP(clouds.Cloud):
1029
1042
  f'{common_utils.format_exception(e, use_bracket=True)}'
1030
1043
  ) from e
1031
1044
  # TODO: Return a list of identities in the profile when we support
1032
- # automatic switching for GCP. Currently we only support one identity.
1045
+ # automatic switching for GCP. Currently we only support one
1046
+ # identity.
1033
1047
  return [[f'{account} [project_id={project_id}]']]
1034
1048
 
1035
1049
  @classmethod
@@ -1059,6 +1073,10 @@ class GCP(clouds.Cloud):
1059
1073
  return 'dryrun-project-id'
1060
1074
  # pylint: disable=import-outside-toplevel
1061
1075
  from google import auth # type: ignore
1076
+ config_project_id = skypilot_config.get_workspace_cloud('gcp').get(
1077
+ 'project_id', None)
1078
+ if config_project_id:
1079
+ return config_project_id
1062
1080
  _, project_id = auth.default()
1063
1081
  if project_id is None:
1064
1082
  raise exceptions.CloudUserIdentityError(
sky/clouds/ibm.py CHANGED
@@ -399,13 +399,15 @@ class IBM(clouds.Cloud):
399
399
  return image_size
400
400
 
401
401
  @classmethod
402
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
402
+ def _check_compute_credentials(
403
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
403
404
  """Checks if the user has access credentials to
404
405
  IBM's compute service."""
405
406
  return cls._check_credentials()
406
407
 
407
408
  @classmethod
408
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
409
+ def _check_storage_credentials(
410
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
409
411
  """Checks if the user has access credentials to
410
412
  IBM's storage service."""
411
413
  # TODO(seungjin): Implement separate check for
sky/clouds/kubernetes.py CHANGED
@@ -4,6 +4,8 @@ import re
4
4
  import typing
5
5
  from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
6
6
 
7
+ import colorama
8
+
7
9
  from sky import clouds
8
10
  from sky import exceptions
9
11
  from sky import sky_logging
@@ -149,7 +151,7 @@ class Kubernetes(clouds.Cloud):
149
151
  'Ignoring these contexts.')
150
152
 
151
153
  @classmethod
152
- def existing_allowed_contexts(cls) -> List[str]:
154
+ def existing_allowed_contexts(cls, silent: bool = False) -> List[str]:
153
155
  """Get existing allowed contexts.
154
156
 
155
157
  If None is returned in the list, it means that we are running in a pod
@@ -162,6 +164,12 @@ class Kubernetes(clouds.Cloud):
162
164
 
163
165
  all_contexts = set(all_contexts)
164
166
 
167
+ # Exclude contexts starting with `ssh-`
168
+ # TODO(romilb): Remove when SSH Node Pools use a separate kubeconfig.
169
+ all_contexts = [
170
+ ctx for ctx in all_contexts if not ctx.startswith('ssh-')
171
+ ]
172
+
165
173
  allowed_contexts = skypilot_config.get_nested(
166
174
  ('kubernetes', 'allowed_contexts'), None)
167
175
 
@@ -183,8 +191,12 @@ class Kubernetes(clouds.Cloud):
183
191
  if context in all_contexts:
184
192
  existing_contexts.append(context)
185
193
  else:
194
+ # Skip SSH Node Pool contexts
195
+ if context.startswith('ssh-'):
196
+ continue
186
197
  skipped_contexts.append(context)
187
- cls._log_skipped_contexts_once(tuple(skipped_contexts))
198
+ if not silent:
199
+ cls._log_skipped_contexts_once(tuple(skipped_contexts))
188
200
  return existing_contexts
189
201
 
190
202
  @classmethod
@@ -640,7 +652,7 @@ class Kubernetes(clouds.Cloud):
640
652
  resource_list = []
641
653
  for instance_type in instance_list:
642
654
  r = resources.copy(
643
- cloud=Kubernetes(),
655
+ cloud=self.__class__(),
644
656
  instance_type=instance_type,
645
657
  accelerators=None,
646
658
  )
@@ -692,7 +704,43 @@ class Kubernetes(clouds.Cloud):
692
704
  [], None)
693
705
 
694
706
  @classmethod
695
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
707
+ def _check_single_context(cls, context: str) -> Tuple[bool, str]:
708
+ """Check if the user has access credentials to a single SSH context."""
709
+
710
+ def _red_color(str_to_format: str) -> str:
711
+ return (f'{colorama.Fore.LIGHTRED_EX}'
712
+ f'{str_to_format}'
713
+ f'{colorama.Style.RESET_ALL}')
714
+
715
+ def _dim_color(str_to_format: str) -> str:
716
+ return (f'{colorama.Style.DIM}'
717
+ f'{str_to_format}'
718
+ f'{colorama.Style.RESET_ALL}')
719
+
720
+ def _bright_green_color(str_to_format: str) -> str:
721
+ return (f'{colorama.Fore.GREEN}'
722
+ f'{str_to_format}'
723
+ f'{colorama.Style.RESET_ALL}')
724
+
725
+ try:
726
+ check_result = kubernetes_utils.check_credentials(
727
+ context, run_optional_checks=True)
728
+ if check_result[0]:
729
+ if check_result[1] is not None:
730
+ return True, (_bright_green_color('enabled.') +
731
+ _dim_color(f' Note: {check_result[1]}'))
732
+ else:
733
+ return True, _bright_green_color('enabled.')
734
+ else:
735
+ assert check_result[1] is not None
736
+ return False, (_red_color('disabled.') +
737
+ _dim_color(f' Reason: {check_result[1]}'))
738
+ except Exception as e: # pylint: disable=broad-except
739
+ return False, _red_color(str(e))
740
+
741
+ @classmethod
742
+ def _check_compute_credentials(
743
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
696
744
  """Checks if the user has access credentials to
697
745
  Kubernetes."""
698
746
  # Check for port forward dependencies
@@ -719,26 +767,15 @@ class Kubernetes(clouds.Cloud):
719
767
  return (False, 'No available context found in kubeconfig. '
720
768
  'Check if you have a valid kubeconfig file' +
721
769
  check_skypilot_config_msg)
722
- reasons = []
723
- hints = []
770
+
771
+ ctx2text = {}
724
772
  success = False
725
773
  for context in existing_allowed_contexts:
726
- try:
727
- check_result = kubernetes_utils.check_credentials(
728
- context, run_optional_checks=True)
729
- if check_result[0]:
730
- success = True
731
- if check_result[1] is not None:
732
- hints.append(f'Context {context}: {check_result[1]}')
733
- else:
734
- reasons.append(f'Context {context}: {check_result[1]}')
735
- except Exception as e: # pylint: disable=broad-except
736
- return (False, f'Credential check failed for {context}: '
737
- f'{common_utils.format_exception(e)}')
738
- if success:
739
- return (True, cls._format_credential_check_results(hints, reasons))
740
- return (False, 'Failed to find available context with working '
741
- 'credentials. Details:\n' + '\n'.join(reasons))
774
+ suc, text = cls._check_single_context(context)
775
+ success = success or suc
776
+ ctx2text[context] = text
777
+
778
+ return success, ctx2text
742
779
 
743
780
  @classmethod
744
781
  def _format_credential_check_results(cls, hints: List[str],
@@ -855,3 +892,10 @@ class Kubernetes(clouds.Cloud):
855
892
  if not key_valid or not value_valid:
856
893
  return False, error_msg
857
894
  return True, None
895
+
896
+ @classmethod
897
+ def get_infras(cls) -> List[str]:
898
+ return [
899
+ f'{cls._REPR.lower()}/{c}'
900
+ for c in cls.existing_allowed_contexts(silent=True)
901
+ ]
sky/clouds/lambda_cloud.py CHANGED
@@ -244,7 +244,8 @@ class Lambda(clouds.Cloud):
244
244
  fuzzy_candidate_list, None)
245
245
 
246
246
  @classmethod
247
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
247
+ def _check_compute_credentials(
248
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
248
249
  """Checks if the user has access credentials to
249
250
  Lambda's compute service."""
250
251
  try:
sky/clouds/nebius.py CHANGED
@@ -4,6 +4,7 @@ import typing
4
4
  from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
5
5
 
6
6
  from sky import clouds
7
+ from sky import skypilot_config
7
8
  from sky.adaptors import nebius
8
9
  from sky.clouds import service_catalog
9
10
  from sky.utils import annotations
@@ -210,6 +211,18 @@ class Nebius(clouds.Cloud):
210
211
  raise RuntimeError('Unsupported instance type for Nebius cloud:'
211
212
  f' {resources.instance_type}')
212
213
 
214
+ config_fs = skypilot_config.get_nested(
215
+ ('nebius', region.name, 'filesystems'), [])
216
+ resources_vars_fs = []
217
+ for i, fs in enumerate(config_fs):
218
+ resources_vars_fs.append({
219
+ 'filesystem_id': fs['filesystem_id'],
220
+ 'filesystem_attach_mode': fs.get('attach_mode', 'READ_WRITE'),
221
+ 'filesystem_mount_path': fs.get(
222
+ 'mount_path', f'/mnt/filesystem-skypilot-{i+1}'),
223
+ 'filesystem_mount_tag': f'filesystem-skypilot-{i+1}'
224
+ })
225
+
213
226
  resources_vars: Dict[str, Any] = {
214
227
  'instance_type': resources.instance_type,
215
228
  'custom_resources': custom_resources,
@@ -217,6 +230,7 @@ class Nebius(clouds.Cloud):
217
230
  'image_id': image_family,
218
231
  # Nebius does not support specific zones.
219
232
  'zones': None,
233
+ 'filesystems': resources_vars_fs
220
234
  }
221
235
 
222
236
  if acc_dict is not None:
@@ -283,7 +297,8 @@ class Nebius(clouds.Cloud):
283
297
 
284
298
  @classmethod
285
299
  @annotations.lru_cache(scope='request')
286
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
300
+ def _check_compute_credentials(
301
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
287
302
  """Checks if the user has access credentials to
288
303
  Nebius's compute service."""
289
304
  token_cred_msg = (
@@ -314,7 +329,8 @@ class Nebius(clouds.Cloud):
314
329
 
315
330
  @classmethod
316
331
  @annotations.lru_cache(scope='request')
317
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
332
+ def _check_storage_credentials(
333
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
318
334
  """Checks if the user has access credentials to Nebius Object Storage.
319
335
 
320
336
  Returns: