skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250524__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +62 -45
  3. sky/backends/cloud_vm_ray_backend.py +3 -1
  4. sky/check.py +332 -170
  5. sky/cli.py +44 -11
  6. sky/client/cli.py +44 -11
  7. sky/client/sdk.py +54 -10
  8. sky/clouds/gcp.py +19 -3
  9. sky/core.py +5 -2
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/aHej19bZyl4hoHgrzPCn7/_buildManifest.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/480-ee58038f1a4afd5c.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
  14. sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
  15. sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
  16. sky/dashboard/out/_next/static/chunks/578-7a4795009a56430c.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/734-5f5ce8f347b7f417.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/938-f347f6144075b0c8.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/pages/_app-dec800f9ef1b10f4.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-37c042a356f8e608.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
  24. sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/pages/infra-e690d864aa00e2ea.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-db6558a5ec687011.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
  28. sky/dashboard/out/_next/static/chunks/pages/users-2d319455c3f1c3e2.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/workspaces-02a7b60f2ead275f.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
  31. sky/dashboard/out/_next/static/css/d2cdba64c9202dd7.css +3 -0
  32. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  33. sky/dashboard/out/clusters/[cluster].html +1 -1
  34. sky/dashboard/out/clusters.html +1 -1
  35. sky/dashboard/out/index.html +1 -1
  36. sky/dashboard/out/infra.html +1 -1
  37. sky/dashboard/out/jobs/[job].html +1 -1
  38. sky/dashboard/out/jobs.html +1 -1
  39. sky/dashboard/out/users.html +1 -0
  40. sky/dashboard/out/workspaces.html +1 -0
  41. sky/data/storage.py +1 -1
  42. sky/global_user_state.py +42 -19
  43. sky/jobs/constants.py +1 -1
  44. sky/jobs/server/core.py +72 -56
  45. sky/jobs/state.py +26 -5
  46. sky/jobs/utils.py +65 -13
  47. sky/optimizer.py +6 -3
  48. sky/provision/fluidstack/instance.py +1 -0
  49. sky/serve/server/core.py +9 -6
  50. sky/server/html/token_page.html +6 -1
  51. sky/server/requests/executor.py +1 -0
  52. sky/server/requests/payloads.py +11 -0
  53. sky/server/server.py +68 -5
  54. sky/skylet/constants.py +4 -1
  55. sky/skypilot_config.py +83 -9
  56. sky/utils/cli_utils/status_utils.py +18 -8
  57. sky/utils/kubernetes/deploy_remote_cluster.py +150 -147
  58. sky/utils/log_utils.py +4 -0
  59. sky/utils/schemas.py +54 -0
  60. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/METADATA +1 -1
  61. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/RECORD +66 -59
  62. sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
  63. sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
  64. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  65. sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
  66. sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
  67. sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
  68. sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
  69. sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
  70. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
  71. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
  72. sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
  73. sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
  74. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
  75. sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  77. sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
  78. /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → aHej19bZyl4hoHgrzPCn7}/_ssgManifest.js +0 -0
  79. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/WHEEL +0 -0
  80. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/entry_points.txt +0 -0
  81. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/licenses/LICENSE +0 -0
  82. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250524.dist-info}/top_level.txt +0 -0
@@ -48,7 +48,8 @@ class StatusColumn:
48
48
  def show_status_table(cluster_records: List[_ClusterRecord],
49
49
  show_all: bool,
50
50
  show_user: bool,
51
- query_clusters: Optional[List[str]] = None) -> int:
51
+ query_clusters: Optional[List[str]] = None,
52
+ show_workspaces: bool = False) -> int:
52
53
  """Compute cluster table values and display.
53
54
 
54
55
  Returns:
@@ -56,7 +57,6 @@ def show_status_table(cluster_records: List[_ClusterRecord],
56
57
  STOPPED.
57
58
  """
58
59
  # TODO(zhwu): Update the information for autostop clusters.
59
-
60
60
  status_columns = [
61
61
  StatusColumn('NAME', _get_name),
62
62
  ]
@@ -66,6 +66,9 @@ def show_status_table(cluster_records: List[_ClusterRecord],
66
66
  StatusColumn('USER_ID', _get_user_hash, show_by_default=False))
67
67
 
68
68
  status_columns += [
69
+ StatusColumn('WORKSPACE',
70
+ _get_workspace,
71
+ show_by_default=show_workspaces),
69
72
  StatusColumn('INFRA', _get_infra, truncate=not show_all),
70
73
  StatusColumn('RESOURCES', _get_resources, truncate=not show_all),
71
74
  StatusColumn('STATUS', _get_status_colored),
@@ -106,12 +109,13 @@ def show_status_table(cluster_records: List[_ClusterRecord],
106
109
  for cluster in query_clusters
107
110
  if cluster not in cluster_names
108
111
  ]
109
- cluster_str = 'Cluster'
110
- if len(not_found_clusters) > 1:
111
- cluster_str += 's'
112
- cluster_str += ' '
113
- cluster_str += ', '.join(not_found_clusters)
114
- click.echo(f'{cluster_str} not found.')
112
+ if not_found_clusters:
113
+ cluster_str = 'Cluster'
114
+ if len(not_found_clusters) > 1:
115
+ cluster_str += 's'
116
+ cluster_str += ' '
117
+ cluster_str += ', '.join(not_found_clusters)
118
+ click.echo(f'{cluster_str} not found.')
115
119
  elif not cluster_records:
116
120
  click.echo('No existing clusters.')
117
121
  return num_pending_autostop
@@ -243,6 +247,12 @@ def _get_status(cluster_record: _ClusterRecord,
243
247
  return cluster_record['status']
244
248
 
245
249
 
250
+ def _get_workspace(cluster_record: _ClusterRecord,
251
+ truncate: bool = True) -> str:
252
+ del truncate
253
+ return cluster_record['workspace']
254
+
255
+
246
256
  def _get_status_colored(cluster_record: _ClusterRecord,
247
257
  truncate: bool = True) -> str:
248
258
  del truncate
@@ -14,6 +14,8 @@ from typing import Any, Dict, List, Optional, Set
14
14
 
15
15
  import yaml
16
16
 
17
+ from sky.utils import ux_utils
18
+
17
19
  # Colors for nicer UX
18
20
  RED = '\033[0;31m'
19
21
  GREEN = '\033[0;32m'
@@ -117,21 +119,19 @@ def parse_args():
117
119
  def load_ssh_targets(file_path: str) -> Dict[str, Any]:
118
120
  """Load SSH targets from YAML file."""
119
121
  if not os.path.exists(file_path):
120
- print(f'{RED}Error: SSH Node Pools file not found: {file_path}{NC}',
121
- file=sys.stderr)
122
- sys.exit(1)
122
+ with ux_utils.print_exception_no_traceback():
123
+ raise ValueError(f'SSH Node Pools file not found: {file_path}')
123
124
 
124
125
  try:
125
126
  with open(file_path, 'r', encoding='utf-8') as f:
126
127
  targets = yaml.load(f, Loader=UniqueKeySafeLoader)
127
128
  return targets
128
129
  except yaml.constructor.ConstructorError as e:
129
- print(f'{RED}{e.note}{NC}', file=sys.stderr)
130
- sys.exit(1)
130
+ with ux_utils.print_exception_no_traceback():
131
+ raise ValueError(e.note) from e
131
132
  except (yaml.YAMLError, IOError, OSError) as e:
132
- print(f'{RED}Error loading SSH Node Pools file: {e}{NC}',
133
- file=sys.stderr)
134
- sys.exit(1)
133
+ with ux_utils.print_exception_no_traceback():
134
+ raise ValueError(f'Error loading SSH Node Pools file: {e}') from e
135
135
 
136
136
 
137
137
  def check_host_in_ssh_config(hostname: str) -> bool:
@@ -181,31 +181,28 @@ def get_cluster_config(targets: Dict[str, Any],
181
181
  file_path: Optional[str] = None) -> Dict[str, Any]:
182
182
  """Get configuration for specific clusters or all clusters."""
183
183
  if not targets:
184
- print(
185
- f'{RED}Error: No clusters defined in SSH Node Pools '
186
- f'file {file_path}{NC}',
187
- file=sys.stderr)
188
- sys.exit(1)
184
+ with ux_utils.print_exception_no_traceback():
185
+ raise ValueError(
186
+ f'No clusters defined in SSH Node Pools file {file_path}')
189
187
 
190
188
  if cluster_name:
191
189
  if cluster_name not in targets:
192
- print(
193
- f'{RED}Error: Cluster {cluster_name!r} not found in '
194
- f'SSH Node Pools file {file_path}{NC}',
195
- file=sys.stderr)
196
- sys.exit(1)
190
+ with ux_utils.print_exception_no_traceback():
191
+ raise ValueError(f'Cluster {cluster_name!r} not found in '
192
+ f'SSH Node Pools file {file_path}')
197
193
  return {cluster_name: targets[cluster_name]}
198
194
 
199
195
  # Return all clusters if no specific cluster is specified
200
196
  return targets
201
197
 
202
198
 
203
- def prepare_hosts_info(cluster_config: Dict[str, Any]) -> List[Dict[str, str]]:
199
+ def prepare_hosts_info(cluster_name: str,
200
+ cluster_config: Dict[str, Any]) -> List[Dict[str, str]]:
204
201
  """Prepare list of hosts with resolved user, identity_file, and password."""
205
202
  if 'hosts' not in cluster_config or not cluster_config['hosts']:
206
- print(f'{RED}Error: No hosts defined in cluster configuration{NC}',
207
- file=sys.stderr)
208
- sys.exit(1)
203
+ with ux_utils.print_exception_no_traceback():
204
+ raise ValueError(
205
+ f'No hosts defined in cluster {cluster_name} configuration')
209
206
 
210
207
  # Get cluster-level defaults
211
208
  cluster_user = cluster_config.get('user', '')
@@ -636,23 +633,20 @@ def main():
636
633
  # Using command line arguments - legacy mode
637
634
  if args.ssh_key and not os.path.isfile(
638
635
  args.ssh_key) and not global_use_ssh_config:
639
- print(f'{RED}Error: SSH key not found: {args.ssh_key}{NC}',
640
- file=sys.stderr)
641
- sys.exit(1)
636
+ with ux_utils.print_exception_no_traceback():
637
+ raise ValueError(f'SSH key not found: {args.ssh_key}')
642
638
 
643
639
  if not os.path.isfile(args.ips_file):
644
- print(f'{RED}Error: IPs file not found: {args.ips_file}{NC}',
645
- file=sys.stderr)
646
- sys.exit(1)
640
+ with ux_utils.print_exception_no_traceback():
641
+ raise ValueError(f'IPs file not found: {args.ips_file}')
647
642
 
648
643
  with open(args.ips_file, 'r', encoding='utf-8') as f:
649
644
  hosts = [line.strip() for line in f if line.strip()]
650
645
 
651
646
  if not hosts:
652
- print(
653
- f'{RED}Error: Hosts file is empty or not formatted correctly.{NC}',
654
- file=sys.stderr)
655
- sys.exit(1)
647
+ with ux_utils.print_exception_no_traceback():
648
+ raise ValueError(
649
+ 'Hosts file is empty or not formatted correctly.')
656
650
 
657
651
  head_node = hosts[0]
658
652
  worker_nodes = hosts[1:]
@@ -688,108 +682,121 @@ def main():
688
682
 
689
683
  # Process each cluster
690
684
  for cluster_name, cluster_config in clusters_config.items():
691
- print(f'SKYPILOT_CURRENT_CLUSTER: {cluster_name}')
692
- print(f'{YELLOW}==== Deploying cluster: {cluster_name} ====${NC}')
693
- hosts_info = prepare_hosts_info(cluster_config)
694
-
695
- if not hosts_info:
685
+ try:
686
+ print(f'SKYPILOT_CURRENT_CLUSTER: {cluster_name}')
696
687
  print(
697
- f'{RED}Error: No valid hosts found for cluster {cluster_name!r}. Skipping.{NC}'
698
- )
699
- continue
700
-
701
- # Generate a unique context name for each cluster
702
- context_name = args.context_name
703
- if context_name == 'default':
704
- context_name = 'ssh-' + cluster_name
705
-
706
- # Check cluster history
707
- os.makedirs(NODE_POOLS_INFO_DIR, exist_ok=True)
708
- history_yaml_file = os.path.join(NODE_POOLS_INFO_DIR,
709
- f'{context_name}-history.yaml')
710
-
711
- history = None
712
- if os.path.exists(history_yaml_file):
713
- print(f'{YELLOW}Loading history from {history_yaml_file}{NC}')
714
- with open(history_yaml_file, 'r', encoding='utf-8') as f:
715
- history = yaml.safe_load(f)
716
- else:
717
- print(f'{YELLOW}No history found for {context_name}.{NC}')
718
-
719
- history_workers_info = None
720
- history_worker_nodes = None
721
- history_use_ssh_config = None
722
- # Do not support changing anything besides hosts for now
723
- if history is not None:
724
- for key in ['user', 'identity_file', 'password']:
725
- if history.get(key) != cluster_config.get(key):
688
+ f'{YELLOW}==== Deploying cluster: {cluster_name} ====${NC}')
689
+ hosts_info = prepare_hosts_info(cluster_name, cluster_config)
690
+
691
+ if not hosts_info:
692
+ print(
693
+ f'{RED}Error: No valid hosts found for cluster {cluster_name!r}. Skipping.{NC}'
694
+ )
695
+ continue
696
+
697
+ # Generate a unique context name for each cluster
698
+ context_name = args.context_name
699
+ if context_name == 'default':
700
+ context_name = 'ssh-' + cluster_name
701
+
702
+ # Check cluster history
703
+ os.makedirs(NODE_POOLS_INFO_DIR, exist_ok=True)
704
+ history_yaml_file = os.path.join(
705
+ NODE_POOLS_INFO_DIR, f'{context_name}-history.yaml')
706
+
707
+ history = None
708
+ if os.path.exists(history_yaml_file):
709
+ print(
710
+ f'{YELLOW}Loading history from {history_yaml_file}{NC}')
711
+ with open(history_yaml_file, 'r', encoding='utf-8') as f:
712
+ history = yaml.safe_load(f)
713
+ else:
714
+ print(f'{YELLOW}No history found for {context_name}.{NC}')
715
+
716
+ history_workers_info = None
717
+ history_worker_nodes = None
718
+ history_use_ssh_config = None
719
+ # Do not support changing anything besides hosts for now
720
+ if history is not None:
721
+ for key in ['user', 'identity_file', 'password']:
722
+ if history.get(key) != cluster_config.get(key):
723
+ raise ValueError(
724
+ f'Cluster configuration has changed for field {key!r}. '
725
+ f'Previous value: {history.get(key)}, '
726
+ f'Current value: {cluster_config.get(key)}')
727
+ history_hosts_info = prepare_hosts_info(
728
+ cluster_name, history)
729
+ if history_hosts_info[0] != hosts_info[0]:
726
730
  raise ValueError(
727
- f'Cluster configuration has changed for field {key!r}. '
728
- f'Previous value: {history.get(key)}, '
729
- f'Current value: {cluster_config.get(key)}')
730
- history_hosts_info = prepare_hosts_info(history)
731
- if history_hosts_info[0] != hosts_info[0]:
732
- raise ValueError(
733
- f'Cluster configuration has changed for master node. '
734
- f'Previous value: {history_hosts_info[0]}, '
735
- f'Current value: {hosts_info[0]}')
736
- history_workers_info = history_hosts_info[1:] if len(
737
- history_hosts_info) > 1 else []
738
- history_worker_nodes = [h['ip'] for h in history_workers_info]
739
- history_use_ssh_config = [
740
- h.get('use_ssh_config', False) for h in history_workers_info
731
+ f'Cluster configuration has changed for master node. '
732
+ f'Previous value: {history_hosts_info[0]}, '
733
+ f'Current value: {hosts_info[0]}')
734
+ history_workers_info = history_hosts_info[1:] if len(
735
+ history_hosts_info) > 1 else []
736
+ history_worker_nodes = [
737
+ h['ip'] for h in history_workers_info
738
+ ]
739
+ history_use_ssh_config = [
740
+ h.get('use_ssh_config', False)
741
+ for h in history_workers_info
742
+ ]
743
+
744
+ # Use the first host as the head node and the rest as worker nodes
745
+ head_host = hosts_info[0]
746
+ worker_hosts = hosts_info[1:] if len(hosts_info) > 1 else []
747
+
748
+ head_node = head_host['ip']
749
+ worker_nodes = [h['ip'] for h in worker_hosts]
750
+ ssh_user = head_host['user']
751
+ ssh_key = head_host['identity_file']
752
+ head_use_ssh_config = global_use_ssh_config or head_host.get(
753
+ 'use_ssh_config', False)
754
+ worker_use_ssh_config = [
755
+ global_use_ssh_config or h.get('use_ssh_config', False)
756
+ for h in worker_hosts
741
757
  ]
758
+ password = head_host['password']
759
+
760
+ # Deploy this cluster
761
+ unsuccessful_workers = deploy_cluster(
762
+ head_node,
763
+ worker_nodes,
764
+ ssh_user,
765
+ ssh_key,
766
+ context_name,
767
+ password,
768
+ head_use_ssh_config,
769
+ worker_use_ssh_config,
770
+ kubeconfig_path,
771
+ args.cleanup,
772
+ worker_hosts=worker_hosts,
773
+ history_worker_nodes=history_worker_nodes,
774
+ history_workers_info=history_workers_info,
775
+ history_use_ssh_config=history_use_ssh_config)
776
+
777
+ if not args.cleanup:
778
+ successful_hosts = []
779
+ for host in cluster_config['hosts']:
780
+ if isinstance(host, str):
781
+ host_node = host
782
+ else:
783
+ host_node = host['ip']
784
+ if host_node not in unsuccessful_workers:
785
+ successful_hosts.append(host)
786
+ cluster_config['hosts'] = successful_hosts
787
+ with open(history_yaml_file, 'w', encoding='utf-8') as f:
788
+ print(
789
+ f'{YELLOW}Writing history to {history_yaml_file}{NC}'
790
+ )
791
+ yaml.dump(cluster_config, f)
742
792
 
743
- # Use the first host as the head node and the rest as worker nodes
744
- head_host = hosts_info[0]
745
- worker_hosts = hosts_info[1:] if len(hosts_info) > 1 else []
746
-
747
- head_node = head_host['ip']
748
- worker_nodes = [h['ip'] for h in worker_hosts]
749
- ssh_user = head_host['user']
750
- ssh_key = head_host['identity_file']
751
- head_use_ssh_config = global_use_ssh_config or head_host.get(
752
- 'use_ssh_config', False)
753
- worker_use_ssh_config = [
754
- global_use_ssh_config or h.get('use_ssh_config', False)
755
- for h in worker_hosts
756
- ]
757
- password = head_host['password']
758
-
759
- # Deploy this cluster
760
- unsuccessful_workers = deploy_cluster(
761
- head_node,
762
- worker_nodes,
763
- ssh_user,
764
- ssh_key,
765
- context_name,
766
- password,
767
- head_use_ssh_config,
768
- worker_use_ssh_config,
769
- kubeconfig_path,
770
- args.cleanup,
771
- worker_hosts=worker_hosts,
772
- history_worker_nodes=history_worker_nodes,
773
- history_workers_info=history_workers_info,
774
- history_use_ssh_config=history_use_ssh_config)
775
-
776
- if not args.cleanup:
777
- successful_hosts = []
778
- for host in cluster_config['hosts']:
779
- if isinstance(host, str):
780
- host_node = host
781
- else:
782
- host_node = host['ip']
783
- if host_node not in unsuccessful_workers:
784
- successful_hosts.append(host)
785
- cluster_config['hosts'] = successful_hosts
786
- with open(history_yaml_file, 'w', encoding='utf-8') as f:
787
- print(f'{YELLOW}Writing history to {history_yaml_file}{NC}')
788
- yaml.dump(cluster_config, f)
789
-
790
- print(
791
- f'{GREEN}==== Completed deployment for cluster: {cluster_name} ====${NC}'
792
- )
793
+ print(
794
+ f'{GREEN}==== Completed deployment for cluster: {cluster_name} ====${NC}'
795
+ )
796
+ except Exception as e: # pylint: disable=broad-except
797
+ print(
798
+ f'{RED}Error deploying SSH Node Pool {cluster_name}: {e}{NC}'
799
+ )
793
800
 
794
801
 
795
802
  def deploy_cluster(head_node,
@@ -839,11 +846,9 @@ def deploy_cluster(head_node,
839
846
  # For SkySSHUpLineProcessor
840
847
  print_output=True)
841
848
  if result is None:
842
- print(
843
- f'{RED}Failed to SSH to head node ({head_node}). '
844
- f'Please check the SSH configuration.{NC}',
845
- file=sys.stderr)
846
- sys.exit(1)
849
+ with ux_utils.print_exception_no_traceback():
850
+ raise RuntimeError(f'Failed to SSH to head node ({head_node}). '
851
+ f'Please check the SSH configuration.')
847
852
 
848
853
  # Checking history
849
854
  history_exists = (history_worker_nodes is not None and
@@ -981,10 +986,10 @@ def deploy_cluster(head_node,
981
986
  print_output=True,
982
987
  use_shell=True)
983
988
  if result is None:
984
- print(
985
- f'{RED}Failed to setup TCP forwarding on head node ({head_node}). '
986
- f'Please check the SSH configuration.{NC}',
987
- file=sys.stderr)
989
+ with ux_utils.print_exception_no_traceback():
990
+ raise RuntimeError(
991
+ f'Failed to setup TCP forwarding on head node ({head_node}). '
992
+ f'Please check the SSH configuration.')
988
993
 
989
994
  # Get effective IP for master node if using SSH config - needed for workers to connect
990
995
  if head_use_ssh_config:
@@ -1024,9 +1029,9 @@ def deploy_cluster(head_node,
1024
1029
  ssh_key,
1025
1030
  use_ssh_config=head_use_ssh_config)
1026
1031
  if result is None:
1027
- print(f'{RED}Failed to deploy K3s on head node ({head_node}). {NC}',
1028
- file=sys.stderr)
1029
- sys.exit(1)
1032
+ with ux_utils.print_exception_no_traceback():
1033
+ raise RuntimeError(
1034
+ f'Failed to deploy K3s on head node ({head_node}).')
1030
1035
  success_message(f'K3s deployed on head node ({head_node}).')
1031
1036
 
1032
1037
  # Check if head node has a GPU
@@ -1045,11 +1050,9 @@ def deploy_cluster(head_node,
1045
1050
  ssh_key,
1046
1051
  use_ssh_config=head_use_ssh_config)
1047
1052
  if master_addr is None:
1048
- print(
1049
- f'{RED}Failed to SSH to head node ({head_node}). '
1050
- f'Please check the SSH configuration.{NC}',
1051
- file=sys.stderr)
1052
- sys.exit(1)
1053
+ with ux_utils.print_exception_no_traceback():
1054
+ raise RuntimeError(f'Failed to SSH to head node ({head_node}). '
1055
+ f'Please check the SSH configuration.')
1053
1056
  print(f'{GREEN}Master node internal IP: {master_addr}{NC}')
1054
1057
 
1055
1058
  # Step 2: Install k3s on worker nodes and join them to the master node
sky/utils/log_utils.py CHANGED
@@ -497,6 +497,10 @@ class SkySSHUpLineProcessor(LineProcessor):
497
497
  f'✗ Failed to setup TCP forwarding on head node {node_name}.'
498
498
  f'{colorama.Style.RESET_ALL}')
499
499
 
500
+ if 'Error in deploying SSH Target' in log_line:
501
+ logger.info(f'{ux_utils.INDENT_LAST_SYMBOL}{colorama.Fore.RED}'
502
+ f'{log_line.strip()}{colorama.Style.RESET_ALL}')
503
+
500
504
  def __exit__(self, except_type: Optional[Type[BaseException]],
501
505
  except_value: Optional[BaseException],
502
506
  traceback: Optional[types.TracebackType]) -> None:
sky/utils/schemas.py CHANGED
@@ -1173,6 +1173,54 @@ def get_config_schema():
1173
1173
  }
1174
1174
  }
1175
1175
 
1176
+ workspace_schema = {'type': 'string'}
1177
+
1178
+ allowed_workspace_cloud_names = list(
1179
+ service_catalog.ALL_CLOUDS) + ['cloudflare']
1180
+ # Create pattern for non-GCP clouds (all clouds except gcp)
1181
+ non_gcp_clouds = [
1182
+ cloud for cloud in allowed_workspace_cloud_names
1183
+ if cloud.lower() != 'gcp'
1184
+ ]
1185
+ non_gcp_cloud_regex = '|'.join(non_gcp_clouds)
1186
+ workspaces_schema = {
1187
+ 'type': 'object',
1188
+ 'required': [],
1189
+ # each key is a workspace name
1190
+ 'additionalProperties': {
1191
+ 'type': 'object',
1192
+ 'additionalProperties': False,
1193
+ 'patternProperties': {
1194
+ # Pattern for non-GCP clouds - only allows 'disabled' property
1195
+ f'^({non_gcp_cloud_regex})$': {
1196
+ 'type': 'object',
1197
+ 'additionalProperties': False,
1198
+ 'properties': {
1199
+ 'disabled': {
1200
+ 'type': 'boolean'
1201
+ }
1202
+ },
1203
+ },
1204
+ },
1205
+ 'properties': {
1206
+ # Explicit definition for GCP allows both project_id and
1207
+ # disabled
1208
+ 'gcp': {
1209
+ 'type': 'object',
1210
+ 'properties': {
1211
+ 'project_id': {
1212
+ 'type': 'string'
1213
+ },
1214
+ 'disabled': {
1215
+ 'type': 'boolean'
1216
+ }
1217
+ },
1218
+ 'additionalProperties': False,
1219
+ },
1220
+ },
1221
+ },
1222
+ }
1223
+
1176
1224
  provision_configs = {
1177
1225
  'type': 'object',
1178
1226
  'required': [],
@@ -1199,6 +1247,10 @@ def get_config_schema():
1199
1247
  'required': [],
1200
1248
  'additionalProperties': False,
1201
1249
  'properties': {
1250
+ # TODO Replace this with whatever syang cooks up
1251
+ 'workspace': {
1252
+ 'type': 'string',
1253
+ },
1202
1254
  'jobs': controller_resources_schema,
1203
1255
  'serve': controller_resources_schema,
1204
1256
  'allowed_clouds': allowed_clouds,
@@ -1206,6 +1258,8 @@ def get_config_schema():
1206
1258
  'docker': docker_configs,
1207
1259
  'nvidia_gpus': gpu_configs,
1208
1260
  'api_server': api_server,
1261
+ 'active_workspace': workspace_schema,
1262
+ 'workspaces': workspaces_schema,
1209
1263
  'provision': provision_configs,
1210
1264
  **cloud_configs,
1211
1265
  },
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250523
3
+ Version: 1.0.0.dev20250524
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0