skypilot-nightly 1.0.0.dev20250625__py3-none-any.whl → 1.0.0.dev20250626__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +1 -6
  3. sky/client/cli/command.py +10 -5
  4. sky/client/sdk.py +91 -15
  5. sky/clouds/ssh.py +36 -0
  6. sky/core.py +20 -0
  7. sky/dashboard/out/404.html +1 -1
  8. sky/dashboard/out/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js +1 -0
  9. sky/dashboard/out/_next/static/chunks/141-fa5a20cbf401b351.js +11 -0
  10. sky/dashboard/out/_next/static/chunks/25.76c246239df93d50.js +6 -0
  11. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +16 -0
  14. sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +6 -0
  16. sky/dashboard/out/_next/static/chunks/875.52c962183328b3f2.js +25 -0
  17. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-32ce4f49f2261f55.js → [cluster]-8040f2483897ed0c.js} +1 -1
  22. sky/dashboard/out/_next/static/chunks/pages/clusters-f119a5630a1efd61.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/{jobs-26da173e20af16e4.js → jobs-0a5695ff3075d94a.js} +1 -1
  27. sky/dashboard/out/_next/static/chunks/pages/{users-ce29e7420385563d.js → users-4978cbb093e141e7.js} +1 -1
  28. sky/dashboard/out/_next/static/chunks/pages/workspace/new-5b59bce9eb208d84.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/{workspaces-862b120406461b10.js → workspaces-50e230828730cfb3.js} +1 -1
  31. sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +1 -0
  32. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +3 -0
  33. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  34. sky/dashboard/out/clusters/[cluster].html +1 -1
  35. sky/dashboard/out/clusters.html +1 -1
  36. sky/dashboard/out/config.html +1 -1
  37. sky/dashboard/out/index.html +1 -1
  38. sky/dashboard/out/infra/[context].html +1 -1
  39. sky/dashboard/out/infra.html +1 -1
  40. sky/dashboard/out/jobs/[job].html +1 -1
  41. sky/dashboard/out/jobs.html +1 -1
  42. sky/dashboard/out/users.html +1 -1
  43. sky/dashboard/out/volumes.html +1 -1
  44. sky/dashboard/out/workspace/new.html +1 -1
  45. sky/dashboard/out/workspaces/[name].html +1 -1
  46. sky/dashboard/out/workspaces.html +1 -1
  47. sky/provision/kubernetes/utils.py +18 -2
  48. sky/server/rest.py +3 -2
  49. sky/server/server.py +4 -27
  50. sky/skylet/constants.py +5 -0
  51. sky/skypilot_config.py +3 -0
  52. sky/ssh_node_pools/__init__.py +1 -0
  53. sky/ssh_node_pools/core.py +133 -0
  54. sky/ssh_node_pools/server.py +232 -0
  55. sky/utils/kubernetes/deploy_remote_cluster.py +12 -185
  56. sky/utils/kubernetes/ssh_utils.py +221 -0
  57. {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/METADATA +1 -1
  58. {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/RECORD +64 -59
  59. sky/dashboard/out/_next/static/ZWdSYkqVe3WjnFR8ocqoG/_buildManifest.js +0 -1
  60. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +0 -1
  61. sky/dashboard/out/_next/static/chunks/310.2671028c20e892c7.js +0 -16
  62. sky/dashboard/out/_next/static/chunks/37-1f1e94f5a561202a.js +0 -6
  63. sky/dashboard/out/_next/static/chunks/42.bc85e5b1a4debf22.js +0 -6
  64. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +0 -1
  65. sky/dashboard/out/_next/static/chunks/513.309df9e18a9ff005.js +0 -1
  66. sky/dashboard/out/_next/static/chunks/66-66ae330df2d3c1c7.js +0 -1
  67. sky/dashboard/out/_next/static/chunks/682.00e56a220dd26fe1.js +0 -6
  68. sky/dashboard/out/_next/static/chunks/843-07d25a7e64462fd8.js +0 -11
  69. sky/dashboard/out/_next/static/chunks/856-cdf66268ec878d0c.js +0 -1
  70. sky/dashboard/out/_next/static/chunks/973-5b5019ba333e8d62.js +0 -1
  71. sky/dashboard/out/_next/static/chunks/pages/clusters-4aa031d1f42723d8.js +0 -1
  72. sky/dashboard/out/_next/static/chunks/pages/config-3102d02a188f04b3.js +0 -1
  73. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-6f1e02e31eecb5ce.js +0 -1
  74. sky/dashboard/out/_next/static/chunks/pages/infra-fd5dc8a91bd9169a.js +0 -1
  75. sky/dashboard/out/_next/static/chunks/pages/workspace/new-09ae0f6f972aa871.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-0b4c662a25e4747a.js +0 -1
  77. sky/dashboard/out/_next/static/chunks/webpack-6133dc1e928bd0b5.js +0 -1
  78. sky/dashboard/out/_next/static/css/b23cb0257bf96c51.css +0 -3
  79. /sky/dashboard/out/_next/static/{ZWdSYkqVe3WjnFR8ocqoG → bs6UB9V4Jq10TIZ5x-kBK}/_ssgManifest.js +0 -0
  80. /sky/dashboard/out/_next/static/chunks/pages/{_app-0ef7418d1a3822f3.js → _app-9a3ce3170d2edcec.js} +0 -0
  81. {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/WHEEL +0 -0
  82. {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/entry_points.txt +0 -0
  83. {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/licenses/LICENSE +0 -0
  84. {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/top_level.txt +0 -0
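The largest addition is a new REST surface for SSH Node Pools (`sky/ssh_node_pools/server.py`, shown first below). As a hedged sketch of how such an APIRouter is typically wired in — the actual `sky/server/server.py` change is only summarized above, and the `/ssh_node_pools` prefix is an assumption:

```python
# Hypothetical wiring sketch, not the actual sky/server/server.py change:
# mount the new APIRouter on a FastAPI app under an assumed prefix.
import fastapi

from sky.ssh_node_pools import server as ssh_node_pools_rest

app = fastapi.FastAPI()
# The '/ssh_node_pools' prefix is illustrative; the real mount point is
# defined in sky/server/server.py, which this diff does not show in full.
app.include_router(ssh_node_pools_rest.router, prefix='/ssh_node_pools')
```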
sky/ssh_node_pools/server.py (new file)
@@ -0,0 +1,232 @@
+"""SSH Node Pool management API endpoints."""
+import re
+from typing import Any, Dict, List
+
+import fastapi
+
+from sky import core as sky_core
+from sky.server.requests import executor
+from sky.server.requests import payloads
+from sky.server.requests import requests as requests_lib
+from sky.ssh_node_pools import core as ssh_node_pools_core
+from sky.utils import common_utils
+
+router = fastapi.APIRouter()
+
+
+@router.get('')
+async def get_ssh_node_pools() -> Dict[str, Any]:
+    """Get all SSH Node Pool configurations."""
+    try:
+        return ssh_node_pools_core.get_all_pools()
+    except Exception as e:
+        raise fastapi.HTTPException(
+            status_code=500,
+            detail=
+            f'Failed to get SSH Node Pools: {common_utils.format_exception(e)}')
+
+
+@router.post('')
+async def update_ssh_node_pools(pools_config: Dict[str, Any]) -> Dict[str, str]:
+    """Update SSH Node Pool configurations."""
+    try:
+        ssh_node_pools_core.update_pools(pools_config)
+        return {'status': 'success'}
+    except Exception as e:
+        raise fastapi.HTTPException(status_code=400,
+                                    detail=f'Failed to update SSH Node Pools:'
+                                    f' {common_utils.format_exception(e)}')
+
+
+@router.delete('/{pool_name}')
+async def delete_ssh_node_pool(pool_name: str) -> Dict[str, str]:
+    """Delete an SSH Node Pool configuration."""
+    try:
+        if ssh_node_pools_core.delete_pool(pool_name):
+            return {'status': 'success'}
+        else:
+            raise fastapi.HTTPException(
+                status_code=404,
+                detail=f'SSH Node Pool `{pool_name}` not found')
+    except fastapi.HTTPException:
+        raise
+    except Exception as e:
+        raise fastapi.HTTPException(status_code=500,
+                                    detail='Failed to delete SSH Node Pool: '
+                                    f'{common_utils.format_exception(e)}')
+
+
+@router.post('/keys')
+async def upload_ssh_key(request: fastapi.Request) -> Dict[str, str]:
+    """Upload SSH private key."""
+    try:
+        form = await request.form()
+        key_name = form.get('key_name')
+        key_file = form.get('key_file')
+
+        if not key_name or not key_file:
+            raise fastapi.HTTPException(status_code=400,
+                                        detail='Missing key_name or key_file')
+
+        key_content = await key_file.read()
+        key_path = ssh_node_pools_core.upload_ssh_key(key_name,
+                                                      key_content.decode())
+
+        return {'status': 'success', 'key_path': key_path}
+    except fastapi.HTTPException:
+        raise
+    except Exception as e:
+        raise fastapi.HTTPException(
+            status_code=500,
+            detail=
+            f'Failed to upload SSH key: {common_utils.format_exception(e)}')
+
+
+@router.get('/keys')
+async def list_ssh_keys() -> List[str]:
+    """List available SSH keys."""
+    try:
+        return ssh_node_pools_core.list_ssh_keys()
+    except Exception as e:
+        exception_msg = common_utils.format_exception(e)
+        raise fastapi.HTTPException(
+            status_code=500, detail=f'Failed to list SSH keys: {exception_msg}')
+
+
+@router.post('/{pool_name}/deploy')
+async def deploy_ssh_node_pool(request: fastapi.Request,
+                               pool_name: str) -> Dict[str, str]:
+    """Deploy SSH Node Pool using existing ssh_up functionality."""
+    try:
+        ssh_up_body = payloads.SSHUpBody(infra=pool_name, cleanup=False)
+        executor.schedule_request(
+            request_id=request.state.request_id,
+            request_name='ssh_up',
+            request_body=ssh_up_body,
+            func=sky_core.ssh_up,
+            schedule_type=requests_lib.ScheduleType.LONG,
+        )
+
+        return {
+            'status': 'success',
+            'request_id': request.state.request_id,
+            'message': f'SSH Node Pool `{pool_name}` deployment started'
+        }
+    except Exception as e:
+        raise fastapi.HTTPException(status_code=500,
+                                    detail=f'Failed to deploy SSH Node Pool: '
+                                    f'{common_utils.format_exception(e)}')
+
+
+@router.post('/deploy')
+async def deploy_ssh_node_pool_general(
+        request: fastapi.Request,
+        ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
+    """Deploys all SSH Node Pools."""
+    try:
+        executor.schedule_request(
+            request_id=request.state.request_id,
+            request_name='ssh_up',
+            request_body=ssh_up_body,
+            func=sky_core.ssh_up,
+            schedule_type=requests_lib.ScheduleType.LONG,
+        )
+
+        pool_name = ssh_up_body.infra or 'default'
+        return {
+            'status': 'success',
+            'request_id': request.state.request_id,
+            'message': f'SSH Node Pool `{pool_name}` deployment started'
+        }
+    except Exception as e:
+        raise fastapi.HTTPException(status_code=500,
+                                    detail=f'Failed to deploy SSH Node Pool: '
+                                    f'{common_utils.format_exception(e)}')
+
+
+@router.post('/{pool_name}/down')
+async def down_ssh_node_pool(request: fastapi.Request,
+                             pool_name: str) -> Dict[str, str]:
+    """Cleans up an SSH Node Pool."""
+    try:
+        ssh_up_body = payloads.SSHUpBody(infra=pool_name, cleanup=True)
+        executor.schedule_request(
+            request_id=request.state.request_id,
+            request_name='ssh_down',
+            request_body=ssh_up_body,
+            func=sky_core.ssh_up,  # Reuse ssh_up function with cleanup=True
+            schedule_type=requests_lib.ScheduleType.LONG,
+        )
+
+        return {
+            'status': 'success',
+            'request_id': request.state.request_id,
+            'message': f'SSH Node Pool `{pool_name}` teardown started'
+        }
+    except Exception as e:
+        raise fastapi.HTTPException(
+            status_code=500,
+            detail=f'Failed to tear down SSH Node Pool: '
+            f'{common_utils.format_exception(e)}')
+
+
+@router.post('/down')
+async def down_ssh_node_pool_general(
+        request: fastapi.Request,
+        ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
+    """Cleans up all SSH Node Pools."""
+    try:
+        # Set cleanup=True for down operation
+        ssh_up_body.cleanup = True
+        executor.schedule_request(
+            request_id=request.state.request_id,
+            request_name='ssh_down',
+            request_body=ssh_up_body,
+            func=sky_core.ssh_up,  # Reuse ssh_up function with cleanup=True
+            schedule_type=requests_lib.ScheduleType.LONG,
+        )
+
+        pool_name = ssh_up_body.infra or 'default'
+        return {
+            'status': 'success',
+            'request_id': request.state.request_id,
+            'message': f'SSH Node Pool `{pool_name}` teardown started'
+        }
+    except Exception as e:
+        raise fastapi.HTTPException(
+            status_code=500,
+            detail=f'Failed to tear down SSH Node Pool: '
+            f'{common_utils.format_exception(e)}')
+
+
+@router.get('/{pool_name}/status')
+async def get_ssh_node_pool_status(pool_name: str) -> Dict[str, str]:
+    """Get the status of a specific SSH Node Pool."""
+    try:
+        # Call ssh_status to check the context
+        context_name = f'ssh-{pool_name}'
+        is_ready, reason = sky_core.ssh_status(context_name)
+
+        # Strip ANSI escape codes from the reason text
+        def strip_ansi_codes(text):
+            if not text:
+                return text
+            # Remove ANSI escape sequences (color codes, formatting, etc.)
+            text = re.sub(r'\x1b\[[0-9;]*m', '', text)
+            # Remove 'disabled. Reason: ' prefix if present
+            text = text.replace('disabled. Reason: ', '')
+            return text
+
+        cleaned_reason = strip_ansi_codes(reason) if reason else reason
+
+        return {
+            'pool_name': pool_name,
+            'context_name': context_name,
+            'status': 'Ready' if is_ready else 'Not Ready',
+            'reason': cleaned_reason
+        }
+    except Exception as e:
+        raise fastapi.HTTPException(
+            status_code=500,
+            detail=f'Failed to get SSH Node Pool status: '
+            f'{common_utils.format_exception(e)}')
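Taken together, these endpoints form a small CRUD-plus-actions API. A minimal client sketch, assuming the router is mounted at `/ssh_node_pools` on a locally running API server (both the prefix and the port below are assumptions, not confirmed by this diff):

```python
# Illustrative client for the new endpoints; base URL is assumed.
import requests

BASE = 'http://127.0.0.1:46580/ssh_node_pools'  # assumed mount point/port

# GET '' -> all pool configurations.
print(requests.get(BASE).json())

# POST '/{pool_name}/deploy' schedules sky_core.ssh_up as a LONG request
# and returns immediately with a request ID for the deployment.
resp = requests.post(f'{BASE}/my-pool/deploy').json()
print(resp['request_id'], resp['message'])

# POST '/{pool_name}/down' reuses ssh_up with cleanup=True for teardown.
requests.post(f'{BASE}/my-pool/down')

# GET '/{pool_name}/status' reports Ready/Not Ready, ANSI codes stripped.
print(requests.get(f'{BASE}/my-pool/status').json())
```

Note the deploy/down handlers return before the work finishes; the `request_id` they echo back is the handle for tracking the scheduled request.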
sky/utils/kubernetes/deploy_remote_cluster.py
@@ -11,11 +11,12 @@ import shutil
 import subprocess
 import sys
 import tempfile
-from typing import Any, Dict, List, Optional, Set
+from typing import List, Set

 import yaml

 from sky.utils import ux_utils
+from sky.utils.kubernetes import ssh_utils

 # Colors for nicer UX
 RED = '\033[0;31m'
@@ -24,7 +25,6 @@ YELLOW = '\033[1;33m'
 WARNING_YELLOW = '\x1b[33m'
 NC = '\033[0m'  # No color

-DEFAULT_SSH_NODE_POOLS_PATH = os.path.expanduser('~/.sky/ssh_node_pools.yaml')
 DEFAULT_KUBECONFIG_PATH = os.path.expanduser('~/.kube/config')
 SSH_CONFIG_PATH = os.path.expanduser('~/.ssh/config')
 NODE_POOLS_INFO_DIR = os.path.expanduser('~/.sky/ssh_node_pools_info')
@@ -33,29 +33,6 @@ NODE_POOLS_INFO_DIR = os.path.expanduser('~/.sky/ssh_node_pools_info')
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))


-class UniqueKeySafeLoader(yaml.SafeLoader):
-    """Custom YAML loader that raises an error if there are duplicate keys."""
-
-    def construct_mapping(self, node, deep=False):
-        mapping = {}
-        for key_node, value_node in node.value:
-            key = self.construct_object(key_node, deep=deep)
-            if key in mapping:
-                raise yaml.constructor.ConstructorError(
-                    note=(f'Duplicate cluster config for cluster {key!r}.\n'
-                          'Please remove one of them from: '
-                          f'{DEFAULT_SSH_NODE_POOLS_PATH}'))
-            value = self.construct_object(value_node, deep=deep)
-            mapping[key] = value
-        return mapping
-
-
-# Register the custom constructor inside the class
-UniqueKeySafeLoader.add_constructor(
-    yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
-    UniqueKeySafeLoader.construct_mapping)
-
-
 def parse_args():
     parser = argparse.ArgumentParser(
         description='Deploy a Kubernetes cluster on remote machines.')
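The `UniqueKeySafeLoader` removed here (relocated to `sky/utils/kubernetes/ssh_utils.py`, per the import added above) exists because PyYAML's default loader silently keeps the last value when a mapping key is duplicated. A minimal demonstration of the failure mode it guards against:

```python
import yaml

doc = """
my-cluster:
  user: alice
my-cluster:
  user: bob
"""

# Plain safe_load silently drops the first definition:
print(yaml.safe_load(doc))  # {'my-cluster': {'user': 'bob'}}
# The custom loader instead raises a ConstructorError naming the duplicated
# cluster, so a typo cannot silently shadow a pool's configuration.
```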
@@ -64,9 +41,9 @@ def parse_args():
     parser.add_argument(
         '--ssh-node-pools-file',
         dest='ssh_node_pools_file',
-        default=DEFAULT_SSH_NODE_POOLS_PATH,
+        default=ssh_utils.DEFAULT_SSH_NODE_POOLS_PATH,
         help=
-        f'Path to SSH node pools YAML file (default: {DEFAULT_SSH_NODE_POOLS_PATH})'
+        f'Path to SSH node pools YAML file (default: {ssh_utils.DEFAULT_SSH_NODE_POOLS_PATH})'
     )
     parser.add_argument(
         '--kubeconfig-path',
@@ -117,156 +94,6 @@ def parse_args():
     return parser.parse_args()


-def load_ssh_targets(file_path: str) -> Dict[str, Any]:
-    """Load SSH targets from YAML file."""
-    if not os.path.exists(file_path):
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError(f'SSH Node Pools file not found: {file_path}')
-
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            targets = yaml.load(f, Loader=UniqueKeySafeLoader)
-        return targets
-    except yaml.constructor.ConstructorError as e:
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError(e.note) from e
-    except (yaml.YAMLError, IOError, OSError) as e:
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError(f'Error loading SSH Node Pools file: {e}') from e
-
-
-def check_host_in_ssh_config(hostname: str) -> bool:
-    """Return True iff *hostname* matches at least one `Host`/`Match` stanza
-    in the user's OpenSSH client configuration (including anything pulled in
-    via Include).
-
-    It calls: ssh -vvG <hostname> -o ConnectTimeout=0
-    which:
-      • -G expands the effective config without connecting
-      • -vv prints debug lines that show which stanzas are applied
-      • ConnectTimeout=0 avoids a DNS lookup if <hostname> is a FQDN/IP
-
-    No config files are opened or parsed manually.
-
-    Parameters
-    ----------
-    hostname : str
-        The alias/IP/FQDN you want to test.
-
-    Returns
-    -------
-    bool
-        True – a specific stanza matched the host
-        False – nothing but the global defaults (`Host *`) applied
-    """
-    # We direct stderr→stdout because debug output goes to stderr.
-    proc = subprocess.run(
-        ['ssh', '-vvG', hostname, '-o', 'ConnectTimeout=0'],
-        text=True,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        check=False,  # we only want the text, not to raise
-    )
-
-    # Look for lines like:
-    #   debug1: ~/.ssh/config line 42: Applying options for <hostname>
-    # Anything other than "*"
-    pattern = re.compile(r'^debug\d+: .*Applying options for ([^*].*)$',
-                         re.MULTILINE)
-
-    return bool(pattern.search(proc.stdout))
-
-
-def get_cluster_config(targets: Dict[str, Any],
-                       cluster_name: Optional[str] = None,
-                       file_path: Optional[str] = None) -> Dict[str, Any]:
-    """Get configuration for specific clusters or all clusters."""
-    if not targets:
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError(
-                f'No clusters defined in SSH Node Pools file {file_path}')
-
-    if cluster_name:
-        if cluster_name not in targets:
-            with ux_utils.print_exception_no_traceback():
-                raise ValueError(f'Cluster {cluster_name!r} not found in '
-                                 f'SSH Node Pools file {file_path}')
-        return {cluster_name: targets[cluster_name]}
-
-    # Return all clusters if no specific cluster is specified
-    return targets
-
-
-def prepare_hosts_info(cluster_name: str,
-                       cluster_config: Dict[str, Any]) -> List[Dict[str, str]]:
-    """Prepare list of hosts with resolved user, identity_file, and password."""
-    if 'hosts' not in cluster_config or not cluster_config['hosts']:
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError(
-                f'No hosts defined in cluster {cluster_name} configuration')
-
-    # Get cluster-level defaults
-    cluster_user = cluster_config.get('user', '')
-    cluster_identity_file = os.path.expanduser(
-        cluster_config.get('identity_file', ''))
-    cluster_password = cluster_config.get('password', '')
-
-    # Check if cluster identity file exists
-    if cluster_identity_file and not os.path.isfile(cluster_identity_file):
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError(
-                f'SSH Identity File Missing: {cluster_identity_file}')
-
-    hosts_info = []
-    for host in cluster_config['hosts']:
-        # Host can be a string (IP or SSH config hostname) or a dict
-        if isinstance(host, str):
-            # Check if this is an SSH config hostname
-            is_ssh_config_host = check_host_in_ssh_config(host)
-
-            hosts_info.append({
-                'ip': host,
-                'user': '' if is_ssh_config_host else cluster_user,
-                'identity_file': '' if is_ssh_config_host else
-                                 cluster_identity_file,
-                'password': cluster_password,
-                'use_ssh_config': is_ssh_config_host
-            })
-        else:
-            # It's a dict with potential overrides
-            if 'ip' not in host:
-                print(
-                    f'{RED}Warning: Host missing \'ip\' field, skipping: {host}{NC}'
-                )
-                continue
-
-            # Check if this is an SSH config hostname
-            is_ssh_config_host = check_host_in_ssh_config(host['ip'])
-
-            # Use host-specific values or fall back to cluster defaults
-            host_user = '' if is_ssh_config_host else host.get(
-                'user', cluster_user)
-            host_identity_file = os.path.expanduser(
-                '' if is_ssh_config_host else host.
-                get('identity_file', cluster_identity_file))
-            host_password = host.get('password', cluster_password)
-
-            if host_identity_file and not os.path.isfile(host_identity_file):
-                with ux_utils.print_exception_no_traceback():
-                    raise ValueError(
-                        f'SSH Identity File Missing: {host_identity_file}')
-
-            hosts_info.append({
-                'ip': host['ip'],
-                'user': host_user,
-                'identity_file': host_identity_file,
-                'password': host_password,
-                'use_ssh_config': is_ssh_config_host
-            })
-
-    return hosts_info
-
-
 def run_command(cmd, shell=False):
     """Run a local command and return the output."""
     process = subprocess.run(cmd,
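For reference, the file shape that `load_ssh_targets` and `prepare_hosts_info` (now in `ssh_utils`) consume can be read off the removed parsing code: a mapping from cluster name to cluster-level defaults (`user`, `identity_file`, `password`) plus a `hosts` list whose entries are either plain strings or dicts with per-host overrides. An illustrative file, with invented host values:

```python
import yaml

# Shape inferred from prepare_hosts_info() above; values are made up.
EXAMPLE_POOLS = """
my-cluster:                  # pool / cluster name (top-level key)
  user: ubuntu               # cluster-level default user
  identity_file: ~/.ssh/id_rsa
  hosts:
    - 10.0.0.1               # str form: IP or a ~/.ssh/config host alias
    - ip: 10.0.0.2           # dict form: 'ip' is required
      user: other-user       # optional per-host overrides
"""
print(yaml.safe_load(EXAMPLE_POOLS))
```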
@@ -675,10 +502,10 @@ def main():
     password = args.password

     # Check if hosts are in SSH config
-    head_use_ssh_config = global_use_ssh_config or check_host_in_ssh_config(
+    head_use_ssh_config = global_use_ssh_config or ssh_utils.check_host_in_ssh_config(
         head_node)
     worker_use_ssh_config = [
-        global_use_ssh_config or check_host_in_ssh_config(node)
+        global_use_ssh_config or ssh_utils.check_host_in_ssh_config(node)
         for node in worker_nodes
     ]
@@ -688,10 +515,9 @@ def main():
                          kubeconfig_path, args.cleanup)
     else:
         # Using YAML configuration
-        targets = load_ssh_targets(args.ssh_node_pools_file)
-        clusters_config = get_cluster_config(targets,
-                                             args.infra,
-                                             file_path=args.ssh_node_pools_file)
+        targets = ssh_utils.load_ssh_targets(args.ssh_node_pools_file)
+        clusters_config = ssh_utils.get_cluster_config(
+            targets, args.infra, file_path=args.ssh_node_pools_file)

         # Print information about clusters being processed
         num_clusters = len(clusters_config)
@@ -705,7 +531,8 @@ def main():
             print(f'SKYPILOT_CURRENT_CLUSTER: {cluster_name}')
             print(
                 f'{YELLOW}==== Deploying cluster: {cluster_name} ====${NC}')
-            hosts_info = prepare_hosts_info(cluster_name, cluster_config)
+            hosts_info = ssh_utils.prepare_hosts_info(
+                cluster_name, cluster_config)

             if not hosts_info:
                 print(
@@ -744,7 +571,7 @@ def main():
                     f'Cluster configuration has changed for field {key!r}. '
                     f'Previous value: {history.get(key)}, '
                     f'Current value: {cluster_config.get(key)}')
-            history_hosts_info = prepare_hosts_info(
+            history_hosts_info = ssh_utils.prepare_hosts_info(
                 cluster_name, history)
             if not args.cleanup and history_hosts_info[0] != hosts_info[
                     0]: