skypilot-nightly 1.0.0.dev20250625__py3-none-any.whl → 1.0.0.dev20250627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +1 -6
- sky/admin_policy.py +27 -17
- sky/client/cli/command.py +10 -5
- sky/client/sdk.py +91 -15
- sky/clouds/ssh.py +36 -0
- sky/core.py +20 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/HudU4f4Xsy-cP51JvXSZ-/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/141-fa5a20cbf401b351.js +11 -0
- sky/dashboard/out/_next/static/chunks/25.76c246239df93d50.js +6 -0
- sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +1 -0
- sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +1 -0
- sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +16 -0
- sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +1 -0
- sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +6 -0
- sky/dashboard/out/_next/static/chunks/875.52c962183328b3f2.js +25 -0
- sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +1 -0
- sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +1 -0
- sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +1 -0
- sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-32ce4f49f2261f55.js → [cluster]-8040f2483897ed0c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-f119a5630a1efd61.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{jobs-26da173e20af16e4.js → jobs-0a5695ff3075d94a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-ce29e7420385563d.js → users-4978cbb093e141e7.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-5b59bce9eb208d84.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-862b120406461b10.js → workspaces-50e230828730cfb3.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +1 -0
- sky/dashboard/out/_next/static/css/52082cf558ec9705.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/provision/kubernetes/utils.py +18 -2
- sky/server/rest.py +3 -2
- sky/server/server.py +4 -27
- sky/skylet/constants.py +5 -0
- sky/skypilot_config.py +3 -0
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +133 -0
- sky/ssh_node_pools/server.py +232 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +12 -185
- sky/utils/kubernetes/ssh_utils.py +221 -0
- {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250627.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250627.dist-info}/RECORD +65 -60
- sky/dashboard/out/_next/static/ZWdSYkqVe3WjnFR8ocqoG/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +0 -1
- sky/dashboard/out/_next/static/chunks/310.2671028c20e892c7.js +0 -16
- sky/dashboard/out/_next/static/chunks/37-1f1e94f5a561202a.js +0 -6
- sky/dashboard/out/_next/static/chunks/42.bc85e5b1a4debf22.js +0 -6
- sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +0 -1
- sky/dashboard/out/_next/static/chunks/513.309df9e18a9ff005.js +0 -1
- sky/dashboard/out/_next/static/chunks/66-66ae330df2d3c1c7.js +0 -1
- sky/dashboard/out/_next/static/chunks/682.00e56a220dd26fe1.js +0 -6
- sky/dashboard/out/_next/static/chunks/843-07d25a7e64462fd8.js +0 -11
- sky/dashboard/out/_next/static/chunks/856-cdf66268ec878d0c.js +0 -1
- sky/dashboard/out/_next/static/chunks/973-5b5019ba333e8d62.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-4aa031d1f42723d8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-3102d02a188f04b3.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-6f1e02e31eecb5ce.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-fd5dc8a91bd9169a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-09ae0f6f972aa871.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-0b4c662a25e4747a.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-6133dc1e928bd0b5.js +0 -1
- sky/dashboard/out/_next/static/css/b23cb0257bf96c51.css +0 -3
- /sky/dashboard/out/_next/static/{ZWdSYkqVe3WjnFR8ocqoG → HudU4f4Xsy-cP51JvXSZ-}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-0ef7418d1a3822f3.js → _app-9a3ce3170d2edcec.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250627.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250627.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250627.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250625.dist-info → skypilot_nightly-1.0.0.dev20250627.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
"""SSH Node Pool management API endpoints."""
|
2
|
+
import re
|
3
|
+
from typing import Any, Dict, List
|
4
|
+
|
5
|
+
import fastapi
|
6
|
+
|
7
|
+
from sky import core as sky_core
|
8
|
+
from sky.server.requests import executor
|
9
|
+
from sky.server.requests import payloads
|
10
|
+
from sky.server.requests import requests as requests_lib
|
11
|
+
from sky.ssh_node_pools import core as ssh_node_pools_core
|
12
|
+
from sky.utils import common_utils
|
13
|
+
|
14
|
+
# Router collecting all SSH Node Pool endpoints defined in this module.
router = fastapi.APIRouter()
|
15
|
+
|
16
|
+
|
17
|
+
@router.get('')
async def get_ssh_node_pools() -> Dict[str, Any]:
    """Return all SSH Node Pool configurations.

    Any failure while reading the configuration is reported to the client
    as HTTP 500 with the formatted exception text.
    """
    try:
        pools = ssh_node_pools_core.get_all_pools()
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(
            status_code=500,
            detail=
            f'Failed to get SSH Node Pools: {common_utils.format_exception(e)}')
    return pools
|
27
|
+
|
28
|
+
|
29
|
+
@router.post('')
async def update_ssh_node_pools(pools_config: Dict[str, Any]) -> Dict[str, str]:
    """Apply the SSH Node Pool configurations from the request body.

    A rejected or malformed configuration surfaces as HTTP 400 with the
    formatted exception text.
    """
    try:
        ssh_node_pools_core.update_pools(pools_config)
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'Failed to update SSH Node Pools:'
                                    f' {common_utils.format_exception(e)}')
    return {'status': 'success'}
|
39
|
+
|
40
|
+
|
41
|
+
@router.delete('/{pool_name}')
async def delete_ssh_node_pool(pool_name: str) -> Dict[str, str]:
    """Remove the SSH Node Pool configuration named `pool_name`.

    Responds with HTTP 404 when no such pool exists and HTTP 500 on any
    unexpected failure during deletion.
    """
    try:
        deleted = ssh_node_pools_core.delete_pool(pool_name)
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(status_code=500,
                                    detail='Failed to delete SSH Node Pool: '
                                    f'{common_utils.format_exception(e)}')
    if not deleted:
        raise fastapi.HTTPException(
            status_code=404,
            detail=f'SSH Node Pool `{pool_name}` not found')
    return {'status': 'success'}
|
57
|
+
|
58
|
+
|
59
|
+
@router.post('/keys')
async def upload_ssh_key(request: fastapi.Request) -> Dict[str, str]:
    """Store an uploaded SSH private key and return its saved path.

    Expects a multipart form with a `key_name` field and a `key_file`
    upload. Missing fields yield HTTP 400; any other failure (for example
    a key file that is not valid UTF-8) yields HTTP 500.
    """
    try:
        form = await request.form()
        key_name = form.get('key_name')
        key_file = form.get('key_file')

        if not (key_name and key_file):
            raise fastapi.HTTPException(status_code=400,
                                        detail='Missing key_name or key_file')

        # NOTE(review): `key_name` comes straight from the client; confirm
        # ssh_node_pools_core.upload_ssh_key sanitizes it (path traversal).
        raw_key = await key_file.read()
        key_path = ssh_node_pools_core.upload_ssh_key(key_name,
                                                      raw_key.decode())
    except fastapi.HTTPException:
        # Re-raise our own 400 untouched instead of wrapping it in a 500.
        raise
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(
            status_code=500,
            detail=
            f'Failed to upload SSH key: {common_utils.format_exception(e)}')
    return {'status': 'success', 'key_path': key_path}
|
83
|
+
|
84
|
+
|
85
|
+
@router.get('/keys')
async def list_ssh_keys() -> List[str]:
    """List available SSH keys, or HTTP 500 if the lookup fails."""
    try:
        keys = ssh_node_pools_core.list_ssh_keys()
    except Exception as e:  # pylint: disable=broad-except
        exception_msg = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500, detail=f'Failed to list SSH keys: {exception_msg}')
    return keys
|
94
|
+
|
95
|
+
|
96
|
+
@router.post('/{pool_name}/deploy')
async def deploy_ssh_node_pool(request: fastapi.Request,
                               pool_name: str) -> Dict[str, str]:
    """Start an asynchronous deployment of one SSH Node Pool.

    Schedules the existing `ssh_up` request on the long-running queue and
    returns immediately with the request id so the client can poll for
    progress. Scheduling failures surface as HTTP 500.
    """
    try:
        body = payloads.SSHUpBody(infra=pool_name, cleanup=False)
        executor.schedule_request(
            request_id=request.state.request_id,
            request_name='ssh_up',
            request_body=body,
            func=sky_core.ssh_up,
            schedule_type=requests_lib.ScheduleType.LONG,
        )
        result = {
            'status': 'success',
            'request_id': request.state.request_id,
            'message': f'SSH Node Pool `{pool_name}` deployment started'
        }
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(status_code=500,
                                    detail=f'Failed to deploy SSH Node Pool: '
                                    f'{common_utils.format_exception(e)}')
    return result
|
119
|
+
|
120
|
+
|
121
|
+
@router.post('/deploy')
async def deploy_ssh_node_pool_general(
    request: fastapi.Request,
    ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
    """Start an asynchronous deployment for the pools named in the body.

    The request body is forwarded to `ssh_up` on the long-running queue;
    the reported pool name falls back to 'default' when `infra` is unset.
    Scheduling failures surface as HTTP 500.
    """
    try:
        executor.schedule_request(
            request_id=request.state.request_id,
            request_name='ssh_up',
            request_body=ssh_up_body,
            func=sky_core.ssh_up,
            schedule_type=requests_lib.ScheduleType.LONG,
        )
        target = ssh_up_body.infra or 'default'
        result = {
            'status': 'success',
            'request_id': request.state.request_id,
            'message': f'SSH Node Pool `{target}` deployment started'
        }
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(status_code=500,
                                    detail=f'Failed to deploy SSH Node Pool: '
                                    f'{common_utils.format_exception(e)}')
    return result
|
145
|
+
|
146
|
+
|
147
|
+
@router.post('/{pool_name}/down')
async def down_ssh_node_pool(request: fastapi.Request,
                             pool_name: str) -> Dict[str, str]:
    """Start an asynchronous teardown of one SSH Node Pool.

    Schedules `ssh_up` with `cleanup=True` (the same entry point handles
    teardown) on the long-running queue and returns the request id.
    Scheduling failures surface as HTTP 500.
    """
    try:
        body = payloads.SSHUpBody(infra=pool_name, cleanup=True)
        executor.schedule_request(
            request_id=request.state.request_id,
            request_name='ssh_down',
            request_body=body,
            func=sky_core.ssh_up,  # Reuse ssh_up function with cleanup=True
            schedule_type=requests_lib.ScheduleType.LONG,
        )
        result = {
            'status': 'success',
            'request_id': request.state.request_id,
            'message': f'SSH Node Pool `{pool_name}` teardown started'
        }
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to tear down SSH Node Pool: '
            f'{common_utils.format_exception(e)}')
    return result
|
171
|
+
|
172
|
+
|
173
|
+
@router.post('/down')
async def down_ssh_node_pool_general(
    request: fastapi.Request,
    ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
    """Start an asynchronous teardown for the pools named in the body.

    Forces `cleanup=True` on the incoming body, then schedules `ssh_up`
    (which also implements teardown) on the long-running queue. The
    reported pool name falls back to 'default' when `infra` is unset.
    Scheduling failures surface as HTTP 500.
    """
    try:
        # Set cleanup=True for down operation
        ssh_up_body.cleanup = True
        executor.schedule_request(
            request_id=request.state.request_id,
            request_name='ssh_down',
            request_body=ssh_up_body,
            func=sky_core.ssh_up,  # Reuse ssh_up function with cleanup=True
            schedule_type=requests_lib.ScheduleType.LONG,
        )
        target = ssh_up_body.infra or 'default'
        result = {
            'status': 'success',
            'request_id': request.state.request_id,
            'message': f'SSH Node Pool `{target}` teardown started'
        }
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to tear down SSH Node Pool: '
            f'{common_utils.format_exception(e)}')
    return result
|
200
|
+
|
201
|
+
|
202
|
+
# Matches ANSI SGR (color/formatting) escape sequences, e.g. '\x1b[0;31m'.
# Compiled once at module load instead of per request.
_ANSI_SGR_PATTERN = re.compile(r'\x1b\[[0-9;]*m')


def _clean_status_reason(text):
    """Strip ANSI color codes and the 'disabled. Reason: ' prefix.

    Falsy input (None or '') is returned unchanged so the endpoint can
    pass an absent reason straight through.
    """
    if not text:
        return text
    text = _ANSI_SGR_PATTERN.sub('', text)
    return text.replace('disabled. Reason: ', '')


@router.get('/{pool_name}/status')
async def get_ssh_node_pool_status(pool_name: str) -> Dict[str, Any]:
    """Get the status of a specific SSH Node Pool.

    Returns the pool/context names, a 'Ready'/'Not Ready' status string,
    and a cleaned-up reason (may be None when the pool is healthy).
    Failures while checking status surface as HTTP 500.

    Note: the return annotation is Dict[str, Any], not Dict[str, str] —
    `reason` may be None, which a Dict[str, str] response model would
    reject at response-validation time.
    """
    try:
        # The pool's Kubernetes context is named 'ssh-<pool_name>'.
        context_name = f'ssh-{pool_name}'
        is_ready, reason = sky_core.ssh_status(context_name)

        return {
            'pool_name': pool_name,
            'context_name': context_name,
            'status': 'Ready' if is_ready else 'Not Ready',
            'reason': _clean_status_reason(reason)
        }
    except Exception as e:  # pylint: disable=broad-except
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to get SSH Node Pool status: '
            f'{common_utils.format_exception(e)}')
|
@@ -11,11 +11,12 @@ import shutil
|
|
11
11
|
import subprocess
|
12
12
|
import sys
|
13
13
|
import tempfile
|
14
|
-
from typing import
|
14
|
+
from typing import List, Set
|
15
15
|
|
16
16
|
import yaml
|
17
17
|
|
18
18
|
from sky.utils import ux_utils
|
19
|
+
from sky.utils.kubernetes import ssh_utils
|
19
20
|
|
20
21
|
# Colors for nicer UX
|
21
22
|
RED = '\033[0;31m'
|
@@ -24,7 +25,6 @@ YELLOW = '\033[1;33m'
|
|
24
25
|
WARNING_YELLOW = '\x1b[33m'
|
25
26
|
NC = '\033[0m' # No color
|
26
27
|
|
27
|
-
DEFAULT_SSH_NODE_POOLS_PATH = os.path.expanduser('~/.sky/ssh_node_pools.yaml')
|
28
28
|
DEFAULT_KUBECONFIG_PATH = os.path.expanduser('~/.kube/config')
|
29
29
|
SSH_CONFIG_PATH = os.path.expanduser('~/.ssh/config')
|
30
30
|
NODE_POOLS_INFO_DIR = os.path.expanduser('~/.sky/ssh_node_pools_info')
|
@@ -33,29 +33,6 @@ NODE_POOLS_INFO_DIR = os.path.expanduser('~/.sky/ssh_node_pools_info')
|
|
33
33
|
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
34
34
|
|
35
35
|
|
36
|
-
class UniqueKeySafeLoader(yaml.SafeLoader):
|
37
|
-
"""Custom YAML loader that raises an error if there are duplicate keys."""
|
38
|
-
|
39
|
-
def construct_mapping(self, node, deep=False):
|
40
|
-
mapping = {}
|
41
|
-
for key_node, value_node in node.value:
|
42
|
-
key = self.construct_object(key_node, deep=deep)
|
43
|
-
if key in mapping:
|
44
|
-
raise yaml.constructor.ConstructorError(
|
45
|
-
note=(f'Duplicate cluster config for cluster {key!r}.\n'
|
46
|
-
'Please remove one of them from: '
|
47
|
-
f'{DEFAULT_SSH_NODE_POOLS_PATH}'))
|
48
|
-
value = self.construct_object(value_node, deep=deep)
|
49
|
-
mapping[key] = value
|
50
|
-
return mapping
|
51
|
-
|
52
|
-
|
53
|
-
# Register the custom constructor inside the class
|
54
|
-
UniqueKeySafeLoader.add_constructor(
|
55
|
-
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
|
56
|
-
UniqueKeySafeLoader.construct_mapping)
|
57
|
-
|
58
|
-
|
59
36
|
def parse_args():
|
60
37
|
parser = argparse.ArgumentParser(
|
61
38
|
description='Deploy a Kubernetes cluster on remote machines.')
|
@@ -64,9 +41,9 @@ def parse_args():
|
|
64
41
|
parser.add_argument(
|
65
42
|
'--ssh-node-pools-file',
|
66
43
|
dest='ssh_node_pools_file',
|
67
|
-
default=DEFAULT_SSH_NODE_POOLS_PATH,
|
44
|
+
default=ssh_utils.DEFAULT_SSH_NODE_POOLS_PATH,
|
68
45
|
help=
|
69
|
-
f'Path to SSH node pools YAML file (default: {DEFAULT_SSH_NODE_POOLS_PATH})'
|
46
|
+
f'Path to SSH node pools YAML file (default: {ssh_utils.DEFAULT_SSH_NODE_POOLS_PATH})'
|
70
47
|
)
|
71
48
|
parser.add_argument(
|
72
49
|
'--kubeconfig-path',
|
@@ -117,156 +94,6 @@ def parse_args():
|
|
117
94
|
return parser.parse_args()
|
118
95
|
|
119
96
|
|
120
|
-
def load_ssh_targets(file_path: str) -> Dict[str, Any]:
|
121
|
-
"""Load SSH targets from YAML file."""
|
122
|
-
if not os.path.exists(file_path):
|
123
|
-
with ux_utils.print_exception_no_traceback():
|
124
|
-
raise ValueError(f'SSH Node Pools file not found: {file_path}')
|
125
|
-
|
126
|
-
try:
|
127
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
128
|
-
targets = yaml.load(f, Loader=UniqueKeySafeLoader)
|
129
|
-
return targets
|
130
|
-
except yaml.constructor.ConstructorError as e:
|
131
|
-
with ux_utils.print_exception_no_traceback():
|
132
|
-
raise ValueError(e.note) from e
|
133
|
-
except (yaml.YAMLError, IOError, OSError) as e:
|
134
|
-
with ux_utils.print_exception_no_traceback():
|
135
|
-
raise ValueError(f'Error loading SSH Node Pools file: {e}') from e
|
136
|
-
|
137
|
-
|
138
|
-
def check_host_in_ssh_config(hostname: str) -> bool:
|
139
|
-
"""Return True iff *hostname* matches at least one `Host`/`Match` stanza
|
140
|
-
in the user's OpenSSH client configuration (including anything pulled in
|
141
|
-
via Include).
|
142
|
-
|
143
|
-
It calls: ssh -vvG <hostname> -o ConnectTimeout=0
|
144
|
-
which:
|
145
|
-
• -G expands the effective config without connecting
|
146
|
-
• -vv prints debug lines that show which stanzas are applied
|
147
|
-
• ConnectTimeout=0 avoids a DNS lookup if <hostname> is a FQDN/IP
|
148
|
-
|
149
|
-
No config files are opened or parsed manually.
|
150
|
-
|
151
|
-
Parameters
|
152
|
-
----------
|
153
|
-
hostname : str
|
154
|
-
The alias/IP/FQDN you want to test.
|
155
|
-
|
156
|
-
Returns
|
157
|
-
-------
|
158
|
-
bool
|
159
|
-
True – a specific stanza matched the host
|
160
|
-
False – nothing but the global defaults (`Host *`) applied
|
161
|
-
"""
|
162
|
-
# We direct stderr→stdout because debug output goes to stderr.
|
163
|
-
proc = subprocess.run(
|
164
|
-
['ssh', '-vvG', hostname, '-o', 'ConnectTimeout=0'],
|
165
|
-
text=True,
|
166
|
-
stdout=subprocess.PIPE,
|
167
|
-
stderr=subprocess.STDOUT,
|
168
|
-
check=False, # we only want the text, not to raise
|
169
|
-
)
|
170
|
-
|
171
|
-
# Look for lines like:
|
172
|
-
# debug1: ~/.ssh/config line 42: Applying options for <hostname>
|
173
|
-
# Anything other than "*"
|
174
|
-
pattern = re.compile(r'^debug\d+: .*Applying options for ([^*].*)$',
|
175
|
-
re.MULTILINE)
|
176
|
-
|
177
|
-
return bool(pattern.search(proc.stdout))
|
178
|
-
|
179
|
-
|
180
|
-
def get_cluster_config(targets: Dict[str, Any],
|
181
|
-
cluster_name: Optional[str] = None,
|
182
|
-
file_path: Optional[str] = None) -> Dict[str, Any]:
|
183
|
-
"""Get configuration for specific clusters or all clusters."""
|
184
|
-
if not targets:
|
185
|
-
with ux_utils.print_exception_no_traceback():
|
186
|
-
raise ValueError(
|
187
|
-
f'No clusters defined in SSH Node Pools file {file_path}')
|
188
|
-
|
189
|
-
if cluster_name:
|
190
|
-
if cluster_name not in targets:
|
191
|
-
with ux_utils.print_exception_no_traceback():
|
192
|
-
raise ValueError(f'Cluster {cluster_name!r} not found in '
|
193
|
-
f'SSH Node Pools file {file_path}')
|
194
|
-
return {cluster_name: targets[cluster_name]}
|
195
|
-
|
196
|
-
# Return all clusters if no specific cluster is specified
|
197
|
-
return targets
|
198
|
-
|
199
|
-
|
200
|
-
def prepare_hosts_info(cluster_name: str,
|
201
|
-
cluster_config: Dict[str, Any]) -> List[Dict[str, str]]:
|
202
|
-
"""Prepare list of hosts with resolved user, identity_file, and password."""
|
203
|
-
if 'hosts' not in cluster_config or not cluster_config['hosts']:
|
204
|
-
with ux_utils.print_exception_no_traceback():
|
205
|
-
raise ValueError(
|
206
|
-
f'No hosts defined in cluster {cluster_name} configuration')
|
207
|
-
|
208
|
-
# Get cluster-level defaults
|
209
|
-
cluster_user = cluster_config.get('user', '')
|
210
|
-
cluster_identity_file = os.path.expanduser(
|
211
|
-
cluster_config.get('identity_file', ''))
|
212
|
-
cluster_password = cluster_config.get('password', '')
|
213
|
-
|
214
|
-
# Check if cluster identity file exists
|
215
|
-
if cluster_identity_file and not os.path.isfile(cluster_identity_file):
|
216
|
-
with ux_utils.print_exception_no_traceback():
|
217
|
-
raise ValueError(
|
218
|
-
f'SSH Identity File Missing: {cluster_identity_file}')
|
219
|
-
|
220
|
-
hosts_info = []
|
221
|
-
for host in cluster_config['hosts']:
|
222
|
-
# Host can be a string (IP or SSH config hostname) or a dict
|
223
|
-
if isinstance(host, str):
|
224
|
-
# Check if this is an SSH config hostname
|
225
|
-
is_ssh_config_host = check_host_in_ssh_config(host)
|
226
|
-
|
227
|
-
hosts_info.append({
|
228
|
-
'ip': host,
|
229
|
-
'user': '' if is_ssh_config_host else cluster_user,
|
230
|
-
'identity_file': '' if is_ssh_config_host else
|
231
|
-
cluster_identity_file,
|
232
|
-
'password': cluster_password,
|
233
|
-
'use_ssh_config': is_ssh_config_host
|
234
|
-
})
|
235
|
-
else:
|
236
|
-
# It's a dict with potential overrides
|
237
|
-
if 'ip' not in host:
|
238
|
-
print(
|
239
|
-
f'{RED}Warning: Host missing \'ip\' field, skipping: {host}{NC}'
|
240
|
-
)
|
241
|
-
continue
|
242
|
-
|
243
|
-
# Check if this is an SSH config hostname
|
244
|
-
is_ssh_config_host = check_host_in_ssh_config(host['ip'])
|
245
|
-
|
246
|
-
# Use host-specific values or fall back to cluster defaults
|
247
|
-
host_user = '' if is_ssh_config_host else host.get(
|
248
|
-
'user', cluster_user)
|
249
|
-
host_identity_file = os.path.expanduser(
|
250
|
-
'' if is_ssh_config_host else host.
|
251
|
-
get('identity_file', cluster_identity_file))
|
252
|
-
host_password = host.get('password', cluster_password)
|
253
|
-
|
254
|
-
if host_identity_file and not os.path.isfile(host_identity_file):
|
255
|
-
with ux_utils.print_exception_no_traceback():
|
256
|
-
raise ValueError(
|
257
|
-
f'SSH Identity File Missing: {host_identity_file}')
|
258
|
-
|
259
|
-
hosts_info.append({
|
260
|
-
'ip': host['ip'],
|
261
|
-
'user': host_user,
|
262
|
-
'identity_file': host_identity_file,
|
263
|
-
'password': host_password,
|
264
|
-
'use_ssh_config': is_ssh_config_host
|
265
|
-
})
|
266
|
-
|
267
|
-
return hosts_info
|
268
|
-
|
269
|
-
|
270
97
|
def run_command(cmd, shell=False):
|
271
98
|
"""Run a local command and return the output."""
|
272
99
|
process = subprocess.run(cmd,
|
@@ -675,10 +502,10 @@ def main():
|
|
675
502
|
password = args.password
|
676
503
|
|
677
504
|
# Check if hosts are in SSH config
|
678
|
-
head_use_ssh_config = global_use_ssh_config or check_host_in_ssh_config(
|
505
|
+
head_use_ssh_config = global_use_ssh_config or ssh_utils.check_host_in_ssh_config(
|
679
506
|
head_node)
|
680
507
|
worker_use_ssh_config = [
|
681
|
-
global_use_ssh_config or check_host_in_ssh_config(node)
|
508
|
+
global_use_ssh_config or ssh_utils.check_host_in_ssh_config(node)
|
682
509
|
for node in worker_nodes
|
683
510
|
]
|
684
511
|
|
@@ -688,10 +515,9 @@ def main():
|
|
688
515
|
kubeconfig_path, args.cleanup)
|
689
516
|
else:
|
690
517
|
# Using YAML configuration
|
691
|
-
targets = load_ssh_targets(args.ssh_node_pools_file)
|
692
|
-
clusters_config = get_cluster_config(
|
693
|
-
|
694
|
-
file_path=args.ssh_node_pools_file)
|
518
|
+
targets = ssh_utils.load_ssh_targets(args.ssh_node_pools_file)
|
519
|
+
clusters_config = ssh_utils.get_cluster_config(
|
520
|
+
targets, args.infra, file_path=args.ssh_node_pools_file)
|
695
521
|
|
696
522
|
# Print information about clusters being processed
|
697
523
|
num_clusters = len(clusters_config)
|
@@ -705,7 +531,8 @@ def main():
|
|
705
531
|
print(f'SKYPILOT_CURRENT_CLUSTER: {cluster_name}')
|
706
532
|
print(
|
707
533
|
f'{YELLOW}==== Deploying cluster: {cluster_name} ====${NC}')
|
708
|
-
hosts_info = prepare_hosts_info(
|
534
|
+
hosts_info = ssh_utils.prepare_hosts_info(
|
535
|
+
cluster_name, cluster_config)
|
709
536
|
|
710
537
|
if not hosts_info:
|
711
538
|
print(
|
@@ -744,7 +571,7 @@ def main():
|
|
744
571
|
f'Cluster configuration has changed for field {key!r}. '
|
745
572
|
f'Previous value: {history.get(key)}, '
|
746
573
|
f'Current value: {cluster_config.get(key)}')
|
747
|
-
history_hosts_info = prepare_hosts_info(
|
574
|
+
history_hosts_info = ssh_utils.prepare_hosts_info(
|
748
575
|
cluster_name, history)
|
749
576
|
if not args.cleanup and history_hosts_info[0] != hosts_info[
|
750
577
|
0]:
|