skypilot-nightly 1.0.0.dev20250522__py3-none-any.whl → 1.0.0.dev20250523__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +46 -16
- sky/backends/cloud_vm_ray_backend.py +16 -4
- sky/check.py +109 -44
- sky/cli.py +261 -90
- sky/client/cli.py +261 -90
- sky/client/sdk.py +50 -2
- sky/clouds/__init__.py +3 -0
- sky/clouds/aws.py +4 -2
- sky/clouds/azure.py +4 -2
- sky/clouds/cloud.py +24 -6
- sky/clouds/cudo.py +2 -1
- sky/clouds/do.py +2 -1
- sky/clouds/fluidstack.py +2 -1
- sky/clouds/gcp.py +4 -2
- sky/clouds/ibm.py +4 -2
- sky/clouds/kubernetes.py +66 -22
- sky/clouds/lambda_cloud.py +2 -1
- sky/clouds/nebius.py +18 -2
- sky/clouds/oci.py +4 -2
- sky/clouds/paperspace.py +2 -1
- sky/clouds/runpod.py +2 -1
- sky/clouds/scp.py +2 -1
- sky/clouds/service_catalog/constants.py +1 -1
- sky/clouds/service_catalog/ssh_catalog.py +167 -0
- sky/clouds/ssh.py +203 -0
- sky/clouds/vast.py +2 -1
- sky/clouds/vsphere.py +2 -1
- sky/core.py +53 -9
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{CzOVV6JpRQBRt5GhZuhyK → ECKwDNS9v9y3_IKFZ2lpp}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/optimizer.py +23 -4
- sky/provision/__init__.py +1 -0
- sky/provision/aws/instance.py +17 -1
- sky/provision/kubernetes/instance.py +16 -5
- sky/provision/kubernetes/utils.py +37 -19
- sky/provision/nebius/instance.py +3 -1
- sky/provision/nebius/utils.py +14 -2
- sky/provision/ssh/__init__.py +18 -0
- sky/resources.py +4 -1
- sky/server/requests/payloads.py +7 -0
- sky/server/server.py +40 -0
- sky/setup_files/dependencies.py +1 -0
- sky/templates/nebius-ray.yml.j2 +12 -0
- sky/utils/infra_utils.py +21 -1
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/create_cluster.sh +1 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +1437 -0
- sky/utils/kubernetes/kubernetes_deploy_utils.py +117 -10
- sky/utils/kubernetes/ssh-tunnel.sh +387 -0
- sky/utils/log_utils.py +214 -1
- sky/utils/schemas.py +21 -0
- sky/utils/ux_utils.py +2 -1
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/METADATA +6 -1
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/RECORD +68 -63
- sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +0 -1
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- /sky/dashboard/out/_next/static/{CzOVV6JpRQBRt5GhZuhyK → ECKwDNS9v9y3_IKFZ2lpp}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/top_level.txt +0 -0
sky/templates/nebius-ray.yml.j2
CHANGED
@@ -46,6 +46,13 @@ available_node_types:
|
|
46
46
|
InstanceType: {{instance_type}}
|
47
47
|
ImageId: {{image_id}}
|
48
48
|
DiskSize: {{disk_size}}
|
49
|
+
filesystems:
|
50
|
+
{%- for fs in filesystems %}
|
51
|
+
- filesystem_id: {{ fs.filesystem_id }}
|
52
|
+
filesystem_mount_tag: {{ fs.filesystem_mount_tag }}
|
53
|
+
filesystem_attach_mode: {{ fs.filesystem_attach_mode }}
|
54
|
+
filesystem_mount_path: {{ fs.filesystem_mount_path }}
|
55
|
+
{%- endfor %}
|
49
56
|
UserData: |
|
50
57
|
runcmd:
|
51
58
|
- sudo sed -i 's/^#\?AllowTcpForwarding.*/AllowTcpForwarding yes/' /etc/ssh/sshd_config
|
@@ -130,6 +137,11 @@ setup_commands:
|
|
130
137
|
- {%- for initial_setup_command in initial_setup_commands %}
|
131
138
|
{{ initial_setup_command }}
|
132
139
|
{%- endfor %}
|
140
|
+
{%- for fs in filesystems %}
|
141
|
+
sudo mkdir -p {{ fs.filesystem_mount_path }};
|
142
|
+
sudo mount -t virtiofs {{ fs.filesystem_mount_tag }} {{ fs.filesystem_mount_path }};
|
143
|
+
sudo chmod a+w {{ fs.filesystem_mount_path }};
|
144
|
+
{%- endfor %}
|
133
145
|
sudo systemctl stop unattended-upgrades || true;
|
134
146
|
sudo systemctl disable unattended-upgrades || true;
|
135
147
|
sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true;
|
sky/utils/infra_utils.py
CHANGED
@@ -86,6 +86,16 @@ class InfraInfo:
|
|
86
86
|
cloud_name = 'kubernetes' # Normalize k8s to kubernetes
|
87
87
|
region = '/'.join(parts[1:]) if len(parts) >= 2 else None
|
88
88
|
zone = None
|
89
|
+
elif cloud_name == 'ssh':
|
90
|
+
# For SSH, the entire string after "ssh/" is the
|
91
|
+
# node pool name. We prepend 'ssh-' for the internal implementation
|
92
|
+
# which reuses the context name.
|
93
|
+
# TODO(romilb): This is a workaround while we use the global
|
94
|
+
# kubeconfig to store the ssh contexts.
|
95
|
+
region = '/'.join(parts[1:]) if len(parts) >= 2 else None
|
96
|
+
if region:
|
97
|
+
region = f'ssh-{region}'
|
98
|
+
zone = None
|
89
99
|
else:
|
90
100
|
# For non-Kubernetes clouds, continue with regular parsing
|
91
101
|
# but be careful to only split into max 3 parts
|
@@ -133,6 +143,12 @@ class InfraInfo:
|
|
133
143
|
if zone is None:
|
134
144
|
zone = '*'
|
135
145
|
|
146
|
+
# If the cloud is ssh, we remove the ssh- prefix from the region
|
147
|
+
# TODO(romilb): This is a workaround while we use the global
|
148
|
+
# kubeconfig to store the ssh contexts.
|
149
|
+
if region and region.startswith('ssh-'):
|
150
|
+
region = region[4:]
|
151
|
+
|
136
152
|
# Build the parts list and filter out trailing wildcards
|
137
153
|
parts = [cloud.lower(), region, zone]
|
138
154
|
while parts and parts[-1] == '*':
|
@@ -160,7 +176,11 @@ class InfraInfo:
|
|
160
176
|
if self.zone is not None and self.zone != '*':
|
161
177
|
region_or_zone = self.zone
|
162
178
|
elif self.region is not None and self.region != '*':
|
163
|
-
|
179
|
+
# If using region, we remove the ssh- prefix if it exists for SSH
|
180
|
+
# Node Pools.
|
181
|
+
# TODO(romilb): This is a workaround while we use the global
|
182
|
+
# kubeconfig to store the ssh contexts.
|
183
|
+
# NOTE: str.lstrip('ssh-') strips any leading 's', 'h' or '-' characters, not the literal prefix (e.g. 'sa-east-1' -> 'a-east-1'), so remove the exact 'ssh-' prefix instead.
region_or_zone = (self.region[len('ssh-'):] if self.region.startswith('ssh-') else self.region)
|
164
184
|
|
165
185
|
if region_or_zone is not None and truncate:
|
166
186
|
region_or_zone = common_utils.truncate_long_string(
|
sky/utils/kubernetes/cleanup-tunnel.sh
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# cleanup-tunnel.sh - Script to clean up SSH tunnels for a Kubernetes context
|
3
|
+
|
4
|
+
# Usage: cleanup-tunnel.sh CONTEXT_NAME
|
5
|
+
|
6
|
+
CONTEXT="${1:-default}"
|
7
|
+
TUNNEL_DIR="$HOME/.sky/ssh_node_pools_info"
|
8
|
+
PID_FILE="$TUNNEL_DIR/$CONTEXT-tunnel.pid"
|
9
|
+
LOG_FILE="$TUNNEL_DIR/$CONTEXT-tunnel.log"
|
10
|
+
LOCK_FILE="$TUNNEL_DIR/$CONTEXT-tunnel.lock"
|
11
|
+
|
12
|
+
# Get the port from kubeconfig if available
|
13
|
+
KUBE_PORT=$(kubectl config view --minify --context="$CONTEXT" -o jsonpath='{.clusters[0].cluster.server}' 2>/dev/null | grep -o ":[0-9]\+" | tr -d ":" || echo "")
|
14
|
+
|
15
|
+
if [[ -z "$KUBE_PORT" ]]; then
|
16
|
+
# Default to 6443 if we can't determine the port
|
17
|
+
KUBE_PORT=6443
|
18
|
+
echo "$(date): Could not determine port from kubeconfig, using default port $KUBE_PORT" >> "$LOG_FILE"
|
19
|
+
else
|
20
|
+
echo "$(date): Found port $KUBE_PORT in kubeconfig for context $CONTEXT" >> "$LOG_FILE"
|
21
|
+
fi
|
22
|
+
|
23
|
+
# Check if PID file exists
|
24
|
+
if [[ -f "$PID_FILE" ]]; then
|
25
|
+
OLD_PID=$(cat "$PID_FILE")
|
26
|
+
|
27
|
+
# Log the cleanup attempt
|
28
|
+
echo "$(date): Attempting to clean up tunnel for context $CONTEXT (PID: $OLD_PID, Port: $KUBE_PORT)" >> "$LOG_FILE"
|
29
|
+
|
30
|
+
# Try to kill the process
|
31
|
+
if kill -0 "$OLD_PID" 2>/dev/null; then
|
32
|
+
# Process exists, kill it
|
33
|
+
kill "$OLD_PID" 2>/dev/null
|
34
|
+
|
35
|
+
# Wait a moment and check if it's really gone
|
36
|
+
sleep 1
|
37
|
+
if kill -0 "$OLD_PID" 2>/dev/null; then
|
38
|
+
# Still running, force kill
|
39
|
+
kill -9 "$OLD_PID" 2>/dev/null
|
40
|
+
echo "$(date): Forcefully terminated tunnel process $OLD_PID" >> "$LOG_FILE"
|
41
|
+
else
|
42
|
+
echo "$(date): Successfully terminated tunnel process $OLD_PID" >> "$LOG_FILE"
|
43
|
+
fi
|
44
|
+
else
|
45
|
+
echo "$(date): No running process found with PID $OLD_PID" >> "$LOG_FILE"
|
46
|
+
fi
|
47
|
+
|
48
|
+
# Remove PID file
|
49
|
+
rm -f "$PID_FILE"
|
50
|
+
else
|
51
|
+
echo "$(date): No PID file found for context $CONTEXT. Nothing to clean up." >> "$LOG_FILE"
|
52
|
+
fi
|
53
|
+
|
54
|
+
# Clean up lock file if it exists
|
55
|
+
rm -f "$LOCK_FILE"
|
56
|
+
|
57
|
+
# Check if port is still in use
|
58
|
+
if nc -z localhost "$KUBE_PORT" 2>/dev/null; then
|
59
|
+
echo "$(date): Warning: Port $KUBE_PORT is still in use after cleanup. Another process might be using it." >> "$LOG_FILE"
|
60
|
+
fi
|
61
|
+
|
62
|
+
echo "$(date): Cleanup complete for context $CONTEXT" >> "$LOG_FILE"
|