skypilot-nightly 1.0.0.dev20250522__py3-none-any.whl → 1.0.0.dev20250523__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +46 -16
  3. sky/backends/cloud_vm_ray_backend.py +16 -4
  4. sky/check.py +109 -44
  5. sky/cli.py +261 -90
  6. sky/client/cli.py +261 -90
  7. sky/client/sdk.py +50 -2
  8. sky/clouds/__init__.py +3 -0
  9. sky/clouds/aws.py +4 -2
  10. sky/clouds/azure.py +4 -2
  11. sky/clouds/cloud.py +24 -6
  12. sky/clouds/cudo.py +2 -1
  13. sky/clouds/do.py +2 -1
  14. sky/clouds/fluidstack.py +2 -1
  15. sky/clouds/gcp.py +4 -2
  16. sky/clouds/ibm.py +4 -2
  17. sky/clouds/kubernetes.py +66 -22
  18. sky/clouds/lambda_cloud.py +2 -1
  19. sky/clouds/nebius.py +18 -2
  20. sky/clouds/oci.py +4 -2
  21. sky/clouds/paperspace.py +2 -1
  22. sky/clouds/runpod.py +2 -1
  23. sky/clouds/scp.py +2 -1
  24. sky/clouds/service_catalog/constants.py +1 -1
  25. sky/clouds/service_catalog/ssh_catalog.py +167 -0
  26. sky/clouds/ssh.py +203 -0
  27. sky/clouds/vast.py +2 -1
  28. sky/clouds/vsphere.py +2 -1
  29. sky/core.py +53 -9
  30. sky/dashboard/out/404.html +1 -1
  31. sky/dashboard/out/_next/static/{CzOVV6JpRQBRt5GhZuhyK → ECKwDNS9v9y3_IKFZ2lpp}/_buildManifest.js +1 -1
  32. sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +1 -0
  33. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  34. sky/dashboard/out/clusters/[cluster].html +1 -1
  35. sky/dashboard/out/clusters.html +1 -1
  36. sky/dashboard/out/index.html +1 -1
  37. sky/dashboard/out/infra.html +1 -1
  38. sky/dashboard/out/jobs/[job].html +1 -1
  39. sky/dashboard/out/jobs.html +1 -1
  40. sky/optimizer.py +23 -4
  41. sky/provision/__init__.py +1 -0
  42. sky/provision/aws/instance.py +17 -1
  43. sky/provision/kubernetes/instance.py +16 -5
  44. sky/provision/kubernetes/utils.py +37 -19
  45. sky/provision/nebius/instance.py +3 -1
  46. sky/provision/nebius/utils.py +14 -2
  47. sky/provision/ssh/__init__.py +18 -0
  48. sky/resources.py +4 -1
  49. sky/server/requests/payloads.py +7 -0
  50. sky/server/server.py +40 -0
  51. sky/setup_files/dependencies.py +1 -0
  52. sky/templates/nebius-ray.yml.j2 +12 -0
  53. sky/utils/infra_utils.py +21 -1
  54. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  55. sky/utils/kubernetes/create_cluster.sh +1 -0
  56. sky/utils/kubernetes/deploy_remote_cluster.py +1437 -0
  57. sky/utils/kubernetes/kubernetes_deploy_utils.py +117 -10
  58. sky/utils/kubernetes/ssh-tunnel.sh +387 -0
  59. sky/utils/log_utils.py +214 -1
  60. sky/utils/schemas.py +21 -0
  61. sky/utils/ux_utils.py +2 -1
  62. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/METADATA +6 -1
  63. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/RECORD +68 -63
  64. sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +0 -1
  65. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  66. /sky/dashboard/out/_next/static/{CzOVV6JpRQBRt5GhZuhyK → ECKwDNS9v9y3_IKFZ2lpp}/_ssgManifest.js +0 -0
  67. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/WHEEL +0 -0
  68. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/entry_points.txt +0 -0
  69. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/licenses/LICENSE +0 -0
  70. {skypilot_nightly-1.0.0.dev20250522.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/top_level.txt +0 -0
sky/templates/nebius-ray.yml.j2 CHANGED
@@ -46,6 +46,13 @@ available_node_types:
        InstanceType: {{instance_type}}
        ImageId: {{image_id}}
        DiskSize: {{disk_size}}
+       filesystems:
+       {%- for fs in filesystems %}
+         - filesystem_id: {{ fs.filesystem_id }}
+           filesystem_mount_tag: {{ fs.filesystem_mount_tag }}
+           filesystem_attach_mode: {{ fs.filesystem_attach_mode }}
+           filesystem_mount_path: {{ fs.filesystem_mount_path }}
+       {%- endfor %}
        UserData: |
          runcmd:
            - sudo sed -i 's/^#\?AllowTcpForwarding.*/AllowTcpForwarding yes/' /etc/ssh/sshd_config
@@ -130,6 +137,11 @@ setup_commands:
  - {%- for initial_setup_command in initial_setup_commands %}
    {{ initial_setup_command }}
    {%- endfor %}
+   {%- for fs in filesystems %}
+   sudo mkdir {{ fs.filesystem_mount_path }};
+   sudo mount -t virtiofs {{ fs.filesystem_mount_tag }} {{ fs.filesystem_mount_path }};
+   sudo chmod a+w {{ fs.filesystem_mount_path }};
+   {%- endfor %}
    sudo systemctl stop unattended-upgrades || true;
    sudo systemctl disable unattended-upgrades || true;
    sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true;
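The two hunks above attach Nebius filesystems in the node config and mount them over virtiofs during setup. As a rough illustration (not part of the package), the snippet below renders the new setup-command loop with jinja2; the filesystem values are hypothetical placeholders chosen only to show which fields the template expects.

# Minimal sketch: render the new virtiofs setup loop with jinja2.
# The filesystem values are hypothetical, not real Nebius IDs.
import jinja2

setup_loop = jinja2.Template(
    '{%- for fs in filesystems %}\n'
    'sudo mkdir {{ fs.filesystem_mount_path }};\n'
    'sudo mount -t virtiofs {{ fs.filesystem_mount_tag }}'
    ' {{ fs.filesystem_mount_path }};\n'
    'sudo chmod a+w {{ fs.filesystem_mount_path }};\n'
    '{%- endfor %}')

print(setup_loop.render(filesystems=[{
    'filesystem_id': 'computefilesystem-example',  # hypothetical
    'filesystem_mount_tag': 'fs-tag-0',            # hypothetical
    'filesystem_attach_mode': 'READ_WRITE',        # hypothetical
    'filesystem_mount_path': '/mnt/filesystem0',
}]))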
sky/utils/infra_utils.py CHANGED
@@ -86,6 +86,16 @@ class InfraInfo:
            cloud_name = 'kubernetes'  # Normalize k8s to kubernetes
            region = '/'.join(parts[1:]) if len(parts) >= 2 else None
            zone = None
+        elif cloud_name == 'ssh':
+            # For SSH, the entire string after "ssh/" is the
+            # node pool name. We prepend 'ssh-' for the internal implementation
+            # which reuses the context name.
+            # TODO(romilb): This is a workaround while we use the global
+            # kubeconfig to store the ssh contexts.
+            region = '/'.join(parts[1:]) if len(parts) >= 2 else None
+            if region:
+                region = f'ssh-{region}'
+            zone = None
        else:
            # For non-Kubernetes clouds, continue with regular parsing
            # but be careful to only split into max 3 parts
@@ -133,6 +143,12 @@ class InfraInfo:
        if zone is None:
            zone = '*'

+        # If the cloud is ssh, we remove the ssh- prefix from the region
+        # TODO(romilb): This is a workaround while we use the global
+        # kubeconfig to store the ssh contexts.
+        if region and region.startswith('ssh-'):
+            region = region[4:]
+
        # Build the parts list and filter out trailing wildcards
        parts = [cloud.lower(), region, zone]
        while parts and parts[-1] == '*':
@@ -160,7 +176,11 @@
        if self.zone is not None and self.zone != '*':
            region_or_zone = self.zone
        elif self.region is not None and self.region != '*':
-            region_or_zone = self.region
+            # If using region, we remove the ssh- prefix if it exists for SSH
+            # Node Pools.
+            # TODO(romilb): This is a workaround while we use the global
+            # kubeconfig to store the ssh contexts.
+            region_or_zone = self.region.lstrip('ssh-')

        if region_or_zone is not None and truncate:
            region_or_zone = common_utils.truncate_long_string(
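Taken together, these hunks map an infra string like ssh/my-pool to an internal region ssh-my-pool (reusing the kubeconfig context name) and strip the prefix again for display. Below is a small standalone sketch of that round trip, not the SkyPilot API; it uses an explicit prefix check, whereas str.lstrip('ssh-') in the last hunk strips any leading characters from the set {'s', 'h', '-'} rather than the literal prefix.

# Standalone sketch of the ssh/<pool> round trip; illustrative names only.
def parse_ssh_infra(infra: str):
    """'ssh/my-pool' -> internal region 'ssh-my-pool'."""
    parts = infra.split('/')
    assert parts[0] == 'ssh'
    region = '/'.join(parts[1:]) if len(parts) >= 2 else None
    return f'ssh-{region}' if region else None

def display_region(region: str) -> str:
    """Internal region 'ssh-my-pool' -> display name 'my-pool'."""
    return region[len('ssh-'):] if region.startswith('ssh-') else region

assert parse_ssh_infra('ssh/my-pool') == 'ssh-my-pool'
assert display_region('ssh-my-pool') == 'my-pool'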
sky/utils/kubernetes/cleanup-tunnel.sh ADDED
@@ -0,0 +1,62 @@
+ #!/bin/bash
+ # cleanup-tunnel.sh - Script to clean up SSH tunnels for a Kubernetes context
+
+ # Usage: cleanup-tunnel.sh CONTEXT_NAME
+
+ CONTEXT="${1:-default}"
+ TUNNEL_DIR="$HOME/.sky/ssh_node_pools_info"
+ PID_FILE="$TUNNEL_DIR/$CONTEXT-tunnel.pid"
+ LOG_FILE="$TUNNEL_DIR/$CONTEXT-tunnel.log"
+ LOCK_FILE="$TUNNEL_DIR/$CONTEXT-tunnel.lock"
+
+ # Get the port from kubeconfig if available
+ KUBE_PORT=$(kubectl config view --minify --context="$CONTEXT" -o jsonpath='{.clusters[0].cluster.server}' 2>/dev/null | grep -o ":[0-9]\+" | tr -d ":" || echo "")
+
+ if [[ -z "$KUBE_PORT" ]]; then
+   # Default to 6443 if we can't determine the port
+   KUBE_PORT=6443
+   echo "$(date): Could not determine port from kubeconfig, using default port $KUBE_PORT" >> "$LOG_FILE"
+ else
+   echo "$(date): Found port $KUBE_PORT in kubeconfig for context $CONTEXT" >> "$LOG_FILE"
+ fi
+
+ # Check if PID file exists
+ if [[ -f "$PID_FILE" ]]; then
+   OLD_PID=$(cat "$PID_FILE")
+
+   # Log the cleanup attempt
+   echo "$(date): Attempting to clean up tunnel for context $CONTEXT (PID: $OLD_PID, Port: $KUBE_PORT)" >> "$LOG_FILE"
+
+   # Try to kill the process
+   if kill -0 "$OLD_PID" 2>/dev/null; then
+     # Process exists, kill it
+     kill "$OLD_PID" 2>/dev/null
+
+     # Wait a moment and check if it's really gone
+     sleep 1
+     if kill -0 "$OLD_PID" 2>/dev/null; then
+       # Still running, force kill
+       kill -9 "$OLD_PID" 2>/dev/null
+       echo "$(date): Forcefully terminated tunnel process $OLD_PID" >> "$LOG_FILE"
+     else
+       echo "$(date): Successfully terminated tunnel process $OLD_PID" >> "$LOG_FILE"
+     fi
+   else
+     echo "$(date): No running process found with PID $OLD_PID" >> "$LOG_FILE"
+   fi
+
+   # Remove PID file
+   rm -f "$PID_FILE"
+ else
+   echo "$(date): No PID file found for context $CONTEXT. Nothing to clean up." >> "$LOG_FILE"
+ fi
+
+ # Clean up lock file if it exists
+ rm -f "$LOCK_FILE"
+
+ # Check if port is still in use
+ if nc -z localhost "$KUBE_PORT" 2>/dev/null; then
+   echo "$(date): Warning: Port $KUBE_PORT is still in use after cleanup. Another process might be using it." >> "$LOG_FILE"
+ fi
+
+ echo "$(date): Cleanup complete for context $CONTEXT" >> "$LOG_FILE"
sky/utils/kubernetes/create_cluster.sh CHANGED
@@ -85,6 +85,7 @@ fi
 if kind get clusters | grep -q skypilot; then
   echo "Local cluster already exists. Exiting."
   # Switch context to the local cluster
+  kind export kubeconfig --name skypilot
   kubectl config use-context kind-skypilot
   exit 100
 fi