skypilot-nightly 1.0.0.dev20240927__py3-none-any.whl → 1.0.0.dev20240928__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 import urllib.request
 
 # Replaced with the current commit when building the wheels.
-_SKYPILOT_COMMIT_SHA = 'e6b8d2c086544ab5cfdb877ad414eafddaa49cb4'
+_SKYPILOT_COMMIT_SHA = 'dacf27348ae1446c3c93d0ee2fc57702c5366eac'
 
 
 def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 
 
 __commit__ = _get_git_commit()
-__version__ = '1.0.0.dev20240927'
+__version__ = '1.0.0.dev20240928'
 __root_dir__ = os.path.dirname(os.path.abspath(__file__))
 
 
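Only the pinned commit SHA and the version string change in this file. To confirm which nightly build is installed, something like the following works (illustrative commands; the version literal is the one pinned above):

$ pip install -U "skypilot-nightly==1.0.0.dev20240928"
$ python -c "import sky; print(sky.__version__, sky.__commit__)"
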
sky/cli.py CHANGED
@@ -5072,15 +5072,7 @@ def local():
     pass
 
 
-@click.option('--gpus/--no-gpus',
-              default=True,
-              is_flag=True,
-              help='Launch cluster without GPU support even '
-              'if GPUs are detected on the host.')
-@local.command('up', cls=_DocumentedCodeCommand)
-@usage_lib.entrypoint
-def local_up(gpus: bool):
-    """Creates a local cluster."""
+def _deploy_local_cluster(gpus: bool):
     cluster_created = False
 
     # Check if GPUs are available on the host
@@ -5206,6 +5198,124 @@ def local_up(gpus: bool):
                 f'{gpu_hint}')
 
 
+def _deploy_remote_cluster(ip_file: str, ssh_user: str, ssh_key_path: str,
+                           cleanup: bool):
+    success = False
+    path_to_package = os.path.dirname(os.path.dirname(__file__))
+    up_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes',
+                                  'deploy_remote_cluster.sh')
+    # Get directory of script and run it from there
+    cwd = os.path.dirname(os.path.abspath(up_script_path))
+
+    deploy_command = f'{up_script_path} {ip_file} {ssh_user} {ssh_key_path}'
+    if cleanup:
+        deploy_command += ' --cleanup'
+
+    # Convert the command to a format suitable for subprocess
+    deploy_command = shlex.split(deploy_command)
+
+    # Setup logging paths
+    run_timestamp = backend_utils.get_run_timestamp()
+    log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
+                            'local_up.log')
+    tail_cmd = 'tail -n100 -f ' + log_path
+
+    # Check if ~/.kube/config exists:
+    if os.path.exists(os.path.expanduser('~/.kube/config')):
+        click.echo('Found existing kube config. '
+                   'It will be backed up to ~/.kube/config.bak.')
+    style = colorama.Style
+    click.echo('To view detailed progress: '
+               f'{style.BRIGHT}{tail_cmd}{style.RESET_ALL}')
+    if cleanup:
+        msg_str = 'Cleaning up remote cluster...'
+    else:
+        msg_str = 'Deploying remote cluster...'
+    with rich_utils.safe_status(f'[bold cyan]{msg_str}'):
+        returncode, _, stderr = log_lib.run_with_log(
+            cmd=deploy_command,
+            log_path=log_path,
+            require_outputs=True,
+            stream_logs=False,
+            line_processor=log_utils.SkyRemoteUpLineProcessor(),
+            cwd=cwd)
+        if returncode == 0:
+            success = True
+        else:
+            with ux_utils.print_exception_no_traceback():
+                raise RuntimeError(
+                    'Failed to deploy remote cluster. '
+                    f'Full log: {log_path}'
+                    f'\nError: {style.BRIGHT}{stderr}{style.RESET_ALL}')
+
+    if success:
+        if cleanup:
+            click.echo(f'{colorama.Fore.GREEN}'
+                       '🎉 Remote cluster cleaned up successfully.'
+                       f'{style.RESET_ALL}')
+        else:
+            click.echo('Cluster deployment done. You can now run tasks on '
+                       'this cluster.\nE.g., run a task with: '
+                       'sky launch --cloud kubernetes -- echo hello world.'
+                       f'\n{colorama.Fore.GREEN}🎉 Remote cluster deployed '
+                       f'successfully. {style.RESET_ALL}')
+
+
+@click.option('--gpus/--no-gpus',
+              default=True,
+              is_flag=True,
+              help='Launch cluster without GPU support even '
+              'if GPUs are detected on the host.')
+@click.option(
+    '--ips',
+    type=str,
+    required=False,
+    help='Path to the file containing IP addresses of remote machines.')
+@click.option('--ssh-user',
+              type=str,
+              required=False,
+              help='SSH username for accessing remote machines.')
+@click.option('--ssh-key-path',
+              type=str,
+              required=False,
+              help='Path to the SSH private key.')
+@click.option('--cleanup',
+              is_flag=True,
+              help='Clean up the remote cluster instead of deploying it.')
+@local.command('up', cls=_DocumentedCodeCommand)
+@usage_lib.entrypoint
+def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
+             cleanup: bool):
+    """Creates a local or remote cluster."""
+
+    def _validate_args(ips, ssh_user, ssh_key_path, cleanup):
+        # If any of --ips, --ssh-user, or --ssh-key-path is specified,
+        # all must be specified
+        if bool(ips) or bool(ssh_user) or bool(ssh_key_path):
+            if not (ips and ssh_user and ssh_key_path):
+                raise click.BadParameter(
+                    'All --ips, --ssh-user, and --ssh-key-path '
+                    'must be specified together.')
+
+        # --cleanup can only be used if --ips, --ssh-user and --ssh-key-path
+        # are all provided
+        if cleanup and not (ips and ssh_user and ssh_key_path):
+            raise click.BadParameter('--cleanup can only be used with '
+                                     '--ips, --ssh-user and --ssh-key-path.')
+
+    _validate_args(ips, ssh_user, ssh_key_path, cleanup)
+
+    # If remote deployment arguments are specified, run remote up script
+    if ips and ssh_user and ssh_key_path:
+        # Convert ips and ssh_key_path to absolute paths
+        ips = os.path.abspath(ips)
+        ssh_key_path = os.path.abspath(ssh_key_path)
+        _deploy_remote_cluster(ips, ssh_user, ssh_key_path, cleanup)
+    else:
+        # Run local deployment (kind) if no remote args are specified
+        _deploy_local_cluster(gpus)
+
+
 @local.command('down', cls=_DocumentedCodeCommand)
 @usage_lib.entrypoint
 def local_down():
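With this change, `sky local up` dispatches on its flags: with no remote arguments it still creates a local kind cluster, while --ips, --ssh-user, and --ssh-key-path together drive the new remote deployment path, and --cleanup reverses it. A sketch of the resulting invocations (file name, user, and key path are illustrative):

$ # Local kind cluster, as before; --no-gpus skips GPU support.
$ sky local up --no-gpus
$ # Remote k3s cluster on the machines listed in ips.txt.
$ sky local up --ips ips.txt --ssh-user ubuntu --ssh-key-path ~/.ssh/id_rsa
$ # Tear the same remote cluster down.
$ sky local up --ips ips.txt --ssh-user ubuntu --ssh-key-path ~/.ssh/id_rsa --cleanup
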
sky/utils/kubernetes/deploy_remote_cluster.sh ADDED
@@ -0,0 +1,243 @@
+#!/bin/bash
+# Refer to https://skypilot.readthedocs.io/en/latest/reservations/existing-machines.html for details on how to use this script.
+set -e
+
+# Colors for nicer UX
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No color
+
+# Variables
+IPS_FILE=$1
+USER=$2
+SSH_KEY=$3
+K3S_TOKEN=mytoken # Any string can be used as the token
+CLEANUP=false
+INSTALL_GPU=false
+
+if [[ "$4" == "--cleanup" ]]; then
+  CLEANUP=true
+fi
+
+# Basic argument checks
+if [ -z "$IPS_FILE" ] || [ -z "$USER" ] || [ -z "$SSH_KEY" ]; then
+  >&2 echo -e "${RED}Error: Missing required arguments.${NC}"
+  >&2 echo "Usage: ./deploy_remote_cluster.sh ips.txt username path/to/ssh/key [--cleanup]"
+  exit 1
+fi
+
+# Check if SSH key exists
+if [ ! -f "$SSH_KEY" ]; then
+  >&2 echo -e "${RED}Error: SSH key not found: $SSH_KEY${NC}"
+  exit 1
+fi
+
+# Check if IPs file exists
+if [ ! -f "$IPS_FILE" ]; then
+  >&2 echo -e "${RED}Error: IPs file not found: $IPS_FILE${NC}"
+  exit 1
+fi
+
+# Get head node and worker nodes from the IPs file
+HEAD_NODE=$(head -n 1 "$IPS_FILE")
+WORKER_NODES=$(tail -n +2 "$IPS_FILE")
+
+# Check if the IPs file is empty or not formatted correctly
+if [ -z "$HEAD_NODE" ]; then
+  >&2 echo -e "${RED}Error: IPs file is empty or not formatted correctly.${NC}"
+  exit 1
+fi
+
+# Function to show a progress message
+progress_message() {
+  echo -e "${YELLOW}➜ $1${NC}"
+}
+
+# Step to display success
+success_message() {
+  echo -e "${GREEN}✔ $1${NC}"
+}
+
+# Function to run a command on a remote machine via SSH
+run_remote() {
+  local NODE_IP=$1
+  local CMD=$2
+  # echo -e "${YELLOW}Running command on $NODE_IP...${NC}"
+  ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "$USER@$NODE_IP" "$CMD"
+}
+
+# Function to uninstall k3s and clean up the state on a remote machine
+cleanup_server_node() {
+  local NODE_IP=$1
+  echo -e "${YELLOW}Cleaning up head node $NODE_IP...${NC}"
+  run_remote "$NODE_IP" "
+    echo 'Uninstalling k3s...' &&
+    /usr/local/bin/k3s-uninstall.sh || true &&
+    sudo rm -rf /etc/rancher /var/lib/rancher /var/lib/kubelet /etc/kubernetes ~/.kube
+  "
+  echo -e "${GREEN}Node $NODE_IP cleaned up successfully.${NC}"
+}
+
+# Function to uninstall k3s and clean up the state on a remote machine
+cleanup_agent_node() {
+  local NODE_IP=$1
+  echo -e "${YELLOW}Cleaning up node $NODE_IP...${NC}"
+  run_remote "$NODE_IP" "
+    echo 'Uninstalling k3s...' &&
+    /usr/local/bin/k3s-agent-uninstall.sh || true &&
+    sudo rm -rf /etc/rancher /var/lib/rancher /var/lib/kubelet /etc/kubernetes ~/.kube
+  "
+  echo -e "${GREEN}Node $NODE_IP cleaned up successfully.${NC}"
+}
+
+check_gpu() {
+  local NODE_IP=$1
+  run_remote "$NODE_IP" "
+    if command -v nvidia-smi &> /dev/null; then
+      nvidia-smi --list-gpus | grep 'GPU 0'
+    fi
+  "
+}
+
+# Pre-flight checks
+run_remote "$HEAD_NODE" "echo 'SSH connection successful'"
+# TODO: Add more pre-flight checks here, including checking if port 6443 is accessible
+
+# If --cleanup flag is set, uninstall k3s and exit
+if [ "$CLEANUP" == "true" ]; then
+  echo -e "${YELLOW}Starting cleanup...${NC}"
+
+  # Clean up head node
+  cleanup_server_node "$HEAD_NODE"
+
+  # Clean up worker nodes
+  for NODE in $WORKER_NODES; do
+    cleanup_agent_node "$NODE"
+  done
+
+  echo -e "${GREEN}Cleanup completed successfully.${NC}"
+  exit 0
+fi
+
+# Step 1: Install k3s on the head node
+progress_message "Deploying Kubernetes on head node ($HEAD_NODE)..."
+run_remote "$HEAD_NODE" "
+  curl -sfL https://get.k3s.io | K3S_TOKEN=$K3S_TOKEN sh - &&
+  mkdir -p ~/.kube &&
+  sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config &&
+  sudo chown \$(id -u):\$(id -g) ~/.kube/config &&
+  for i in {1..3}; do
+    if kubectl wait --for=condition=ready node --all --timeout=2m --kubeconfig ~/.kube/config; then
+      break
+    else
+      echo 'Waiting for nodes to be ready...'
+      sleep 5
+    fi
+  done
+  if [ \$i -eq 3 ]; then
+    echo 'Failed to wait for nodes to be ready after 3 attempts'
+    exit 1
+  fi"
+success_message "K3s deployed on head node."
+
+# Check if head node has a GPU
+if check_gpu "$HEAD_NODE"; then
+  echo -e "${YELLOW}GPU detected on head node ($HEAD_NODE).${NC}"
+  INSTALL_GPU=true
+fi
+
+# Fetch the head node's internal IP (this will be passed to worker nodes)
+MASTER_ADDR=$(run_remote "$HEAD_NODE" "hostname -I | awk '{print \$1}'")
+
+echo -e "${GREEN}Master node internal IP: $MASTER_ADDR${NC}"
+
+# Step 2: Install k3s on worker nodes and join them to the master node
+for NODE in $WORKER_NODES; do
+  progress_message "Deploying Kubernetes on worker node ($NODE)..."
+  run_remote "$NODE" "
+    curl -sfL https://get.k3s.io | K3S_URL=https://$MASTER_ADDR:6443 K3S_TOKEN=$K3S_TOKEN sh -"
+  success_message "Kubernetes deployed on worker node ($NODE)."
+
+  # Check if worker node has a GPU
+  if check_gpu "$NODE"; then
+    echo -e "${YELLOW}GPU detected on worker node ($NODE).${NC}"
+    INSTALL_GPU=true
+  fi
+done
+# Step 3: Configure local kubectl to connect to the cluster
+progress_message "Configuring local kubectl to connect to the cluster..."
+scp -o StrictHostKeyChecking=no -i "$SSH_KEY" "$USER@$HEAD_NODE":~/.kube/config ~/.kube/config
+
+# Back up the original kubeconfig file if it exists
+KUBECONFIG_FILE="$HOME/.kube/config"
+if [[ -f "$KUBECONFIG_FILE" ]]; then
+  echo "Backing up existing kubeconfig to $KUBECONFIG_FILE.bak"
+  cp "$KUBECONFIG_FILE" "$KUBECONFIG_FILE.bak"
+fi
+
+# Update kubeconfig for the local machine to use the master node's IP
+# Temporary file to hold the modified kubeconfig
+TEMP_FILE=$(mktemp)
+
+# Remove the certificate-authority-data, and replace the server with the master address
+awk '
+  BEGIN { in_cluster = 0 }
+  /^clusters:/ { in_cluster = 1 }
+  /^users:/ { in_cluster = 0 }
+  in_cluster && /^ *certificate-authority-data:/ { next }
+  in_cluster && /^ *server:/ {
+    print "    server: https://'${HEAD_NODE}:6443'"
+    print "    insecure-skip-tls-verify: true"
+    next
+  }
+  { print }
+' "$KUBECONFIG_FILE" > "$TEMP_FILE"
+
+# Replace the original kubeconfig with the modified one
+mv "$TEMP_FILE" "$KUBECONFIG_FILE"
+
+success_message "kubectl configured to connect to the cluster."
+
+echo "Cluster deployment completed. You can now run 'kubectl get nodes' to verify the setup."
+
+# Install GPU operator if a GPU was detected on any node
+if [ "$INSTALL_GPU" == "true" ]; then
+  echo -e "${YELLOW}GPU detected in the cluster. Installing Nvidia GPU Operator...${NC}"
+  run_remote "$HEAD_NODE" "
+    curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 &&
+    chmod 700 get_helm.sh &&
+    ./get_helm.sh &&
+    helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update &&
+    kubectl create namespace gpu-operator --kubeconfig ~/.kube/config || true &&
+    sudo ln -s /sbin/ldconfig /sbin/ldconfig.real || true &&
+    helm install gpu-operator -n gpu-operator --create-namespace nvidia/gpu-operator \
+      --set 'toolkit.env[0].name=CONTAINERD_CONFIG' \
+      --set 'toolkit.env[0].value=/var/lib/rancher/k3s/agent/etc/containerd/config.toml' \
+      --set 'toolkit.env[1].name=CONTAINERD_SOCKET' \
+      --set 'toolkit.env[1].value=/run/k3s/containerd/containerd.sock' \
+      --set 'toolkit.env[2].name=CONTAINERD_RUNTIME_CLASS' \
+      --set 'toolkit.env[2].value=nvidia' &&
+    echo 'Waiting for GPU operator installation...' &&
+    while ! kubectl describe nodes --kubeconfig ~/.kube/config | grep -q 'nvidia.com/gpu:'; do
+      echo 'Waiting for GPU operator...'
+      sleep 5
+    done
+    echo 'GPU operator installed successfully.'"
+  success_message "GPU Operator installed."
+else
+  echo -e "${YELLOW}No GPUs detected. Skipping GPU Operator installation.${NC}"
+fi
+
+# Configure SkyPilot
+progress_message "Configuring SkyPilot..."
+sky check kubernetes
+success_message "SkyPilot configured successfully."
+
+# Display final success message
+echo -e "${GREEN}==== 🎉 Kubernetes cluster deployment completed successfully 🎉 ====${NC}"
+echo "You can now interact with your Kubernetes cluster through SkyPilot: "
+echo "  • List available GPUs: sky show-gpus --cloud kubernetes"
+echo "  • Launch a GPU development pod: sky launch -c devbox --cloud kubernetes --gpus A100:1"
+echo "  • Connect to pod with SSH: ssh devbox"
+echo "  • Connect to pod with VSCode: code --remote ssh-remote+devbox '/'"
sky/utils/log_utils.py CHANGED
@@ -1,6 +1,7 @@
 """Logging utils."""
 import enum
-from typing import List, Optional
+import types
+from typing import List, Optional, Type
 
 import colorama
 import pendulum
@@ -15,13 +16,15 @@ logger = sky_logging.init_logger(__name__)
 class LineProcessor(object):
     """A processor for log lines."""
 
-    def __enter__(self):
+    def __enter__(self) -> None:
         pass
 
-    def process_line(self, log_line):
+    def process_line(self, log_line: str) -> None:
         pass
 
-    def __exit__(self, except_type, except_value, traceback):
+    def __exit__(self, except_type: Optional[Type[BaseException]],
+                 except_value: Optional[BaseException],
+                 traceback: Optional[types.TracebackType]) -> None:
         del except_type, except_value, traceback  # unused
         pass
 
@@ -34,12 +37,12 @@ class RayUpLineProcessor(LineProcessor):
         RUNTIME_SETUP = 1
         PULLING_DOCKER_IMAGES = 2
 
-    def __enter__(self):
+    def __enter__(self) -> None:
         self.state = self.ProvisionStatus.LAUNCH
         self.status_display = rich_utils.safe_status('[bold cyan]Launching')
         self.status_display.start()
 
-    def process_line(self, log_line):
+    def process_line(self, log_line: str) -> None:
         if ('Success.' in log_line and
                 self.state == self.ProvisionStatus.LAUNCH):
             logger.info(f'{colorama.Fore.GREEN}Head node is up.'
@@ -60,7 +63,9 @@ class RayUpLineProcessor(LineProcessor):
                 '[bold cyan]Launching - Preparing SkyPilot runtime')
             self.state = self.ProvisionStatus.RUNTIME_SETUP
 
-    def __exit__(self, except_type, except_value, traceback):
+    def __exit__(self, except_type: Optional[Type[BaseException]],
+                 except_value: Optional[BaseException],
+                 traceback: Optional[types.TracebackType]) -> None:
         del except_type, except_value, traceback  # unused
         self.status_display.stop()
 
@@ -68,13 +73,13 @@ class RayUpLineProcessor(LineProcessor):
 class SkyLocalUpLineProcessor(LineProcessor):
     """A processor for `sky local up` log lines."""
 
-    def __enter__(self):
+    def __enter__(self) -> None:
         status = rich_utils.safe_status('[bold cyan]Creating local cluster - '
                                         'initializing Kubernetes')
         self.status_display = status
         self.status_display.start()
 
-    def process_line(self, log_line):
+    def process_line(self, log_line: str) -> None:
         if 'Kind cluster created.' in log_line:
             logger.info(f'{colorama.Fore.GREEN}Kubernetes is running.'
                         f'{colorama.Style.RESET_ALL}')
@@ -124,7 +129,80 @@ class SkyLocalUpLineProcessor(LineProcessor):
                 f'{colorama.Fore.GREEN}Nginx Ingress Controller installed.'
                 f'{colorama.Style.RESET_ALL}')
 
-    def __exit__(self, except_type, except_value, traceback):
+    def __exit__(self, except_type: Optional[Type[BaseException]],
+                 except_value: Optional[BaseException],
+                 traceback: Optional[types.TracebackType]) -> None:
+        del except_type, except_value, traceback  # unused
+        self.status_display.stop()
+
+
+class SkyRemoteUpLineProcessor(LineProcessor):
+    """A processor for deploy_remote_cluster.sh log lines."""
+
+    def __enter__(self) -> None:
+        status = rich_utils.safe_status('[bold cyan]Creating remote cluster')
+        self.status_display = status
+        self.status_display.start()
+
+    def process_line(self, log_line: str) -> None:
+        # Pre-flight checks
+        if 'SSH connection successful' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}SSH connection established.'
+                        f'{colorama.Style.RESET_ALL}')
+
+        # Kubernetes installation steps
+        if 'Deploying Kubernetes on head node' in log_line:
+            self.status_display.update('[bold cyan]Creating remote cluster - '
+                                       'deploying Kubernetes on head node')
+        if 'K3s deployed on head node.' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}'
+                        '✔ K3s successfully deployed on head node.'
+                        f'{colorama.Style.RESET_ALL}')
+
+        # Worker nodes
+        if 'Deploying Kubernetes on worker node' in log_line:
+            self.status_display.update('[bold cyan]Creating remote cluster - '
+                                       'deploying Kubernetes on worker nodes')
+        if 'Kubernetes deployed on worker node' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}'
+                        '✔ K3s successfully deployed on worker node.'
+                        f'{colorama.Style.RESET_ALL}')
+
+        # Cluster configuration
+        if 'Configuring local kubectl to connect to the cluster...' in log_line:
+            self.status_display.update('[bold cyan]Creating remote cluster - '
+                                       'configuring local kubectl')
+        if 'kubectl configured to connect to the cluster.' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}'
+                        '✔ kubectl configured for the remote cluster.'
+                        f'{colorama.Style.RESET_ALL}')
+
+        # GPU operator installation
+        if 'Installing Nvidia GPU Operator...' in log_line:
+            self.status_display.update('[bold cyan]Creating remote cluster - '
+                                       'installing Nvidia GPU Operator')
+        if 'GPU Operator installed.' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}'
+                        '✔ Nvidia GPU Operator installed successfully.'
+                        f'{colorama.Style.RESET_ALL}')
+
+        # Cleanup steps
+        if 'Cleaning up head node' in log_line:
+            self.status_display.update('[bold cyan]Cleaning up head node')
+        if 'Cleaning up node' in log_line:
+            self.status_display.update('[bold cyan]Cleaning up worker node')
+        if 'cleaned up successfully' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}'
+                        f'{log_line.strip()}{colorama.Style.RESET_ALL}')
+
+        # Final status
+        if 'Cluster deployment completed.' in log_line:
+            logger.info(f'{colorama.Fore.GREEN}✔ Remote k3s is running.'
+                        f'{colorama.Style.RESET_ALL}')
+
+    def __exit__(self, except_type: Optional[Type[BaseException]],
+                 except_value: Optional[BaseException],
+                 traceback: Optional[types.TracebackType]) -> None:
         del except_type, except_value, traceback  # unused
         self.status_display.stop()
 
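SkyRemoteUpLineProcessor is the bridge between the shell script and the CLI: _deploy_remote_cluster runs the script with stream_logs=False, so only the processor's green summaries reach the terminal while the raw script output goes to the log file. To follow the raw output, use the tail hint the CLI prints (path pattern taken from the log_path construction in cli.py, assuming constants.SKY_LOGS_DIRECTORY resolves to ~/sky_logs; the timestamp is illustrative):

$ tail -n100 -f ~/sky_logs/sky-2024-09-28-00-00-00-000000/local_up.log
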
{skypilot_nightly-1.0.0.dev20240927.dist-info → skypilot_nightly-1.0.0.dev20240928.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: skypilot-nightly
-Version: 1.0.0.dev20240927
+Version: 1.0.0.dev20240928
 Summary: SkyPilot: An intercloud broker for the clouds
 Author: SkyPilot Team
 License: Apache 2.0
{skypilot_nightly-1.0.0.dev20240927.dist-info → skypilot_nightly-1.0.0.dev20240928.dist-info}/RECORD RENAMED
@@ -1,8 +1,8 @@
-sky/__init__.py,sha256=S-XaZNVM-9OM5oGtcUfWmQC9CLW7HvN9ckCN-KCbPio,5854
+sky/__init__.py,sha256=8BEk3x0IPkFli8tjp7axkkM5mwQ1GuCABWwTMppkPcc,5854
 sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
 sky/authentication.py,sha256=o8ZhUf4VSN8WtjWcUUGYg-HVskaqaoMK4ZobHC-HVYU,20697
 sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
-sky/cli.py,sha256=DMnZ-vLBuFazKmiMavidSYkQvv_YvXnZALJkHaLveDM,201714
+sky/cli.py,sha256=9h4yO8p962960qUjvQ-xSusrtdh8TXNNQ1sfV0OqgZc,206262
 sky/cloud_stores.py,sha256=RjFgmRhUh1Kk__f6g3KxzLp9s7dA0pFK4W1AukEuUaw,21153
 sky/core.py,sha256=YF_6kwj8Ja171Oycb8L25SZ7V_ylZYovFS_jpnjwGo0,34408
 sky/dag.py,sha256=WLFWr5hfrwjd31uYlNvI-zWUk7tLaT_gzJn4LzbVtkE,2780
@@ -252,7 +252,7 @@ sky/utils/dag_utils.py,sha256=gjGZiJj4_GYsraXX67e6ElvbmOByJcyjSfvVgYZiXvs,5588
 sky/utils/db_utils.py,sha256=AOvMmBEN9cF4I7CoXihPCtus4mU2VDGjBQSVMMgzKlA,2786
 sky/utils/env_options.py,sha256=1VXyd3bhiUgGfCpmmTqM9PagRo1ILBH4-pzIxmIeE6E,861
 sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
-sky/utils/log_utils.py,sha256=W7FYK7xzvbq4V-8R-ihLtz939ryvtABug6O-4DFrjho,8139
+sky/utils/log_utils.py,sha256=yVu3etgKhiVYX8UG-JFPWZujxWBT4kwxZ5oAPIdjtGs,12054
 sky/utils/resources_utils.py,sha256=snByBxgx3Hnjfch2uysdAA3D-OAwrnuzTDHug36s5H4,6515
 sky/utils/rich_utils.py,sha256=5ZVhzlFx-nhqMXwv00eO9xC4rz7ibDlfD2lmGhZrJEY,1581
 sky/utils/schemas.py,sha256=QT0Fxri2o0SiWkky1DlZhA1dzQRQoB5OdVaej0wJvhc,28787
@@ -265,6 +265,7 @@ sky/utils/cli_utils/status_utils.py,sha256=9odkfXiXLMD14XJsqve6sGvHpe7ThHXpC6ic9
 sky/utils/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sky/utils/kubernetes/create_cluster.sh,sha256=rv5Lz6AR00yBJMRyScfMSQiGKptMhtHWRsvyG20-u9c,7764
 sky/utils/kubernetes/delete_cluster.sh,sha256=BSccHF43GyepDNf-FZcenzHzpXXATkVD92vgn1lWPgk,927
+sky/utils/kubernetes/deploy_remote_cluster.sh,sha256=vGj0mD0tejHDRy8ulwKOvOF2mfLyT5J8fp7GVqEe_EY,8478
 sky/utils/kubernetes/generate_kind_config.py,sha256=_TNLnifA_r7-CRq083IP1xjelYqiLjzQX9ohuqYpDH8,3187
 sky/utils/kubernetes/generate_kubeconfig.sh,sha256=AcYhuuG5jXWGHUmyRuH-oKy5qcn92gXhu6bXOt6eD6g,9274
 sky/utils/kubernetes/gpu_labeler.py,sha256=MEUv0U4ACDcNwtFVltlv017XJMjxx1Bndf6fL0i6eqg,6960
@@ -272,9 +273,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
 sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
 sky/utils/kubernetes/rsync_helper.sh,sha256=Ma-N9a271fTfdgP5-8XIQL7KPf8IPUo-uY004PCdUFo,747
 sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
-skypilot_nightly-1.0.0.dev20240927.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
-skypilot_nightly-1.0.0.dev20240927.dist-info/METADATA,sha256=GXz5qTbQuxyKpPHIVPT5vsvlpo3bRyataK8Vtj6rovw,18948
-skypilot_nightly-1.0.0.dev20240927.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-skypilot_nightly-1.0.0.dev20240927.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
-skypilot_nightly-1.0.0.dev20240927.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
-skypilot_nightly-1.0.0.dev20240927.dist-info/RECORD,,
+skypilot_nightly-1.0.0.dev20240928.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
+skypilot_nightly-1.0.0.dev20240928.dist-info/METADATA,sha256=AT9cnsY7Uj7BK0COu8mOXiCtfyCFrjtk7OBQvqx-_Nk,18948
+skypilot_nightly-1.0.0.dev20240928.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+skypilot_nightly-1.0.0.dev20240928.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
+skypilot_nightly-1.0.0.dev20240928.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
+skypilot_nightly-1.0.0.dev20240928.dist-info/RECORD,,
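The RECORD changes are mechanical: fresh sha256/size entries for the three modified files, one new entry for deploy_remote_cluster.sh, and the renamed dist-info paths. A RECORD hash is the unpadded urlsafe-base64 SHA-256 of the file (per the wheel spec), so an entry can be spot-checked against an unpacked wheel like this (illustrative):

$ python -c "import base64, hashlib, sys; \
    d = hashlib.sha256(open(sys.argv[1], 'rb').read()).digest(); \
    print(base64.urlsafe_b64encode(d).rstrip(b'=').decode())" \
    sky/utils/kubernetes/deploy_remote_cluster.sh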