skypilot-nightly 1.0.0.dev20240927__py3-none-any.whl → 1.0.0.dev20240929__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
  import urllib.request

  # Replaced with the current commit when building the wheels.
- _SKYPILOT_COMMIT_SHA = 'e6b8d2c086544ab5cfdb877ad414eafddaa49cb4'
+ _SKYPILOT_COMMIT_SHA = 'e6a3b830fb2a12871815773af6171d42e0416e89'


  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():


  __commit__ = _get_git_commit()
- __version__ = '1.0.0.dev20240927'
+ __version__ = '1.0.0.dev20240929'
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))


sky/authentication.py CHANGED
@@ -380,6 +380,11 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
  secret_field_name = clouds.Kubernetes().ssh_key_secret_field_name
  context = config['provider'].get(
  'context', kubernetes_utils.get_current_kube_config_context_name())
+ if context == kubernetes_utils.IN_CLUSTER_REGION:
+ # If the context is set to IN_CLUSTER_REGION, we are running in a pod
+ # with in-cluster configuration. We need to set the context to None
+ # to use the mounted service account.
+ context = None
  namespace = config['provider'].get(
  'namespace',
  kubernetes_utils.get_kube_config_context_namespace(context))
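Note: with this change, a provider context equal to kubernetes_utils.IN_CLUSTER_REGION is translated to None before the Kubernetes client is configured, so the pod's mounted service account is used. A minimal illustrative sketch of the same pattern, assuming the official kubernetes Python client (load_config_for_context is a hypothetical helper, not part of the package):

    from kubernetes import config

    def load_config_for_context(context):
        # A None context means in-cluster auth: the client reads the service
        # account token mounted into the pod instead of a kubeconfig entry.
        if context is None:
            config.load_incluster_config()
        else:
            config.load_kube_config(context=context)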
sky/cli.py CHANGED
@@ -5072,15 +5072,7 @@ def local():
  pass


- @click.option('--gpus/--no-gpus',
- default=True,
- is_flag=True,
- help='Launch cluster without GPU support even '
- 'if GPUs are detected on the host.')
- @local.command('up', cls=_DocumentedCodeCommand)
- @usage_lib.entrypoint
- def local_up(gpus: bool):
- """Creates a local cluster."""
+ def _deploy_local_cluster(gpus: bool):
  cluster_created = False

  # Check if GPUs are available on the host
@@ -5206,6 +5198,124 @@ def local_up(gpus: bool):
  f'{gpu_hint}')


+ def _deploy_remote_cluster(ip_file: str, ssh_user: str, ssh_key_path: str,
+ cleanup: bool):
+ success = False
+ path_to_package = os.path.dirname(os.path.dirname(__file__))
+ up_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes',
+ 'deploy_remote_cluster.sh')
+ # Get directory of script and run it from there
+ cwd = os.path.dirname(os.path.abspath(up_script_path))
+
+ deploy_command = f'{up_script_path} {ip_file} {ssh_user} {ssh_key_path}'
+ if cleanup:
+ deploy_command += ' --cleanup'
+
+ # Convert the command to a format suitable for subprocess
+ deploy_command = shlex.split(deploy_command)
+
+ # Setup logging paths
+ run_timestamp = backend_utils.get_run_timestamp()
+ log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
+ 'local_up.log')
+ tail_cmd = 'tail -n100 -f ' + log_path
+
+ # Check if ~/.kube/config exists:
+ if os.path.exists(os.path.expanduser('~/.kube/config')):
+ click.echo('Found existing kube config. '
+ 'It will be backed up to ~/.kube/config.bak.')
+ style = colorama.Style
+ click.echo('To view detailed progress: '
+ f'{style.BRIGHT}{tail_cmd}{style.RESET_ALL}')
+ if cleanup:
+ msg_str = 'Cleaning up remote cluster...'
+ else:
+ msg_str = 'Deploying remote cluster...'
+ with rich_utils.safe_status(f'[bold cyan]{msg_str}'):
+ returncode, _, stderr = log_lib.run_with_log(
+ cmd=deploy_command,
+ log_path=log_path,
+ require_outputs=True,
+ stream_logs=False,
+ line_processor=log_utils.SkyRemoteUpLineProcessor(),
+ cwd=cwd)
+ if returncode == 0:
+ success = True
+ else:
+ with ux_utils.print_exception_no_traceback():
+ raise RuntimeError(
+ 'Failed to deploy remote cluster. '
+ f'Full log: {log_path}'
+ f'\nError: {style.BRIGHT}{stderr}{style.RESET_ALL}')
+
+ if success:
+ if cleanup:
+ click.echo(f'{colorama.Fore.GREEN}'
+ '🎉 Remote cluster cleaned up successfully.'
+ f'{style.RESET_ALL}')
+ else:
+ click.echo('Cluster deployment done. You can now run tasks on '
+ 'this cluster.\nE.g., run a task with: '
+ 'sky launch --cloud kubernetes -- echo hello world.'
+ f'\n{colorama.Fore.GREEN}🎉 Remote cluster deployed '
+ f'successfully. {style.RESET_ALL}')
+
+
+ @click.option('--gpus/--no-gpus',
+ default=True,
+ is_flag=True,
+ help='Launch cluster without GPU support even '
+ 'if GPUs are detected on the host.')
+ @click.option(
+ '--ips',
+ type=str,
+ required=False,
+ help='Path to the file containing IP addresses of remote machines.')
+ @click.option('--ssh-user',
+ type=str,
+ required=False,
+ help='SSH username for accessing remote machines.')
+ @click.option('--ssh-key-path',
+ type=str,
+ required=False,
+ help='Path to the SSH private key.')
+ @click.option('--cleanup',
+ is_flag=True,
+ help='Clean up the remote cluster instead of deploying it.')
+ @local.command('up', cls=_DocumentedCodeCommand)
+ @usage_lib.entrypoint
+ def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
+ cleanup: bool):
+ """Creates a local or remote cluster."""
+
+ def _validate_args(ips, ssh_user, ssh_key_path, cleanup):
+ # If any of --ips, --ssh-user, or --ssh-key-path is specified,
+ # all must be specified
+ if bool(ips) or bool(ssh_user) or bool(ssh_key_path):
+ if not (ips and ssh_user and ssh_key_path):
+ raise click.BadParameter(
+ 'All --ips, --ssh-user, and --ssh-key-path '
+ 'must be specified together.')
+
+ # --cleanup can only be used if --ips, --ssh-user and --ssh-key-path
+ # are all provided
+ if cleanup and not (ips and ssh_user and ssh_key_path):
+ raise click.BadParameter('--cleanup can only be used with '
+ '--ips, --ssh-user and --ssh-key-path.')
+
+ _validate_args(ips, ssh_user, ssh_key_path, cleanup)
+
+ # If remote deployment arguments are specified, run remote up script
+ if ips and ssh_user and ssh_key_path:
+ # Convert ips and ssh_key_path to absolute paths
+ ips = os.path.abspath(ips)
+ ssh_key_path = os.path.abspath(ssh_key_path)
+ _deploy_remote_cluster(ips, ssh_user, ssh_key_path, cleanup)
+ else:
+ # Run local deployment (kind) if no remote args are specified
+ _deploy_local_cluster(gpus)
+
+
  @local.command('down', cls=_DocumentedCodeCommand)
  @usage_lib.entrypoint
  def local_down():
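With the options above, `sky local up` now covers two paths: run with no remote arguments it still creates a local kind cluster, while passing --ips, --ssh-user and --ssh-key-path together deploys k3s on the listed machines via deploy_remote_cluster.sh (add --cleanup to tear that cluster down again). For example, with hypothetical file names: `sky local up --ips ips.txt --ssh-user ubuntu --ssh-key-path ~/.ssh/id_rsa`.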
sky/clouds/kubernetes.py CHANGED
@@ -129,11 +129,24 @@ class Kubernetes(clouds.Cloud):
  'Ignoring these contexts.')

  @classmethod
- def _existing_allowed_contexts(cls) -> List[str]:
- """Get existing allowed contexts."""
+ def _existing_allowed_contexts(cls) -> List[Optional[str]]:
+ """Get existing allowed contexts.
+
+ If None is returned in the list, it means that we are running in a pod
+ with in-cluster auth. In this case, we specify None context, which will
+ use the service account mounted in the pod.
+ """
  all_contexts = kubernetes_utils.get_all_kube_config_context_names()
- if all_contexts is None:
+ if len(all_contexts) == 0:
  return []
+ if all_contexts == [None]:
+ # If only one context is found and it is None, we are running in a
+ # pod with in-cluster auth. In this case, we allow it to be used
+ # without checking against allowed_contexts.
+ # TODO(romilb): We may want check in-cluster auth against
+ # allowed_contexts in the future by adding a special context name
+ # for in-cluster auth.
+ return [None]
  all_contexts = set(all_contexts)

  allowed_contexts = skypilot_config.get_nested(
@@ -164,7 +177,15 @@ class Kubernetes(clouds.Cloud):
  del accelerators, zone, use_spot # unused
  existing_contexts = cls._existing_allowed_contexts()

- regions = [clouds.Region(context) for context in existing_contexts]
+ regions = []
+ for context in existing_contexts:
+ if context is None:
+ # If running in-cluster, we allow the region to be set to the
+ # singleton region since there is no context name available.
+ regions.append(clouds.Region(
+ kubernetes_utils.IN_CLUSTER_REGION))
+ else:
+ regions.append(clouds.Region(context))

  if region is not None:
  regions = [r for r in regions if r.name == region]
@@ -541,13 +562,20 @@ class Kubernetes(clouds.Cloud):
  def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
  if region == self._LEGACY_SINGLETON_REGION:
  # For backward compatibility, we allow the region to be set to the
- # legacy singletonton region.
+ # legacy singleton region.
  # TODO: Remove this after 0.9.0.
  return region, zone

+ if region == kubernetes_utils.IN_CLUSTER_REGION:
+ # If running incluster, we set region to IN_CLUSTER_REGION
+ # since there is no context name available.
+ return region, zone
+
  all_contexts = kubernetes_utils.get_all_kube_config_context_names()
- if all_contexts is None:
- all_contexts = []
+ if all_contexts == [None]:
+ # If [None] context is returned, use the singleton region since we
+ # are running in a pod with in-cluster auth.
+ all_contexts = [kubernetes_utils.IN_CLUSTER_REGION]
  if region not in all_contexts:
  raise ValueError(
  f'Context {region} not found in kubeconfig. Kubernetes only '
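The mapping between Kubernetes contexts and SkyPilot regions introduced here is symmetric: a None context (in-cluster auth) surfaces as the 'in-cluster' singleton region, and that region name is mapped back to a None context before any API call. A small illustrative sketch of the round trip, using only the names shown in the hunks above:

    IN_CLUSTER_REGION = 'in-cluster'

    def context_to_region(context):
        # No context name is available when using the mounted service account.
        return IN_CLUSTER_REGION if context is None else context

    def region_to_context(region):
        # Reverse mapping, e.g. when reading the region back from provider config.
        return None if region == IN_CLUSTER_REGION else region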
@@ -247,7 +247,8 @@ def _get_resource(container_resources: Dict[str, Any], resource_name: str,


  def _configure_autoscaler_service_account(
- namespace: str, context: str, provider_config: Dict[str, Any]) -> None:
+ namespace: str, context: Optional[str],
+ provider_config: Dict[str, Any]) -> None:
  account_field = 'autoscaler_service_account'
  if account_field not in provider_config:
  logger.info('_configure_autoscaler_service_account: '
@@ -281,7 +282,7 @@ def _configure_autoscaler_service_account(
  f'{created_msg(account_field, name)}')


- def _configure_autoscaler_role(namespace: str, context: str,
+ def _configure_autoscaler_role(namespace: str, context: Optional[str],
  provider_config: Dict[str, Any],
  role_field: str) -> None:
  """ Reads the role from the provider config, creates if it does not exist.
@@ -330,7 +331,7 @@ def _configure_autoscaler_role(namespace: str, context: str,

  def _configure_autoscaler_role_binding(
  namespace: str,
- context: str,
+ context: Optional[str],
  provider_config: Dict[str, Any],
  binding_field: str,
  override_name: Optional[str] = None,
@@ -620,7 +621,7 @@ def _configure_fuse_mounting(provider_config: Dict[str, Any]) -> None:
  f'in namespace {fuse_device_manager_namespace!r}')


- def _configure_services(namespace: str, context: str,
+ def _configure_services(namespace: str, context: Optional[str],
  provider_config: Dict[str, Any]) -> None:
  service_field = 'services'
  if service_field not in provider_config:
@@ -302,7 +302,8 @@ def _wait_for_pods_to_run(namespace, context, new_nodes):
  time.sleep(1)


- def _set_env_vars_in_pods(namespace: str, context: str, new_pods: List):
+ def _set_env_vars_in_pods(namespace: str, context: Optional[str],
+ new_pods: List):
  """Setting environment variables in pods.

  Once all containers are ready, we can exec into them and set env vars.
@@ -330,7 +331,7 @@ def _set_env_vars_in_pods(namespace: str, context: str, new_pods: List):
  new_pod.metadata.name, rc, stdout)


- def _check_user_privilege(namespace: str, context: str,
+ def _check_user_privilege(namespace: str, context: Optional[str],
  new_nodes: List) -> None:
  # Checks if the default user has sufficient privilege to set up
  # the kubernetes instance pod.
@@ -366,7 +367,8 @@ def _check_user_privilege(namespace: str, context: str,
  'from the image.')


- def _setup_ssh_in_pods(namespace: str, context: str, new_nodes: List) -> None:
+ def _setup_ssh_in_pods(namespace: str, context: Optional[str],
+ new_nodes: List) -> None:
  # Setting up ssh for the pod instance. This is already setup for
  # the jump pod so it does not need to be run for it.
  set_k8s_ssh_cmd = (
@@ -410,7 +412,7 @@ def _setup_ssh_in_pods(namespace: str, context: str, new_nodes: List) -> None:
  logger.info(f'{"-"*20}End: Set up SSH in pod {pod_name!r} {"-"*20}')


- def _label_pod(namespace: str, context: str, pod_name: str,
+ def _label_pod(namespace: str, context: Optional[str], pod_name: str,
  label: Dict[str, str]) -> None:
  """Label a pod."""
  kubernetes.core_api(context).patch_namespaced_pod(
@@ -647,7 +649,8 @@ def stop_instances(
  raise NotImplementedError()


- def _terminate_node(namespace: str, context: str, pod_name: str) -> None:
+ def _terminate_node(namespace: str, context: Optional[str],
+ pod_name: str) -> None:
  """Terminate a pod."""
  logger.debug('terminate_instances: calling delete_namespaced_pod')
  try:
@@ -132,7 +132,7 @@ def fill_ingress_template(namespace: str, service_details: List[Tuple[str, int,


  def create_or_replace_namespaced_ingress(
- namespace: str, context: str, ingress_name: str,
+ namespace: str, context: Optional[str], ingress_name: str,
  ingress_spec: Dict[str, Union[str, int]]) -> None:
  """Creates an ingress resource for the specified service."""
  networking_api = kubernetes.networking_api(context)
@@ -156,7 +156,7 @@ def create_or_replace_namespaced_ingress(
  _request_timeout=kubernetes.API_TIMEOUT)


- def delete_namespaced_ingress(namespace: str, context: str,
+ def delete_namespaced_ingress(namespace: str, context: Optional[str],
  ingress_name: str) -> None:
  """Deletes an ingress resource."""
  networking_api = kubernetes.networking_api(context)
@@ -171,7 +171,7 @@ def delete_namespaced_ingress(namespace: str, context: str,


  def create_or_replace_namespaced_service(
- namespace: str, context: str, service_name: str,
+ namespace: str, context: Optional[str], service_name: str,
  service_spec: Dict[str, Union[str, int]]) -> None:
  """Creates a service resource for the specified service."""
  core_api = kubernetes.core_api(context)
@@ -208,7 +208,7 @@ def delete_namespaced_service(namespace: str, service_name: str) -> None:
  raise e


- def ingress_controller_exists(context: str,
+ def ingress_controller_exists(context: Optional[str],
  ingress_class_name: str = 'nginx') -> bool:
  """Checks if an ingress controller exists in the cluster."""
  networking_api = kubernetes.networking_api(context)
@@ -220,7 +220,7 @@ def ingress_controller_exists(context: str,


  def get_ingress_external_ip_and_ports(
- context: str,
+ context: Optional[str],
  namespace: str = 'ingress-nginx'
  ) -> Tuple[Optional[str], Optional[Tuple[int, int]]]:
  """Returns external ip and ports for the ingress controller."""
@@ -258,7 +258,7 @@ def get_ingress_external_ip_and_ports(
  return external_ip, None


- def get_loadbalancer_ip(context: str,
+ def get_loadbalancer_ip(context: Optional[str],
  namespace: str,
  service_name: str,
  timeout: int = 0) -> Optional[str]:
@@ -284,7 +284,8 @@ def get_loadbalancer_ip(context: str,
  return ip


- def get_pod_ip(context: str, namespace: str, pod_name: str) -> Optional[str]:
+ def get_pod_ip(context: Optional[str], namespace: str,
+ pod_name: str) -> Optional[str]:
  """Returns the IP address of the pod."""
  core_api = kubernetes.core_api(context)
  pod = core_api.read_namespaced_pod(pod_name,
@@ -33,6 +33,7 @@ if typing.TYPE_CHECKING:

  # TODO(romilb): Move constants to constants.py
  DEFAULT_NAMESPACE = 'default'
+ IN_CLUSTER_REGION = 'in-cluster'

  DEFAULT_SERVICE_ACCOUNT_NAME = 'skypilot-service-account'

@@ -310,7 +311,7 @@ AUTOSCALER_TO_LABEL_FORMATTER = {

  @functools.lru_cache()
  def detect_gpu_label_formatter(
- context: str
+ context: Optional[str]
  ) -> Tuple[Optional[GPULabelFormatter], Dict[str, List[Tuple[str, str]]]]:
  """Detects the GPU label formatter for the Kubernetes cluster

@@ -342,7 +343,7 @@ def detect_gpu_label_formatter(


  @functools.lru_cache(maxsize=10)
- def detect_gpu_resource(context: str) -> Tuple[bool, Set[str]]:
+ def detect_gpu_resource(context: Optional[str]) -> Tuple[bool, Set[str]]:
  """Checks if the Kubernetes cluster has nvidia.com/gpu resource.

  If nvidia.com/gpu resource is missing, that typically means that the
@@ -402,7 +403,7 @@ def get_all_pods_in_kubernetes_cluster(
  return pods


- def check_instance_fits(context: str,
+ def check_instance_fits(context: Optional[str],
  instance: str) -> Tuple[bool, Optional[str]]:
  """Checks if the instance fits on the Kubernetes cluster.

@@ -488,7 +489,7 @@ def check_instance_fits(context: str,
  return fits, reason


- def get_gpu_label_key_value(context: str,
+ def get_gpu_label_key_value(context: Optional[str],
  acc_type: str,
  check_mode=False) -> Tuple[str, str]:
  """Returns the label key and value for the given GPU type.
@@ -651,11 +652,14 @@ def get_external_ip(network_mode: Optional[
  return parsed_url.hostname


- def check_credentials(context: str, timeout: int = kubernetes.API_TIMEOUT) -> \
+ def check_credentials(context: Optional[str],
+ timeout: int = kubernetes.API_TIMEOUT) -> \
  Tuple[bool, Optional[str]]:
  """Check if the credentials in kubeconfig file are valid

  Args:
+ context (Optional[str]): The Kubernetes context to use. If none, uses
+ in-cluster auth to check credentials, if available.
  timeout (int): Timeout in seconds for the test API call

  Returns:
@@ -817,22 +821,42 @@ def get_current_kube_config_context_name() -> Optional[str]:
  return None


- def get_all_kube_config_context_names() -> Optional[List[str]]:
+ def is_incluster_config_available() -> bool:
+ """Check if in-cluster auth is available.
+
+ Note: We cannot use load_incluster_config() to check if in-cluster config
+ is available because it will load the in-cluster config (if available)
+ and modify the current global kubernetes config. We simply check if the
+ service account token file exists to determine if in-cluster config may
+ be available.
+ """
+ return os.path.exists('/var/run/secrets/kubernetes.io/serviceaccount/token')
+
+
+ def get_all_kube_config_context_names() -> List[Optional[str]]:
  """Get all kubernetes context names from the kubeconfig file.

+ If running in-cluster, returns [None] to indicate in-cluster config.
+
  We should not cache the result of this function as the admin policy may
  update the contexts.

  Returns:
- List[str] | None: The list of kubernetes context names if it exists,
- None otherwise
+ List[Optional[str]]: The list of kubernetes context names if
+ available, an empty list otherwise. If running in-cluster,
+ returns [None] to indicate in-cluster config.
  """
  k8s = kubernetes.kubernetes
  try:
  all_contexts, _ = k8s.config.list_kube_config_contexts()
+ # all_contexts will always have at least one context. If kubeconfig
+ # does not have any contexts defined, it will raise ConfigException.
  return [context['name'] for context in all_contexts]
  except k8s.config.config_exception.ConfigException:
- return None
+ # If running in cluster, return [None] to indicate in-cluster config
+ if is_incluster_config_available():
+ return [None]
+ return []


  @functools.lru_cache()
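Callers of get_all_kube_config_context_names() now have to distinguish three return shapes: a list of context names (a usable kubeconfig), [None] (no kubeconfig contexts, but the service account token file exists, so in-cluster auth can be used), and [] (no credentials at all). An illustrative dispatch, not taken from the package:

    contexts = get_all_kube_config_context_names()
    if not contexts:
        print('No Kubernetes credentials found.')
    elif contexts == [None]:
        print('Running inside a pod; using the in-cluster service account.')
    else:
        print(f'Usable kubeconfig contexts: {contexts}')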
@@ -1046,7 +1070,7 @@ def get_ssh_proxy_command(
  k8s_ssh_target: str,
  network_mode: kubernetes_enums.KubernetesNetworkingMode,
  private_key_path: str,
- context: str,
+ context: Optional[str],
  namespace: str,
  ) -> str:
  """Generates the SSH proxy command to connect to the pod.
@@ -1144,7 +1168,8 @@ def create_proxy_command_script() -> str:
  return port_fwd_proxy_cmd_path


- def setup_ssh_jump_svc(ssh_jump_name: str, namespace: str, context: str,
+ def setup_ssh_jump_svc(ssh_jump_name: str, namespace: str,
+ context: Optional[str],
  service_type: kubernetes_enums.KubernetesServiceType):
  """Sets up Kubernetes service resource to access for SSH jump pod.

@@ -1216,7 +1241,8 @@ def setup_ssh_jump_svc(ssh_jump_name: str, namespace: str, context: str,


  def setup_ssh_jump_pod(ssh_jump_name: str, ssh_jump_image: str,
- ssh_key_secret: str, namespace: str, context: str):
+ ssh_key_secret: str, namespace: str,
+ context: Optional[str]):
  """Sets up Kubernetes RBAC and pod for SSH jump host.

  Our Kubernetes implementation uses a SSH jump pod to reach SkyPilot clusters
@@ -1296,7 +1322,8 @@ def setup_ssh_jump_pod(ssh_jump_name: str, ssh_jump_image: str,
  logger.info(f'Created SSH Jump Host {ssh_jump_name}.')


- def clean_zombie_ssh_jump_pod(namespace: str, context: str, node_id: str):
+ def clean_zombie_ssh_jump_pod(namespace: str, context: Optional[str],
+ node_id: str):
  """Analyzes SSH jump pod and removes if it is in a bad state

  Prevents the existence of a dangling SSH jump pod. This could happen
@@ -1618,7 +1645,8 @@ def check_nvidia_runtime_class(context: Optional[str] = None) -> bool:
  return nvidia_exists


- def check_secret_exists(secret_name: str, namespace: str, context: str) -> bool:
+ def check_secret_exists(secret_name: str, namespace: str,
+ context: Optional[str]) -> bool:
  """Checks if a secret exists in a namespace

  Args:
@@ -1836,7 +1864,7 @@ def get_namespace_from_config(provider_config: Dict[str, Any]) -> str:


  def filter_pods(namespace: str,
- context: str,
+ context: Optional[str],
  tag_filters: Dict[str, str],
  status_filters: Optional[List[str]] = None) -> Dict[str, Any]:
  """Filters pods by tags and status."""
@@ -1962,6 +1990,11 @@ def set_autodown_annotations(handle: 'backends.CloudVmRayResourceHandle',
  context=context)


- def get_context_from_config(provider_config: Dict[str, Any]) -> str:
- return provider_config.get('context',
- get_current_kube_config_context_name())
+ def get_context_from_config(provider_config: Dict[str, Any]) -> Optional[str]:
+ context = provider_config.get('context',
+ get_current_kube_config_context_name())
+ if context == IN_CLUSTER_REGION:
+ # If the context (also used as the region) is set to IN_CLUSTER_REGION
+ # we need to use in-cluster auth.
+ context = None
+ return context
@@ -653,7 +653,7 @@ class KubernetesCommandRunner(CommandRunner):

  def __init__(
  self,
- node: Tuple[Tuple[str, str], str],
+ node: Tuple[Tuple[str, Optional[str]], str],
  **kwargs,
  ):
  """Initialize KubernetesCommandRunner.
@@ -204,7 +204,7 @@ class KubernetesCommandRunner(CommandRunner):

  def __init__(
  self,
- node: Tuple[Tuple[str, str], str],
+ node: Tuple[Tuple[str, Optional[str]], str],
  ) -> None:
  ...

@@ -363,6 +363,14 @@ def shared_controller_vars_to_fill(
  # again on the controller. This is required since admin_policy is not
  # installed on the controller.
  local_user_config.pop('admin_policy', None)
+ # Remove allowed_contexts from local_user_config since the controller
+ # may be running in a Kubernetes cluster with in-cluster auth and may
+ # not have kubeconfig available to it. This is the typical case since
+ # remote_identity default for Kubernetes is SERVICE_ACCOUNT.
+ # TODO(romilb): We should check the cloud the controller is running on
+ # before popping allowed_contexts. If it is not on Kubernetes,
+ # we may be able to use allowed_contexts.
+ local_user_config.pop('allowed_contexts', None)
  with tempfile.NamedTemporaryFile(
  delete=False,
  suffix=_LOCAL_SKYPILOT_CONFIG_PATH_SUFFIX) as temp_file:
@@ -0,0 +1,243 @@
+ #!/bin/bash
+ # Refer to https://skypilot.readthedocs.io/en/latest/reservations/existing-machines.html for details on how to use this script.
+ set -e
+
+ # Colors for nicer UX
+ RED='\033[0;31m'
+ GREEN='\033[0;32m'
+ YELLOW='\033[1;33m'
+ NC='\033[0m' # No color
+
+ # Variables
+ IPS_FILE=$1
+ USER=$2
+ SSH_KEY=$3
+ K3S_TOKEN=mytoken # Any string can be used as the token
+ CLEANUP=false
+ INSTALL_GPU=false
+
+ if [[ "$4" == "--cleanup" ]]; then
+ CLEANUP=true
+ fi
+
+ # Basic argument checks
+ if [ -z "$IPS_FILE" ] || [ -z "$USER" ] || [ -z "$SSH_KEY" ]; then
+ >&2 echo -e "${RED}Error: Missing required arguments.${NC}"
+ >&2 echo "Usage: ./deploy_remote_cluster.sh ips.txt username path/to/ssh/key [--cleanup]"
+ exit 1
+ fi
+
+ # Check if SSH key exists
+ if [ ! -f "$SSH_KEY" ]; then
+ >&2 echo -e "${RED}Error: SSH key not found: $SSH_KEY${NC}"
+ exit 1
+ fi
+
+ # Check if IPs file exists
+ if [ ! -f "$IPS_FILE" ]; then
+ >&2 echo -e "${RED}Error: IPs file not found: $IPS_FILE${NC}"
+ exit 1
+ fi
+
+ # Get head node and worker nodes from the IPs file
+ HEAD_NODE=$(head -n 1 "$IPS_FILE")
+ WORKER_NODES=$(tail -n +2 "$IPS_FILE")
+
+ # Check if the IPs file is empty or not formatted correctly
+ if [ -z "$HEAD_NODE" ]; then
+ >&2 echo -e "${RED}Error: IPs file is empty or not formatted correctly.${NC}"
+ exit 1
+ fi
+
+ # Function to show a progress message
+ progress_message() {
+ echo -e "${YELLOW}➜ $1${NC}"
+ }
+
+ # Step to display success
+ success_message() {
+ echo -e "${GREEN}✔ $1${NC}"
+ }
+
+ # Function to run a command on a remote machine via SSH
+ run_remote() {
+ local NODE_IP=$1
+ local CMD=$2
+ # echo -e "${YELLOW}Running command on $NODE_IP...${NC}"
+ ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "$USER@$NODE_IP" "$CMD"
+ }
+
+ # Function to uninstall k3s and clean up the state on a remote machine
+ cleanup_server_node() {
+ local NODE_IP=$1
+ echo -e "${YELLOW}Cleaning up head node $NODE_IP...${NC}"
+ run_remote "$NODE_IP" "
+ echo 'Uninstalling k3s...' &&
+ /usr/local/bin/k3s-uninstall.sh || true &&
+ sudo rm -rf /etc/rancher /var/lib/rancher /var/lib/kubelet /etc/kubernetes ~/.kube
+ "
+ echo -e "${GREEN}Node $NODE_IP cleaned up successfully.${NC}"
+ }
+
+ # Function to uninstall k3s and clean up the state on a remote machine
+ cleanup_agent_node() {
+ local NODE_IP=$1
+ echo -e "${YELLOW}Cleaning up node $NODE_IP...${NC}"
+ run_remote "$NODE_IP" "
+ echo 'Uninstalling k3s...' &&
+ /usr/local/bin/k3s-agent-uninstall.sh || true &&
+ sudo rm -rf /etc/rancher /var/lib/rancher /var/lib/kubelet /etc/kubernetes ~/.kube
+ "
+ echo -e "${GREEN}Node $NODE_IP cleaned up successfully.${NC}"
+ }
+
+ check_gpu() {
+ local NODE_IP=$1
+ run_remote "$NODE_IP" "
+ if command -v nvidia-smi &> /dev/null; then
+ nvidia-smi --list-gpus | grep 'GPU 0'
+ fi
+ "
+ }
+
+ # Pre-flight checks
+ run_remote "$HEAD_NODE" "echo 'SSH connection successful'"
+ # TODO: Add more pre-flight checks here, including checking if port 6443 is accessible
+
+ # If --cleanup flag is set, uninstall k3s and exit
+ if [ "$CLEANUP" == "true" ]; then
+ echo -e "${YELLOW}Starting cleanup...${NC}"
+
+ # Clean up head node
+ cleanup_server_node "$HEAD_NODE"
+
+ # Clean up worker nodes
+ for NODE in $WORKER_NODES; do
+ cleanup_agent_node "$NODE"
+ done
+
+ echo -e "${GREEN}Cleanup completed successfully.${NC}"
+ exit 0
+ fi
+
+ # Step 1: Install k3s on the head node
+ progress_message "Deploying Kubernetes on head node ($HEAD_NODE)..."
+ run_remote "$HEAD_NODE" "
+ curl -sfL https://get.k3s.io | K3S_TOKEN=$K3S_TOKEN sh - &&
+ mkdir -p ~/.kube &&
+ sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config &&
+ sudo chown \$(id -u):\$(id -g) ~/.kube/config &&
+ for i in {1..3}; do
+ if kubectl wait --for=condition=ready node --all --timeout=2m --kubeconfig ~/.kube/config; then
+ break
+ else
+ echo 'Waiting for nodes to be ready...'
+ sleep 5
+ fi
+ done
+ if [ $i -eq 3 ]; then
+ echo 'Failed to wait for nodes to be ready after 3 attempts'
+ exit 1
+ fi"
+ success_message "K3s deployed on head node."
+
+ # Check if head node has a GPU
+ if check_gpu "$HEAD_NODE"; then
+ echo -e "${YELLOW}GPU detected on head node ($HEAD_NODE).${NC}"
+ INSTALL_GPU=true
+ fi
+
+ # Fetch the head node's internal IP (this will be passed to worker nodes)
+ MASTER_ADDR=$(run_remote "$HEAD_NODE" "hostname -I | awk '{print \$1}'")
+
+ echo -e "${GREEN}Master node internal IP: $MASTER_ADDR${NC}"
+
+ # Step 2: Install k3s on worker nodes and join them to the master node
+ for NODE in $WORKER_NODES; do
+ progress_message "Deploying Kubernetes on worker node ($NODE)..."
+ run_remote "$NODE" "
+ curl -sfL https://get.k3s.io | K3S_URL=https://$MASTER_ADDR:6443 K3S_TOKEN=$K3S_TOKEN sh -"
+ success_message "Kubernetes deployed on worker node ($NODE)."
+
+ # Check if worker node has a GPU
+ if check_gpu "$NODE"; then
+ echo -e "${YELLOW}GPU detected on worker node ($NODE).${NC}"
+ INSTALL_GPU=true
+ fi
+ done
+ # Step 3: Configure local kubectl to connect to the cluster
+ progress_message "Configuring local kubectl to connect to the cluster..."
+ scp -o StrictHostKeyChecking=no -i "$SSH_KEY" "$USER@$HEAD_NODE":~/.kube/config ~/.kube/config
+
+ # Back up the original kubeconfig file if it exists
+ KUBECONFIG_FILE="$HOME/.kube/config"
+ if [[ -f "$KUBECONFIG_FILE" ]]; then
+ echo "Backing up existing kubeconfig to $KUBECONFIG_FILE.bak"
+ cp "$KUBECONFIG_FILE" "$KUBECONFIG_FILE.bak"
+ fi
+
+ # Update kubeconfig for the local machine to use the master node's IP
+ # Temporary file to hold the modified kubeconfig
+ TEMP_FILE=$(mktemp)
+
+ # Remove the certificate-authority-data, and replace the server with the master address
+ awk '
+ BEGIN { in_cluster = 0 }
+ /^clusters:/ { in_cluster = 1 }
+ /^users:/ { in_cluster = 0 }
+ in_cluster && /^ *certificate-authority-data:/ { next }
+ in_cluster && /^ *server:/ {
+ print " server: https://'${HEAD_NODE}:6443'"
+ print " insecure-skip-tls-verify: true"
+ next
+ }
+ { print }
+ ' "$KUBECONFIG_FILE" > "$TEMP_FILE"
+
+ # Replace the original kubeconfig with the modified one
+ mv "$TEMP_FILE" "$KUBECONFIG_FILE"
+
+ success_message "kubectl configured to connect to the cluster."
+
+ echo "Cluster deployment completed. You can now run 'kubectl get nodes' to verify the setup."
+
+ # Install GPU operator if a GPU was detected on any node
+ if [ "$INSTALL_GPU" == "true" ]; then
+ echo -e "${YELLOW}GPU detected in the cluster. Installing Nvidia GPU Operator...${NC}"
+ run_remote "$HEAD_NODE" "
+ curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 &&
+ chmod 700 get_helm.sh &&
+ ./get_helm.sh &&
+ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update &&
+ kubectl create namespace gpu-operator --kubeconfig ~/.kube/config || true &&
+ sudo ln -s /sbin/ldconfig /sbin/ldconfig.real || true &&
+ helm install gpu-operator -n gpu-operator --create-namespace nvidia/gpu-operator \
+ --set 'toolkit.env[0].name=CONTAINERD_CONFIG' \
+ --set 'toolkit.env[0].value=/var/lib/rancher/k3s/agent/etc/containerd/config.toml' \
+ --set 'toolkit.env[1].name=CONTAINERD_SOCKET' \
+ --set 'toolkit.env[1].value=/run/k3s/containerd/containerd.sock' \
+ --set 'toolkit.env[2].name=CONTAINERD_RUNTIME_CLASS' \
+ --set 'toolkit.env[2].value=nvidia' &&
+ echo 'Waiting for GPU operator installation...' &&
+ while ! kubectl describe nodes --kubeconfig ~/.kube/config | grep -q 'nvidia.com/gpu:'; do
+ echo 'Waiting for GPU operator...'
+ sleep 5
+ done
+ echo 'GPU operator installed successfully.'"
+ success_message "GPU Operator installed."
+ else
+ echo -e "${YELLOW}No GPUs detected. Skipping GPU Operator installation.${NC}"
+ fi
+
+ # Configure SkyPilot
+ progress_message "Configuring SkyPilot..."
+ sky check kubernetes
+ success_message "SkyPilot configured successfully."
+
+ # Display final success message
+ echo -e "${GREEN}==== 🎉 Kubernetes cluster deployment completed successfully 🎉 ====${NC}"
+ echo "You can now interact with your Kubernetes cluster through SkyPilot: "
+ echo " • List available GPUs: sky show-gpus --cloud kubernetes"
+ echo " • Launch a GPU development pod: sky launch -c devbox --cloud kubernetes --gpus A100:1"
+ echo " • Connect to pod with SSH: ssh devbox"
+ echo " • Connect to pod with VSCode: code --remote ssh-remote+devbox '/'"
sky/utils/log_utils.py CHANGED
@@ -1,6 +1,7 @@
  """Logging utils."""
  import enum
- from typing import List, Optional
+ import types
+ from typing import List, Optional, Type

  import colorama
  import pendulum
@@ -15,13 +16,15 @@ logger = sky_logging.init_logger(__name__)
  class LineProcessor(object):
  """A processor for log lines."""

- def __enter__(self):
+ def __enter__(self) -> None:
  pass

- def process_line(self, log_line):
+ def process_line(self, log_line: str) -> None:
  pass

- def __exit__(self, except_type, except_value, traceback):
+ def __exit__(self, except_type: Optional[Type[BaseException]],
+ except_value: Optional[BaseException],
+ traceback: Optional[types.TracebackType]) -> None:
  del except_type, except_value, traceback # unused
  pass

@@ -34,12 +37,12 @@ class RayUpLineProcessor(LineProcessor):
  RUNTIME_SETUP = 1
  PULLING_DOCKER_IMAGES = 2

- def __enter__(self):
+ def __enter__(self) -> None:
  self.state = self.ProvisionStatus.LAUNCH
  self.status_display = rich_utils.safe_status('[bold cyan]Launching')
  self.status_display.start()

- def process_line(self, log_line):
+ def process_line(self, log_line: str) -> None:
  if ('Success.' in log_line and
  self.state == self.ProvisionStatus.LAUNCH):
  logger.info(f'{colorama.Fore.GREEN}Head node is up.'
@@ -60,7 +63,9 @@ class RayUpLineProcessor(LineProcessor):
  '[bold cyan]Launching - Preparing SkyPilot runtime')
  self.state = self.ProvisionStatus.RUNTIME_SETUP

- def __exit__(self, except_type, except_value, traceback):
+ def __exit__(self, except_type: Optional[Type[BaseException]],
+ except_value: Optional[BaseException],
+ traceback: Optional[types.TracebackType]) -> None:
  del except_type, except_value, traceback # unused
  self.status_display.stop()

@@ -68,13 +73,13 @@ class RayUpLineProcessor(LineProcessor):
  class SkyLocalUpLineProcessor(LineProcessor):
  """A processor for `sky local up` log lines."""

- def __enter__(self):
+ def __enter__(self) -> None:
  status = rich_utils.safe_status('[bold cyan]Creating local cluster - '
  'initializing Kubernetes')
  self.status_display = status
  self.status_display.start()

- def process_line(self, log_line):
+ def process_line(self, log_line: str) -> None:
  if 'Kind cluster created.' in log_line:
  logger.info(f'{colorama.Fore.GREEN}Kubernetes is running.'
  f'{colorama.Style.RESET_ALL}')
@@ -124,7 +129,80 @@ class SkyLocalUpLineProcessor(LineProcessor):
  f'{colorama.Fore.GREEN}Nginx Ingress Controller installed.'
  f'{colorama.Style.RESET_ALL}')

- def __exit__(self, except_type, except_value, traceback):
+ def __exit__(self, except_type: Optional[Type[BaseException]],
+ except_value: Optional[BaseException],
+ traceback: Optional[types.TracebackType]) -> None:
+ del except_type, except_value, traceback # unused
+ self.status_display.stop()
+
+
+ class SkyRemoteUpLineProcessor(LineProcessor):
+ """A processor for deploy_remote_cluster.sh log lines."""
+
+ def __enter__(self) -> None:
+ status = rich_utils.safe_status('[bold cyan]Creating remote cluster')
+ self.status_display = status
+ self.status_display.start()
+
+ def process_line(self, log_line: str) -> None:
+ # Pre-flight checks
+ if 'SSH connection successful' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}SSH connection established.'
+ f'{colorama.Style.RESET_ALL}')
+
+ # Kubernetes installation steps
+ if 'Deploying Kubernetes on head node' in log_line:
+ self.status_display.update('[bold cyan]Creating remote cluster - '
+ 'deploying Kubernetes on head node')
+ if 'K3s deployed on head node.' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}'
+ '✔ K3s successfully deployed on head node.'
+ f'{colorama.Style.RESET_ALL}')
+
+ # Worker nodes
+ if 'Deploying Kubernetes on worker node' in log_line:
+ self.status_display.update('[bold cyan]Creating remote cluster - '
+ 'deploying Kubernetes on worker nodes')
+ if 'Kubernetes deployed on worker node' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}'
+ '✔ K3s successfully deployed on worker node.'
+ f'{colorama.Style.RESET_ALL}')
+
+ # Cluster configuration
+ if 'Configuring local kubectl to connect to the cluster...' in log_line:
+ self.status_display.update('[bold cyan]Creating remote cluster - '
+ 'configuring local kubectl')
+ if 'kubectl configured to connect to the cluster.' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}'
+ '✔ kubectl configured for the remote cluster.'
+ f'{colorama.Style.RESET_ALL}')
+
+ # GPU operator installation
+ if 'Installing Nvidia GPU Operator...' in log_line:
+ self.status_display.update('[bold cyan]Creating remote cluster - '
+ 'installing Nvidia GPU Operator')
+ if 'GPU Operator installed.' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}'
+ '✔ Nvidia GPU Operator installed successfully.'
+ f'{colorama.Style.RESET_ALL}')
+
+ # Cleanup steps
+ if 'Cleaning up head node' in log_line:
+ self.status_display.update('[bold cyan]Cleaning up head node')
+ if 'Cleaning up node' in log_line:
+ self.status_display.update('[bold cyan]Cleaning up worker node')
+ if 'cleaned up successfully' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}'
+ f'{log_line.strip()}{colorama.Style.RESET_ALL}')
+
+ # Final status
+ if 'Cluster deployment completed.' in log_line:
+ logger.info(f'{colorama.Fore.GREEN}✔ Remote k3s is running.'
+ f'{colorama.Style.RESET_ALL}')
+
+ def __exit__(self, except_type: Optional[Type[BaseException]],
+ except_value: Optional[BaseException],
+ traceback: Optional[types.TracebackType]) -> None:
  del except_type, except_value, traceback # unused
  self.status_display.stop()

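SkyRemoteUpLineProcessor follows the same contract as the other processors in this module: it is entered as a context manager and fed one log line at a time, turning raw deploy_remote_cluster.sh output into spinner updates and green check marks. A simplified, illustrative sketch of how such a processor is driven (the real call site is log_lib.run_with_log in cli.py; stream_with_processor below is hypothetical):

    def stream_with_processor(lines, processor):
        # `lines` is any iterable of decoded log lines, e.g. from a subprocess.
        with processor:
            for line in lines:
                processor.process_line(line)

    # e.g. stream_with_processor(open('local_up.log'), SkyRemoteUpLineProcessor())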
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: skypilot-nightly
- Version: 1.0.0.dev20240927
+ Version: 1.0.0.dev20240929
  Summary: SkyPilot: An intercloud broker for the clouds
  Author: SkyPilot Team
  License: Apache 2.0
@@ -1,8 +1,8 @@
- sky/__init__.py,sha256=S-XaZNVM-9OM5oGtcUfWmQC9CLW7HvN9ckCN-KCbPio,5854
+ sky/__init__.py,sha256=8FioKRx3X_BHtQt6BCrINW2IHHhrCWkiT7cb8NcaRjY,5854
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
- sky/authentication.py,sha256=o8ZhUf4VSN8WtjWcUUGYg-HVskaqaoMK4ZobHC-HVYU,20697
+ sky/authentication.py,sha256=TfKkVnmRIetATSEVQFp-rOOIRGqVig2i8faSQQt_ixA,20974
  sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
- sky/cli.py,sha256=DMnZ-vLBuFazKmiMavidSYkQvv_YvXnZALJkHaLveDM,201714
+ sky/cli.py,sha256=9h4yO8p962960qUjvQ-xSusrtdh8TXNNQ1sfV0OqgZc,206262
  sky/cloud_stores.py,sha256=RjFgmRhUh1Kk__f6g3KxzLp9s7dA0pFK4W1AukEuUaw,21153
  sky/core.py,sha256=YF_6kwj8Ja171Oycb8L25SZ7V_ylZYovFS_jpnjwGo0,34408
  sky/dag.py,sha256=WLFWr5hfrwjd31uYlNvI-zWUk7tLaT_gzJn4LzbVtkE,2780
@@ -48,7 +48,7 @@ sky/clouds/cudo.py,sha256=H4VyMo5wWGAv2MXZ3xsbWjlZA_cZYnt4ecNlTOOao8Y,13147
  sky/clouds/fluidstack.py,sha256=iOmoOx52yTrHKMzwBDaxFJCfNo79M61d5tj-Np24Lyc,12436
  sky/clouds/gcp.py,sha256=CrSsaSXd83tM78foKH9viBfW1cQsjve3aUQbshsqvDg,54033
  sky/clouds/ibm.py,sha256=M8QdjeSFlwssfoY2aOodxG4q5R3eT9K-4lTPDHYvEYI,21476
- sky/clouds/kubernetes.py,sha256=DyGkJusl5YMy_sIeogSKAcJ8XaUgxUx7Gc90dRi2bZU,27251
+ sky/clouds/kubernetes.py,sha256=aWoXWR-S4puZHzuUHroLKxLdTpkqU7j75dQlXECnsmE,28679
  sky/clouds/lambda_cloud.py,sha256=2Al3qCSl-I4iTi7pPPNXcbaLyVfCUgTl__vYBunLB6k,12439
  sky/clouds/oci.py,sha256=ozVEa-9IkfI-RxyXDs_aLG5G0toLBRdtwUtaU-y7bH4,26382
  sky/clouds/paperspace.py,sha256=lmUZPYAblaqiBmGQwCunccMiTF_dVA1o3vqY9Q_Nc28,10921
@@ -137,11 +137,11 @@ sky/provision/gcp/instance.py,sha256=l2-1nHj4pUoHqOu8HMN1hT1bwd4Q96X8MXgOPsNJUN8
  sky/provision/gcp/instance_utils.py,sha256=veRBr6Oziv0KaUdC4acuWeaOremNV0gMYCCHaSvY7c8,70943
  sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
  sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
- sky/provision/kubernetes/config.py,sha256=gC1FeW-cyeebphY6sq2BGVF8QKZujUKyH7qe9TAAoPM,29024
- sky/provision/kubernetes/instance.py,sha256=YdcZ2vhxJPXzT1D8FuCIUyjdkK6VjsG4_qm3dDbygGw,38204
+ sky/provision/kubernetes/config.py,sha256=WEKcFXXhe89bLGAvoMiBvTDxdxkpTIA6ezrj2vmzldc,29072
+ sky/provision/kubernetes/instance.py,sha256=MdgyGcMUbhsSRdaTRV3IgHmiAj5goCDVhzDZ2PDVs_Y,38323
  sky/provision/kubernetes/network.py,sha256=EpNjRQ131CXepqbdkoRKFu4szVrm0oKEpv1l8EgOkjU,12364
- sky/provision/kubernetes/network_utils.py,sha256=AZ8dkVyRHxdbJ8Lm_zXYc2y9a3O9PJWLL1PH8IjwAW8,11314
- sky/provision/kubernetes/utils.py,sha256=JO54OXmalzjXkiE6VgirTJoJ6s-3uBH1zt0rBqPz_Yk,82014
+ sky/provision/kubernetes/network_utils.py,sha256=t1FS3K400fetH7cBuRgQJZl5_jEeMshsvsYmnMUcq8k,11399
+ sky/provision/kubernetes/utils.py,sha256=iULhot4naFOsyzp53x4Q4qpsHXvz5-DMOIFFTR8ap9s,83609
  sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml,sha256=AMzYzlY0JIlfBWj5eX054Rc1XDW2thUcLSOGMJVhIdA,229
  sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml,sha256=RtTq4F1QUmR2Uunb6zuuRaPhV7hpesz4saHjn3Ncsb4,2010
  sky/provision/paperspace/__init__.py,sha256=1nbUPWio7UA5gCQkO_rfEDfgXT17u5OtuByxQx4Ez6g,598
@@ -244,15 +244,15 @@ sky/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sky/utils/accelerator_registry.py,sha256=BO4iYH5bV80Xyp4EPfO0n1D3LL0FvESCy7xm59Je3_o,3798
  sky/utils/admin_policy_utils.py,sha256=zFCu1OFIrZRfQNY0JFRO1502WFfdqZhwAU_QgM4fO9U,5943
  sky/utils/cluster_yaml_utils.py,sha256=1wRRYqI1kI-eFs1pMW4r_FFjHJ0zamq6v2RRI-Gtx5E,849
- sky/utils/command_runner.py,sha256=4A7IbhyAiHHeYA85MulyRqldkQyDmc4BhRhjbfVlwE4,33850
- sky/utils/command_runner.pyi,sha256=1khh14BhdOpMxvk9Ydnd3OFdas5Nha6dSOzy5xLBUU4,7710
+ sky/utils/command_runner.py,sha256=NpBe7VHlzxGxuWJeDbRWwy2p64qefqz8c6Bar7KaRnc,33860
+ sky/utils/command_runner.pyi,sha256=G6DHTQ9DhjYmGf_hDoyeWdWuktXqkQyJ7U-wAJTcLiw,7720
  sky/utils/common_utils.py,sha256=O6PlZTCNhbuXOzjuV2DKw43niWE_qPfYZNGhnMtZzQg,24028
- sky/utils/controller_utils.py,sha256=VtdjKH9u1kWwUOMzPUxuLpT-XXQ2gCLCLOldB-vdh_8,37483
+ sky/utils/controller_utils.py,sha256=32pVORm2cd42tg0srxGvmYV0kYTl67IFsw2EdXbdoR8,38042
  sky/utils/dag_utils.py,sha256=gjGZiJj4_GYsraXX67e6ElvbmOByJcyjSfvVgYZiXvs,5588
  sky/utils/db_utils.py,sha256=AOvMmBEN9cF4I7CoXihPCtus4mU2VDGjBQSVMMgzKlA,2786
  sky/utils/env_options.py,sha256=1VXyd3bhiUgGfCpmmTqM9PagRo1ILBH4-pzIxmIeE6E,861
  sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
- sky/utils/log_utils.py,sha256=W7FYK7xzvbq4V-8R-ihLtz939ryvtABug6O-4DFrjho,8139
+ sky/utils/log_utils.py,sha256=yVu3etgKhiVYX8UG-JFPWZujxWBT4kwxZ5oAPIdjtGs,12054
  sky/utils/resources_utils.py,sha256=snByBxgx3Hnjfch2uysdAA3D-OAwrnuzTDHug36s5H4,6515
  sky/utils/rich_utils.py,sha256=5ZVhzlFx-nhqMXwv00eO9xC4rz7ibDlfD2lmGhZrJEY,1581
  sky/utils/schemas.py,sha256=QT0Fxri2o0SiWkky1DlZhA1dzQRQoB5OdVaej0wJvhc,28787
@@ -265,6 +265,7 @@ sky/utils/cli_utils/status_utils.py,sha256=9odkfXiXLMD14XJsqve6sGvHpe7ThHXpC6ic9
  sky/utils/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sky/utils/kubernetes/create_cluster.sh,sha256=rv5Lz6AR00yBJMRyScfMSQiGKptMhtHWRsvyG20-u9c,7764
  sky/utils/kubernetes/delete_cluster.sh,sha256=BSccHF43GyepDNf-FZcenzHzpXXATkVD92vgn1lWPgk,927
+ sky/utils/kubernetes/deploy_remote_cluster.sh,sha256=vGj0mD0tejHDRy8ulwKOvOF2mfLyT5J8fp7GVqEe_EY,8478
  sky/utils/kubernetes/generate_kind_config.py,sha256=_TNLnifA_r7-CRq083IP1xjelYqiLjzQX9ohuqYpDH8,3187
  sky/utils/kubernetes/generate_kubeconfig.sh,sha256=AcYhuuG5jXWGHUmyRuH-oKy5qcn92gXhu6bXOt6eD6g,9274
  sky/utils/kubernetes/gpu_labeler.py,sha256=MEUv0U4ACDcNwtFVltlv017XJMjxx1Bndf6fL0i6eqg,6960
@@ -272,9 +273,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
  sky/utils/kubernetes/rsync_helper.sh,sha256=Ma-N9a271fTfdgP5-8XIQL7KPf8IPUo-uY004PCdUFo,747
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
- skypilot_nightly-1.0.0.dev20240927.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
- skypilot_nightly-1.0.0.dev20240927.dist-info/METADATA,sha256=GXz5qTbQuxyKpPHIVPT5vsvlpo3bRyataK8Vtj6rovw,18948
- skypilot_nightly-1.0.0.dev20240927.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- skypilot_nightly-1.0.0.dev20240927.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
- skypilot_nightly-1.0.0.dev20240927.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
- skypilot_nightly-1.0.0.dev20240927.dist-info/RECORD,,
+ skypilot_nightly-1.0.0.dev20240929.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
+ skypilot_nightly-1.0.0.dev20240929.dist-info/METADATA,sha256=WfIgFJj_CNWtVZ39aN4Bx7l35R3rE4vdr6tJR9w9m-A,18948
+ skypilot_nightly-1.0.0.dev20240929.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ skypilot_nightly-1.0.0.dev20240929.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
+ skypilot_nightly-1.0.0.dev20240929.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
+ skypilot_nightly-1.0.0.dev20240929.dist-info/RECORD,,