skypilot-nightly 1.0.0.dev20241011__py3-none-any.whl → 1.0.0.dev20241013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/azure.py +3 -1
  3. sky/adaptors/common.py +6 -2
  4. sky/backends/backend.py +9 -4
  5. sky/backends/backend_utils.py +13 -16
  6. sky/backends/cloud_vm_ray_backend.py +207 -161
  7. sky/backends/local_docker_backend.py +3 -1
  8. sky/benchmark/benchmark_utils.py +5 -4
  9. sky/cli.py +128 -31
  10. sky/clouds/service_catalog/aws_catalog.py +6 -7
  11. sky/clouds/service_catalog/common.py +4 -3
  12. sky/clouds/service_catalog/cudo_catalog.py +11 -1
  13. sky/core.py +4 -2
  14. sky/data/storage.py +44 -32
  15. sky/data/storage_utils.py +12 -7
  16. sky/exceptions.py +5 -0
  17. sky/execution.py +10 -24
  18. sky/jobs/__init__.py +2 -0
  19. sky/jobs/core.py +87 -7
  20. sky/jobs/utils.py +35 -19
  21. sky/optimizer.py +50 -37
  22. sky/provision/aws/config.py +15 -6
  23. sky/provision/azure/config.py +14 -3
  24. sky/provision/azure/instance.py +15 -9
  25. sky/provision/kubernetes/instance.py +3 -1
  26. sky/provision/kubernetes/utils.py +25 -0
  27. sky/provision/provisioner.py +63 -74
  28. sky/serve/core.py +42 -40
  29. sky/sky_logging.py +9 -5
  30. sky/skylet/log_lib.py +5 -4
  31. sky/skylet/providers/lambda_cloud/node_provider.py +1 -1
  32. sky/utils/cli_utils/status_utils.py +168 -21
  33. sky/utils/command_runner.py +11 -11
  34. sky/utils/common_utils.py +22 -5
  35. sky/utils/controller_utils.py +78 -29
  36. sky/utils/env_options.py +22 -7
  37. sky/utils/log_utils.py +39 -24
  38. sky/utils/resources_utils.py +23 -0
  39. sky/utils/rich_utils.py +55 -5
  40. sky/utils/ux_utils.py +63 -4
  41. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/METADATA +1 -1
  42. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/RECORD +46 -46
  43. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/LICENSE +0 -0
  44. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/WHEEL +0 -0
  45. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/entry_points.txt +0 -0
  46. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/top_level.txt +0 -0
sky/provision/azure/config.py CHANGED
@@ -5,16 +5,18 @@ a cluster to be launched.
  """
  import hashlib
  import json
- import logging
  from pathlib import Path
  import random
  import time
  from typing import Any, Callable
  
+ from sky import exceptions
+ from sky import sky_logging
  from sky.adaptors import azure
  from sky.provision import common
+ from sky.utils import common_utils
  
- logger = logging.getLogger(__name__)
+ logger = sky_logging.init_logger(__name__)
  
  UNIQUE_ID_LEN = 4
  _DEPLOYMENT_NAME = 'skypilot-config'
@@ -92,10 +94,19 @@ def bootstrap_instances(
                  retry += 1
                  continue
              raise
+         except azure.exceptions().ClientAuthenticationError as e:
+             message = (
+                 'Failed to authenticate with Azure. Please check your Azure '
+                 f'credentials. Error: {common_utils.format_exception(e)}'
+             ).replace('\n', ' ')
+             logger.error(message)
+             raise exceptions.NoClusterLaunchedError(message) from e
      else:
-         raise TimeoutError(
+         message = (
              f'Timed out waiting for resource group {resource_group} to be '
              'deleted.')
+         logger.error(message)
+         raise TimeoutError(message)
  
      # load the template file
      current_path = Path(__file__).parent
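
The new `except` branch above converts an Azure authentication failure into SkyPilot's `NoClusterLaunchedError`, so callers can tell "nothing was launched" apart from a partially provisioned cluster (see the `bulk_provision` change further down). A minimal, self-contained sketch of the same pattern, using a stand-in exception type and callable rather than the real SkyPilot/Azure APIs:

```python
class NoClusterLaunchedError(Exception):
    """Raised when provisioning fails before any resource is created."""


def bootstrap(create_deployment):
    """Run a cloud-side bootstrap step, translating auth errors.

    `create_deployment` is a hypothetical callable standing in for the
    Azure SDK call; an auth error means nothing was provisioned yet.
    """
    try:
        return create_deployment()
    except PermissionError as e:  # stand-in for ClientAuthenticationError
        # Flatten to one line so it renders cleanly in status output.
        message = ('Failed to authenticate with the cloud provider. '
                   f'Error: {e}').replace('\n', ' ')
        raise NoClusterLaunchedError(message) from e
```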
sky/provision/azure/instance.py CHANGED
@@ -441,15 +441,21 @@ def run_instances(region: str, cluster_name_on_cloud: str,
      if to_start_count > 0:
          resource_client = azure.get_client('resource', subscription_id)
          logger.debug(f'run_instances: Creating {to_start_count} instances.')
-         created_instances = _create_instances(
-             compute_client=compute_client,
-             resource_client=resource_client,
-             cluster_name_on_cloud=cluster_name_on_cloud,
-             resource_group=resource_group,
-             provider_config=provider_config,
-             node_config=config.node_config,
-             tags=tags,
-             count=to_start_count)
+         try:
+             created_instances = _create_instances(
+                 compute_client=compute_client,
+                 resource_client=resource_client,
+                 cluster_name_on_cloud=cluster_name_on_cloud,
+                 resource_group=resource_group,
+                 provider_config=provider_config,
+                 node_config=config.node_config,
+                 tags=tags,
+                 count=to_start_count)
+         except Exception as e:
+             err_message = common_utils.format_exception(
+                 e, use_bracket=True).replace('\n', ' ')
+             logger.error(f'Failed to create instances: {err_message}')
+             raise
          created_instance_ids = [inst.name for inst in created_instances]
  
      non_running_instance_statuses = list(
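
As in the config change, the exception text is flattened to a single line before logging so provisioning logs stay greppable. A generic sketch of that idea; `format_exception` here is a simplified stand-in for `sky.utils.common_utils.format_exception`, not its actual implementation:

```python
def format_exception(e: BaseException, use_bracket: bool = False) -> str:
    """Render an exception as a short, single-line description."""
    name = type(e).__name__
    text = f'[{name}] {e}' if use_bracket else f'{name}: {e}'
    return text.replace('\n', ' ')


try:
    raise ValueError('quota exceeded\nin region eastus')
except Exception as e:  # pylint: disable=broad-except
    # Prints: Failed to create instances: [ValueError] quota exceeded in region eastus
    print(f'Failed to create instances: {format_exception(e, use_bracket=True)}')
```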
sky/provision/kubernetes/instance.py CHANGED
@@ -632,7 +632,9 @@ def run_instances(region: str, cluster_name_on_cloud: str,
      try:
          return _create_pods(region, cluster_name_on_cloud, config)
      except (kubernetes.api_exception(), config_lib.KubernetesError) as e:
-         logger.warning(f'run_instances: Error occurred when creating pods: {e}')
+         e_msg = common_utils.format_exception(e).replace('\n', ' ')
+         logger.warning('run_instances: Error occurred when creating pods: '
+                        f'{e_msg}')
          raise
  
  
sky/provision/kubernetes/utils.py CHANGED
@@ -1998,3 +1998,28 @@ def get_context_from_config(provider_config: Dict[str, Any]) -> Optional[str]:
          # we need to use in-cluster auth.
          context = None
      return context
+
+
+ def get_skypilot_pods(context: Optional[str] = None) -> List[Any]:
+     """Gets all SkyPilot pods in the Kubernetes cluster.
+
+     Args:
+         context: Kubernetes context to use. If None, uses the current context.
+
+     Returns:
+         A list of Kubernetes pod objects.
+     """
+     if context is None:
+         context = get_current_kube_config_context_name()
+
+     try:
+         pods = kubernetes.core_api(context).list_pod_for_all_namespaces(
+             label_selector='skypilot-cluster',
+             _request_timeout=kubernetes.API_TIMEOUT).items
+     except kubernetes.max_retry_error():
+         raise exceptions.ResourcesUnavailableError(
+             'Timed out trying to get SkyPilot pods from Kubernetes cluster. '
+             'Please check if the cluster is healthy and retry. To debug, run: '
+             'kubectl get pods --selector=skypilot-cluster --all-namespaces'
+         ) from None
+     return pods
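
The new `get_skypilot_pods` helper is essentially a label-selector query. For reference, roughly the same standalone query with the official Kubernetes Python client looks like this (the `skypilot-cluster` label comes from the diff; the timeout value is illustrative):

```python
from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
v1 = client.CoreV1Api()

# List every pod carrying the `skypilot-cluster` label, in any namespace.
pods = v1.list_pod_for_all_namespaces(label_selector='skypilot-cluster',
                                      _request_timeout=30).items
for pod in pods:
    labels = pod.metadata.labels or {}
    print(pod.metadata.namespace, pod.metadata.name,
          labels.get('skypilot-cluster'))
```

The `kubectl get pods --selector=skypilot-cluster --all-namespaces` command embedded in the error message is the CLI equivalent of the same query.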
sky/provision/provisioner.py CHANGED
@@ -14,6 +14,7 @@ import colorama
  
  import sky
  from sky import clouds
+ from sky import exceptions
  from sky import provision
  from sky import sky_logging
  from sky import status_lib
@@ -42,76 +43,50 @@ _TITLE = '\n\n' + '=' * 20 + ' {} ' + '=' * 20 + '\n'
  def _bulk_provision(
      cloud: clouds.Cloud,
      region: clouds.Region,
-     zones: Optional[List[clouds.Zone]],
      cluster_name: resources_utils.ClusterName,
      bootstrap_config: provision_common.ProvisionConfig,
  ) -> provision_common.ProvisionRecord:
      provider_name = repr(cloud)
      region_name = region.name
  
-     style = colorama.Style
-
-     if not zones:
-         # For Azure, zones is always an empty list.
-         zone_str = 'all zones'
-     else:
-         zone_str = ','.join(z.name for z in zones)
-
-     if isinstance(cloud, clouds.Kubernetes):
-         # Omit the region name for Kubernetes.
-         logger.info(f'{style.BRIGHT}Launching on {cloud}{style.RESET_ALL} '
-                     f'{cluster_name!r}.')
-     else:
-         logger.info(f'{style.BRIGHT}Launching on {cloud} '
-                     f'{region_name}{style.RESET_ALL} ({zone_str})')
-
      start = time.time()
-     with rich_utils.safe_status('[bold cyan]Launching[/]') as status:
+     # TODO(suquark): Should we cache the bootstrapped result?
+     # Currently it is not necessary as bootstrapping takes
+     # only ~3s, caching it seems over-engineering and could
+     # cause other issues like the cache is not synced
+     # with the cloud configuration.
+     config = provision.bootstrap_instances(provider_name, region_name,
+                                            cluster_name.name_on_cloud,
+                                            bootstrap_config)
+
+     provision_record = provision.run_instances(provider_name,
+                                                region_name,
+                                                cluster_name.name_on_cloud,
+                                                config=config)
+
+     backoff = common_utils.Backoff(initial_backoff=1, max_backoff_factor=3)
+     logger.debug(f'\nWaiting for instances of {cluster_name!r} to be ready...')
+     rich_utils.force_update_status(
+         ux_utils.spinner_message('Launching - Checking instance status',
+                                  str(provision_logging.config.log_path)))
+     # AWS would take a very short time (<<1s) updating the state of the
+     # instance.
+     time.sleep(1)
+     for retry_cnt in range(_MAX_RETRY):
          try:
-             # TODO(suquark): Should we cache the bootstrapped result?
-             # Currently it is not necessary as bootstrapping takes
-             # only ~3s, caching it seems over-engineering and could
-             # cause other issues like the cache is not synced
-             # with the cloud configuration.
-             config = provision.bootstrap_instances(provider_name, region_name,
-                                                    cluster_name.name_on_cloud,
-                                                    bootstrap_config)
-         except Exception as e:
-             logger.error(f'{colorama.Fore.YELLOW}Failed to configure '
-                          f'{cluster_name!r} on {cloud} {region} ({zone_str}) '
-                          'with the following error:'
-                          f'{colorama.Style.RESET_ALL}\n'
-                          f'{common_utils.format_exception(e)}')
-             raise
-
-         provision_record = provision.run_instances(provider_name,
-                                                    region_name,
-                                                    cluster_name.name_on_cloud,
-                                                    config=config)
-
-         backoff = common_utils.Backoff(initial_backoff=1, max_backoff_factor=3)
-         logger.debug(
-             f'\nWaiting for instances of {cluster_name!r} to be ready...')
-         status.update('[bold cyan]Launching - Checking instance status[/]')
-         # AWS would take a very short time (<<1s) updating the state of the
-         # instance.
-         time.sleep(1)
-         for retry_cnt in range(_MAX_RETRY):
-             try:
-                 provision.wait_instances(provider_name,
-                                          region_name,
-                                          cluster_name.name_on_cloud,
-                                          state=status_lib.ClusterStatus.UP)
-                 break
-             except (aws.botocore_exceptions().WaiterError, RuntimeError):
-                 time.sleep(backoff.current_backoff())
-         else:
-             raise RuntimeError(
-                 f'Failed to wait for instances of {cluster_name!r} to be '
-                 f'ready on the cloud provider after max retries {_MAX_RETRY}.')
-         logger.debug(
-             f'Instances of {cluster_name!r} are ready after {retry_cnt} '
-             'retries.')
+             provision.wait_instances(provider_name,
+                                      region_name,
+                                      cluster_name.name_on_cloud,
+                                      state=status_lib.ClusterStatus.UP)
+             break
+         except (aws.botocore_exceptions().WaiterError, RuntimeError):
+             time.sleep(backoff.current_backoff())
+     else:
+         raise RuntimeError(
+             f'Failed to wait for instances of {cluster_name!r} to be '
+             f'ready on the cloud provider after max retries {_MAX_RETRY}.')
+     logger.debug(f'Instances of {cluster_name!r} are ready after {retry_cnt} '
+                  'retries.')
  
      logger.debug(
          f'\nProvisioning {cluster_name!r} took {time.time() - start:.2f} '
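
The refactor drops the `rich_utils.safe_status` wrapper but keeps the same waiting logic: a `for` loop with exponential backoff whose `else` clause fires only if every attempt failed. A self-contained sketch of that control flow; the `Backoff` class below is a simplified stand-in for `common_utils.Backoff`:

```python
import time

_MAX_RETRY = 5


class Backoff:
    """Exponential backoff with a cap; a stand-in for common_utils.Backoff."""

    def __init__(self, initial_backoff: float = 1, max_backoff_factor: int = 3):
        self._backoff = initial_backoff
        self._max_backoff = initial_backoff * (2 ** max_backoff_factor)

    def current_backoff(self) -> float:
        current = self._backoff
        self._backoff = min(self._backoff * 2, self._max_backoff)
        return current


def wait_until_ready(poll) -> None:
    """Call `poll()` until it stops raising, or give up after _MAX_RETRY tries."""
    backoff = Backoff()
    for retry_cnt in range(_MAX_RETRY):
        try:
            poll()
            break
        except RuntimeError:
            time.sleep(backoff.current_backoff())
    else:
        # Only runs if the loop was never broken out of, i.e. all retries failed.
        raise RuntimeError(f'Not ready after {_MAX_RETRY} retries.')
    print(f'Ready after {retry_cnt} retries.')
```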
@@ -162,8 +137,11 @@ def bulk_provision(
          logger.debug(
              'Provision config:\n'
              f'{json.dumps(dataclasses.asdict(bootstrap_config), indent=2)}')
-         return _bulk_provision(cloud, region, zones, cluster_name,
+         return _bulk_provision(cloud, region, cluster_name,
                                 bootstrap_config)
+     except exceptions.NoClusterLaunchedError:
+         # Skip the teardown if the cluster was never launched.
+         raise
      except Exception:  # pylint: disable=broad-except
          zone_str = 'all zones'
          if zones:
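
Ordering matters in the new clause: `NoClusterLaunchedError` is caught and re-raised before the broad `except Exception` that triggers teardown, so a launch that never created resources skips cleanup. A sketch of that shape with illustrative names:

```python
class NoClusterLaunchedError(Exception):
    """Signals that provisioning failed before any resource was created."""


def launch_with_cleanup(provision, teardown):
    """Call `provision()`; tear down on failure unless nothing was launched."""
    try:
        return provision()
    except NoClusterLaunchedError:
        # Nothing was created, so there is nothing to tear down.
        raise
    except Exception:  # pylint: disable=broad-except
        # A partial launch: clean up whatever was created, then re-raise.
        teardown()
        raise
```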
@@ -440,23 +418,30 @@ def _post_provision_setup(
      # We don't set docker_user here, as we are configuring the VM itself.
      ssh_credentials = backend_utils.ssh_credential_from_yaml(
          cluster_yaml, ssh_user=cluster_info.ssh_user)
+     docker_config = config_from_yaml.get('docker', {})
  
      with rich_utils.safe_status(
-             '[bold cyan]Launching - Waiting for SSH access[/]') as status:
+             ux_utils.spinner_message(
+                 'Launching - Waiting for SSH access',
+                 provision_logging.config.log_path)) as status:
  
          logger.debug(
              f'\nWaiting for SSH to be available for {cluster_name!r} ...')
          wait_for_ssh(cluster_info, ssh_credentials)
-         logger.debug(f'SSH Conection ready for {cluster_name!r}')
+         logger.debug(f'SSH Connection ready for {cluster_name!r}')
+         vm_str = 'Instance' if cloud_name.lower() != 'kubernetes' else 'Pod'
          plural = '' if len(cluster_info.instances) == 1 else 's'
-         logger.info(f'{colorama.Fore.GREEN}Successfully provisioned '
-                     f'or found existing instance{plural}.'
-                     f'{colorama.Style.RESET_ALL}')
+         verb = 'is' if len(cluster_info.instances) == 1 else 'are'
+         indent_str = (ux_utils.INDENT_SYMBOL
+                       if docker_config else ux_utils.INDENT_LAST_SYMBOL)
+         logger.info(f'{indent_str}{colorama.Style.DIM}{vm_str}{plural} {verb} '
+                     f'up.{colorama.Style.RESET_ALL}')
  
-         docker_config = config_from_yaml.get('docker', {})
          if docker_config:
              status.update(
-                 '[bold cyan]Launching - Initializing docker container[/]')
+                 ux_utils.spinner_message(
+                     'Launching - Initializing docker container',
+                     provision_logging.config.log_path))
              docker_user = instance_setup.initialize_docker(
                  cluster_name.name_on_cloud,
                  docker_config=docker_config,
@@ -470,6 +455,8 @@ def _post_provision_setup(
              cluster_info.docker_user = docker_user
              ssh_credentials['docker_user'] = docker_user
              logger.debug(f'Docker user: {docker_user}')
+             logger.info(f'{ux_utils.INDENT_LAST_SYMBOL}{colorama.Style.DIM}'
+                         f'Docker container is up.{colorama.Style.RESET_ALL}')
  
      # We mount the metadata with sky wheel for speedup.
      # NOTE: currently we mount all credentials for all nodes, because
@@ -482,8 +469,9 @@ def _post_provision_setup(
      # for later.
      file_mounts = config_from_yaml.get('file_mounts', {})
  
-     runtime_preparation_str = ('[bold cyan]Preparing SkyPilot '
-                                'runtime ({step}/3 - {step_name})')
+     runtime_preparation_str = (ux_utils.spinner_message(
+         'Preparing SkyPilot runtime ({step}/3 - {step_name})',
+         provision_logging.config.log_path))
      status.update(
          runtime_preparation_str.format(step=1, step_name='initializing'))
      instance_setup.internal_file_mounts(cluster_name.name_on_cloud,
@@ -551,8 +539,9 @@ def _post_provision_setup(
      instance_setup.start_skylet_on_head_node(cluster_name.name_on_cloud,
                                               cluster_info, ssh_credentials)
  
-     logger.info(f'{colorama.Fore.GREEN}Successfully provisioned cluster: '
-                 f'{cluster_name}{colorama.Style.RESET_ALL}')
+     logger.info(
+         ux_utils.finishing_message(f'Cluster launched: {cluster_name}.',
+                                    provision_logging.config.log_path))
      return cluster_info
  
  
sky/serve/core.py CHANGED
@@ -129,8 +129,10 @@ def up(
          task, use_mutated_config_in_current_request=False)
      task = dag.tasks[0]
  
-     controller_utils.maybe_translate_local_file_mounts_and_sync_up(task,
-                                                                    path='serve')
+     with rich_utils.safe_status(
+             ux_utils.spinner_message('Initializing service')):
+         controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+             task, path='serve')
  
      with tempfile.NamedTemporaryFile(
              prefix=f'service-task-{service_name}-',
@@ -215,7 +217,8 @@ def up(
          # TODO(tian): Cache endpoint locally to speedup. Endpoint won't
          # change after the first time, so there is no consistency issue.
          with rich_utils.safe_status(
-                 '[cyan]Waiting for the service to register[/]'):
+                 ux_utils.spinner_message(
+                     'Waiting for the service to register')):
              # This function will check the controller job id in the database
              # and return the endpoint if the job id matches. Otherwise it will
              # return None.
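
Several call sites now wrap slow steps in `rich_utils.safe_status(ux_utils.spinner_message(...))`, which shows a spinner for the duration of the block and lets later code update the message. The same user-facing behavior can be reproduced with `rich` directly; this is a generic illustration, not SkyPilot's wrapper:

```python
import time

from rich.console import Console

console = Console()

# The spinner stays on screen while the block runs and is cleared afterwards.
with console.status('[bold cyan]Initializing service[/]') as status:
    time.sleep(1)  # stand-in for syncing file mounts
    status.update('[bold cyan]Waiting for the service to register[/]')
    time.sleep(1)  # stand-in for polling the controller
console.print('Service registered.')
```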
@@ -274,34 +277,31 @@ def up(
          f'{style.BRIGHT}{service_name}{style.RESET_ALL}'
          f'\n{fore.CYAN}Endpoint URL: '
          f'{style.BRIGHT}{endpoint}{style.RESET_ALL}'
-         '\nTo see detailed info:\t\t'
-         f'{backend_utils.BOLD}sky serve status {service_name} '
-         f'[--endpoint]{backend_utils.RESET_BOLD}'
-         '\nTo teardown the service:\t'
-         f'{backend_utils.BOLD}sky serve down {service_name}'
-         f'{backend_utils.RESET_BOLD}'
-         '\n'
-         '\nTo see logs of a replica:\t'
-         f'{backend_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
-         f'{backend_utils.RESET_BOLD}'
-         '\nTo see logs of load balancer:\t'
-         f'{backend_utils.BOLD}sky serve logs --load-balancer {service_name}'
-         f'{backend_utils.RESET_BOLD}'
-         '\nTo see logs of controller:\t'
-         f'{backend_utils.BOLD}sky serve logs --controller {service_name}'
-         f'{backend_utils.RESET_BOLD}'
-         '\n'
-         '\nTo monitor replica status:\t'
-         f'{backend_utils.BOLD}watch -n10 sky serve status {service_name}'
-         f'{backend_utils.RESET_BOLD}'
-         '\nTo send a test request:\t\t'
-         f'{backend_utils.BOLD}curl {endpoint}'
-         f'{backend_utils.RESET_BOLD}'
-         '\n'
-         f'\n{fore.GREEN}SkyServe is spinning up your service now.'
-         f'{style.RESET_ALL}'
-         f'\n{fore.GREEN}The replicas should be ready within a '
-         f'short time.{style.RESET_ALL}')
+         f'\n📋 Useful Commands'
+         f'\n{ux_utils.INDENT_SYMBOL}To check service status:\t'
+         f'{ux_utils.BOLD}sky serve status {service_name} '
+         f'[--endpoint]{ux_utils.RESET_BOLD}'
+         f'\n{ux_utils.INDENT_SYMBOL}To teardown the service:\t'
+         f'{ux_utils.BOLD}sky serve down {service_name}'
+         f'{ux_utils.RESET_BOLD}'
+         f'\n{ux_utils.INDENT_SYMBOL}To see replica logs:\t'
+         f'{ux_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
+         f'{ux_utils.RESET_BOLD}'
+         f'\n{ux_utils.INDENT_SYMBOL}To see load balancer logs:\t'
+         f'{ux_utils.BOLD}sky serve logs --load-balancer {service_name}'
+         f'{ux_utils.RESET_BOLD}'
+         f'\n{ux_utils.INDENT_SYMBOL}To see controller logs:\t'
+         f'{ux_utils.BOLD}sky serve logs --controller {service_name}'
+         f'{ux_utils.RESET_BOLD}'
+         f'\n{ux_utils.INDENT_SYMBOL}To monitor the status:\t'
+         f'{ux_utils.BOLD}watch -n10 sky serve status {service_name}'
+         f'{ux_utils.RESET_BOLD}'
+         f'\n{ux_utils.INDENT_LAST_SYMBOL}To send a test request:\t'
+         f'{ux_utils.BOLD}curl {endpoint}'
+         f'{ux_utils.RESET_BOLD}'
+         '\n\n' +
+         ux_utils.finishing_message('Service is spinning up and replicas '
+                                    'will be ready shortly.'))
      return service_name, endpoint
  
  
@@ -323,11 +323,11 @@ def update(
          controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
          stopped_message=
          'Service controller is stopped. There is no service to update. '
-         f'To spin up a new service, use {backend_utils.BOLD}'
-         f'sky serve up{backend_utils.RESET_BOLD}',
+         f'To spin up a new service, use {ux_utils.BOLD}'
+         f'sky serve up{ux_utils.RESET_BOLD}',
          non_existent_message='Service does not exist. '
          'To spin up a new service, '
-         f'use {backend_utils.BOLD}sky serve up{backend_utils.RESET_BOLD}',
+         f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
      )
  
      backend = backend_utils.get_backend_from_handle(handle)
@@ -353,8 +353,8 @@ def update(
      if len(service_statuses) == 0:
          with ux_utils.print_exception_no_traceback():
              raise RuntimeError(f'Cannot find service {service_name!r}.'
-                                f'To spin up a service, use {backend_utils.BOLD}'
-                                f'sky serve up{backend_utils.RESET_BOLD}')
+                                f'To spin up a service, use {ux_utils.BOLD}'
+                                f'sky serve up{ux_utils.RESET_BOLD}')
  
      if len(service_statuses) > 1:
          with ux_utils.print_exception_no_traceback():
@@ -374,8 +374,10 @@ def update(
          with ux_utils.print_exception_no_traceback():
              raise RuntimeError(prompt)
  
-     controller_utils.maybe_translate_local_file_mounts_and_sync_up(task,
-                                                                    path='serve')
+     with rich_utils.safe_status(
+             ux_utils.spinner_message('Initializing service')):
+         controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+             task, path='serve')
  
      code = serve_utils.ServeCodeGen.add_version(service_name)
      returncode, version_string_payload, stderr = backend.run_on_head(
@@ -433,8 +435,8 @@ def update(
  
      print(f'{colorama.Fore.GREEN}Service {service_name!r} update scheduled.'
            f'{colorama.Style.RESET_ALL}\n'
-           f'Please use {backend_utils.BOLD}sky serve status {service_name} '
-           f'{backend_utils.RESET_BOLD}to check the latest status.')
+           f'Please use {ux_utils.BOLD}sky serve status {service_name} '
+           f'{ux_utils.RESET_BOLD}to check the latest status.')
  
  
  @usage_lib.entrypoint
sky/sky_logging.py CHANGED
@@ -10,10 +10,10 @@ import colorama
  from sky.utils import env_options
  from sky.utils import rich_utils
  
- # If the SKYPILOT_MINIMIZE_LOGGING environment variable is set to True,
- # remove logging prefixes and unnecessary information in optimizer
- _FORMAT = (None if env_options.Options.MINIMIZE_LOGGING.get() else
-            '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s')
+ # UX: Should we show logging prefixes and some extra information in optimizer?
+ _show_logging_prefix = (env_options.Options.SHOW_DEBUG_INFO.get() or
+                         not env_options.Options.MINIMIZE_LOGGING.get())
+ _FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
  _DATE_FORMAT = '%m-%d %H:%M:%S'
  
@@ -45,6 +45,7 @@ _root_logger = logging.getLogger('sky')
  _default_handler = None
  _logging_config = threading.local()
  
+ NO_PREFIX_FORMATTER = NewLineFormatter(None, datefmt=_DATE_FORMAT)
  FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT)
  DIM_FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT, dim=True)
  
@@ -67,7 +68,10 @@ def _setup_logger():
      else:
          _default_handler.setLevel(logging.INFO)
      _root_logger.addHandler(_default_handler)
-     _default_handler.setFormatter(FORMATTER)
+     if _show_logging_prefix:
+         _default_handler.setFormatter(FORMATTER)
+     else:
+         _default_handler.setFormatter(NO_PREFIX_FORMATTER)
      # Setting this will avoid the message
      # being propagated to the parent logger.
      _root_logger.propagate = False
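
The logging change separates the format string from the decision to show it: `_FORMAT` is always defined, and `_setup_logger` picks a prefixed or prefix-free formatter at setup time. The mechanism is plain `logging`; a minimal sketch with a hypothetical environment toggle standing in for the `env_options` checks:

```python
import logging
import os

_FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
_DATE_FORMAT = '%m-%d %H:%M:%S'

# Hypothetical toggle, standing in for the env_options checks in the diff.
_show_logging_prefix = os.environ.get('SHOW_DEBUG_INFO') == '1'

handler = logging.StreamHandler()
if _show_logging_prefix:
    handler.setFormatter(logging.Formatter(_FORMAT, datefmt=_DATE_FORMAT))
else:
    # A formatter with fmt=None prints just the message, like NO_PREFIX_FORMATTER.
    handler.setFormatter(logging.Formatter(None, datefmt=_DATE_FORMAT))

logger = logging.getLogger('sky-example')
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.info('Provisioning started.')
```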
sky/skylet/log_lib.py CHANGED
@@ -21,6 +21,7 @@ from sky.skylet import constants
  from sky.skylet import job_lib
  from sky.utils import log_utils
  from sky.utils import subprocess_utils
+ from sky.utils import ux_utils
  
  _SKY_LOG_WAITING_GAP_SECONDS = 1
  _SKY_LOG_WAITING_MAX_RETRY = 5
@@ -377,7 +378,9 @@ def _follow_job_logs(file,
                  wait_last_logs = False
                  continue
              status_str = status.value if status is not None else 'None'
-             print(f'INFO: Job finished (status: {status_str}).')
+             print(
+                 ux_utils.finishing_message(
+                     f'Job finished (status: {status_str}).'))
              return
  
          time.sleep(_SKY_LOG_TAILING_GAP_SECONDS)
@@ -412,8 +415,6 @@ def tail_logs(job_id: Optional[int],
          return
      logger.debug(f'Tailing logs for job, real job_id {job_id}, managed_job_id '
                   f'{managed_job_id}.')
-     logger.info(f'{colorama.Fore.YELLOW}Start streaming logs for {job_str}.'
-                 f'{colorama.Style.RESET_ALL}')
      log_path = os.path.join(log_dir, 'run.log')
      log_path = os.path.expanduser(log_path)
  
@@ -437,7 +438,7 @@ def tail_logs(job_id: Optional[int],
              time.sleep(_SKY_LOG_WAITING_GAP_SECONDS)
              status = job_lib.update_job_status([job_id], silent=True)[0]
  
-     start_stream_at = 'INFO: Tip: use Ctrl-C to exit log'
+     start_stream_at = 'Waiting for task resources on '
      if follow and status in [
              job_lib.JobStatus.SETTING_UP,
              job_lib.JobStatus.PENDING,
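
`start_stream_at` is just a substring marker: when tailing `run.log`, output is suppressed until a line containing the marker appears, then everything after it is streamed. A standalone sketch of that filter (file name and marker usage are illustrative):

```python
def stream_from_marker(path: str, start_stream_at: str) -> None:
    """Print a log file, skipping everything before the marker line."""
    streaming = False
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if not streaming and start_stream_at in line:
                streaming = True
            if streaming:
                print(line, end='')


# Mirrors the new marker used by tail_logs:
# stream_from_marker('run.log', 'Waiting for task resources on ')
```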
sky/skylet/providers/lambda_cloud/node_provider.py CHANGED
@@ -25,7 +25,7 @@ _TAG_PATH_PREFIX = '~/.sky/generated/lambda_cloud/metadata'
  _REMOTE_SSH_KEY_NAME = '~/.lambda_cloud/ssh_key_name'
  _REMOTE_RAY_SSH_KEY = '~/ray_bootstrap_key.pem'
  _REMOTE_RAY_YAML = '~/ray_bootstrap_config.yaml'
- _GET_INTERNAL_IP_CMD = 'ip -4 -br addr show | grep UP | grep -Eo "(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|172\.(1[6-9]|2[0-9]|3[0-1]))\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"'
+ _GET_INTERNAL_IP_CMD = 's=$(ip -4 -br addr show | grep UP); echo "$s"; echo "$s" | grep -Eo "(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|172\.(1[6-9]|2[0-9]|3[0-1])|104\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"'
  
  logger = logging.getLogger(__name__)
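
The updated `_GET_INTERNAL_IP_CMD` echoes the interface table before grepping (so failures are easier to debug) and adds `104.` to the accepted address prefixes. If only the filtering logic is needed, the same selection can be done in Python on top of `ip -4 -br addr show`; the accepted prefixes below are copied from the regex in the diff, everything else is a sketch:

```python
import re
import subprocess

# Address prefixes accepted by the updated command: 10.x, 172.16-31.x, 104.x.
_ACCEPTED = re.compile(r'^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|104\.)')


def internal_ips() -> list:
    """Return candidate internal IPv4 addresses of interfaces that are UP."""
    out = subprocess.run(['ip', '-4', '-br', 'addr', 'show'],
                         capture_output=True, text=True, check=True).stdout
    ips = []
    for line in out.splitlines():
        if 'UP' not in line:
            continue
        for field in line.split()[2:]:  # e.g. '10.19.83.42/20'
            ip = field.split('/')[0]
            if _ACCEPTED.match(ip):
                ips.append(ip)
    return ips


if __name__ == '__main__':
    print(internal_ips())
```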