skypilot-nightly 1.0.0.dev20250413__py3-none-any.whl → 1.0.0.dev20250421__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +7 -0
  3. sky/authentication.py +2 -2
  4. sky/backends/backend_utils.py +31 -3
  5. sky/backends/cloud_vm_ray_backend.py +22 -29
  6. sky/backends/wheel_utils.py +9 -0
  7. sky/check.py +1 -1
  8. sky/cli.py +253 -74
  9. sky/client/cli.py +253 -74
  10. sky/client/common.py +10 -3
  11. sky/client/sdk.py +11 -8
  12. sky/clouds/aws.py +2 -2
  13. sky/clouds/kubernetes.py +0 -8
  14. sky/clouds/oci.py +1 -1
  15. sky/core.py +17 -11
  16. sky/dashboard/out/404.html +1 -0
  17. sky/dashboard/out/_next/static/chunks/236-d437cf66e68a6f64.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +15 -0
  19. sky/dashboard/out/_next/static/chunks/37-72fdc8f71d6e4784.js +6 -0
  20. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +59 -0
  21. sky/dashboard/out/_next/static/chunks/845-2ea1cc63ba1f4067.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/979-7cd0778078b9cfad.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +33 -0
  25. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/_app-3001e84c61acddfb.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-b09f7fbf6d5d74f6.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b57ec043f09c5813.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ef2e0e91a9222cac.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +1 -0
  37. sky/dashboard/out/_next/static/css/f3538cd90cfca88c.css +3 -0
  38. sky/dashboard/out/_next/static/mS9YfLA5hhsJMeBj9W8J7/_buildManifest.js +1 -0
  39. sky/dashboard/out/_next/static/mS9YfLA5hhsJMeBj9W8J7/_ssgManifest.js +1 -0
  40. sky/dashboard/out/clusters/[cluster]/[job].html +1 -0
  41. sky/dashboard/out/clusters/[cluster].html +1 -0
  42. sky/dashboard/out/clusters.html +1 -0
  43. sky/dashboard/out/favicon.ico +0 -0
  44. sky/dashboard/out/index.html +1 -0
  45. sky/dashboard/out/jobs/[job].html +1 -0
  46. sky/dashboard/out/jobs.html +1 -0
  47. sky/dashboard/out/skypilot.svg +15 -0
  48. sky/dashboard/out/videos/cursor-small.mp4 +0 -0
  49. sky/data/data_transfer.py +2 -1
  50. sky/data/storage.py +24 -14
  51. sky/exceptions.py +5 -0
  52. sky/jobs/constants.py +8 -1
  53. sky/jobs/server/core.py +12 -8
  54. sky/models.py +28 -0
  55. sky/optimizer.py +7 -9
  56. sky/provision/kubernetes/config.py +1 -1
  57. sky/provision/kubernetes/instance.py +16 -14
  58. sky/provision/kubernetes/network_utils.py +1 -1
  59. sky/provision/kubernetes/utils.py +50 -22
  60. sky/provision/provisioner.py +2 -1
  61. sky/resources.py +56 -2
  62. sky/serve/__init__.py +2 -0
  63. sky/serve/autoscalers.py +6 -2
  64. sky/serve/client/sdk.py +61 -0
  65. sky/serve/constants.py +6 -0
  66. sky/serve/load_balancing_policies.py +0 -4
  67. sky/serve/replica_managers.py +6 -8
  68. sky/serve/serve_state.py +0 -6
  69. sky/serve/serve_utils.py +33 -1
  70. sky/serve/server/core.py +192 -7
  71. sky/serve/server/server.py +28 -0
  72. sky/server/common.py +152 -47
  73. sky/server/constants.py +7 -1
  74. sky/server/requests/executor.py +4 -0
  75. sky/server/requests/payloads.py +12 -15
  76. sky/server/requests/serializers/decoders.py +2 -5
  77. sky/server/requests/serializers/encoders.py +2 -5
  78. sky/server/server.py +44 -1
  79. sky/setup_files/MANIFEST.in +1 -0
  80. sky/setup_files/dependencies.py +1 -0
  81. sky/sky_logging.py +12 -2
  82. sky/skylet/constants.py +5 -7
  83. sky/skylet/job_lib.py +3 -3
  84. sky/skypilot_config.py +225 -84
  85. sky/templates/kubernetes-ray.yml.j2 +7 -3
  86. sky/utils/cli_utils/status_utils.py +12 -5
  87. sky/utils/config_utils.py +39 -15
  88. sky/utils/controller_utils.py +44 -7
  89. sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
  90. sky/utils/kubernetes/gpu_labeler.py +99 -16
  91. sky/utils/schemas.py +24 -0
  92. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/METADATA +2 -1
  93. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/RECORD +97 -64
  94. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/WHEEL +1 -1
  95. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/entry_points.txt +0 -0
  96. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/licenses/LICENSE +0 -0
  97. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '197c8dd3ea85d23323477e7d7cf69e8dc1b693c6'
8
+ _SKYPILOT_COMMIT_SHA = 'd37ba6f11593127305c73cb1b1a03dc6da8452b7'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250413'
38
+ __version__ = '1.0.0.dev20250421'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/adaptors/kubernetes.py CHANGED
@@ -160,6 +160,13 @@ def api_client(context: Optional[str] = None):
160
160
  return kubernetes.client.ApiClient()
161
161
 
162
162
 
163
+ @_api_logging_decorator('urllib3', logging.ERROR)
164
+ @annotations.lru_cache(scope='request')
165
+ def watch(context: Optional[str] = None):
166
+ _load_config(context)
167
+ return kubernetes.watch.Watch()
168
+
169
+
163
170
  def api_exception():
164
171
  return kubernetes.client.rest.ApiException
165
172
 
sky/authentication.py CHANGED
@@ -382,10 +382,10 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
382
382
  network_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
383
383
  network_mode_str)
384
384
  except ValueError as e:
385
- # Add message saying "Please check: ~/.sky/skyconfig.yaml" to the error
385
+ # Add message saying "Please check: ~/.sky/config.yaml" to the error
386
386
  # message.
387
387
  with ux_utils.print_exception_no_traceback():
388
- raise ValueError(str(e) + ' Please check: ~/.sky/skyconfig.yaml.') \
388
+ raise ValueError(str(e) + ' Please check: ~/.sky/config.yaml.') \
389
389
  from None
390
390
  _, public_key_path = get_or_generate_keys()
391
391
 
sky/backends/backend_utils.py CHANGED
@@ -278,11 +278,11 @@ def _optimize_file_mounts(yaml_path: str) -> None:
278
278
  # the dst.
279
279
  mkdir_parent = f'mkdir -p {dst}'
280
280
  src_basename = f'{src_basename}/*'
281
- mv = (f'cp -r {_REMOTE_RUNTIME_FILES_DIR}/{src_basename} '
281
+ mv = (f'cp -rf {_REMOTE_RUNTIME_FILES_DIR}/{src_basename} '
282
282
  f'{dst_parent_dir}/{dst_basename}')
283
283
  fragment = f'({mkdir_parent} && {mv})'
284
284
  commands.append(fragment)
285
- postprocess_runtime_files_command = ' && '.join(commands)
285
+ postprocess_runtime_files_command = '; '.join(commands)
286
286
 
287
287
  setup_commands = yaml_config.get('setup_commands', [])
288
288
  if setup_commands:
@@ -682,7 +682,7 @@ def write_cluster_config(
682
682
  ssh_proxy_command = ssh_proxy_command_config[region_name]
683
683
  logger.debug(f'Using ssh_proxy_command: {ssh_proxy_command!r}')
684
684
 
685
- # User-supplied global instance tags from ~/.sky/skyconfig.yaml.
685
+ # User-supplied global instance tags from ~/.sky/config.yaml.
686
686
  labels = skypilot_config.get_nested((str(cloud).lower(), 'labels'), {})
687
687
  # labels is a dict, which is guaranteed by the type check in
688
688
  # schemas.py
@@ -2582,11 +2582,36 @@ def get_clusters(
2582
2582
  logger.info(f'Cluster(s) not found: {bright}{clusters_str}{reset}.')
2583
2583
  records = new_records
2584
2584
 
2585
+ def _update_record_with_resources(record: Optional[Dict[str, Any]]) -> None:
2586
+ """Add the resources to the record."""
2587
+ if record is None:
2588
+ return
2589
+ handle = record['handle']
2590
+ if handle is None:
2591
+ return
2592
+ record['nodes'] = handle.launched_nodes
2593
+ if handle.launched_resources is None:
2594
+ return
2595
+ record['cloud'] = (f'{handle.launched_resources.cloud}'
2596
+ if handle.launched_resources.cloud else None)
2597
+ record['region'] = (f'{handle.launched_resources.region}'
2598
+ if handle.launched_resources.region else None)
2599
+ record['cpus'] = (f'{handle.launched_resources.cpus}'
2600
+ if handle.launched_resources.cpus else None)
2601
+ record['memory'] = (f'{handle.launched_resources.memory}'
2602
+ if handle.launched_resources.memory else None)
2603
+ record['accelerators'] = (f'{handle.launched_resources.accelerators}'
2604
+ if handle.launched_resources.accelerators else
2605
+ None)
2606
+
2585
2607
  # Add auth_config to the records
2586
2608
  for record in records:
2587
2609
  _update_record_with_credentials_and_resources_str(record)
2588
2610
 
2589
2611
  if refresh == common.StatusRefreshMode.NONE:
2612
+ # Add resources to the records
2613
+ for record in records:
2614
+ _update_record_with_resources(record)
2590
2615
  return records
2591
2616
 
2592
2617
  plural = 's' if len(records) > 1 else ''
@@ -2662,6 +2687,9 @@ def get_clusters(
2662
2687
  for cluster_name, e in failed_clusters:
2663
2688
  logger.warning(f' {bright}{cluster_name}{reset}: {e}')
2664
2689
 
2690
+ # Add resources to the records
2691
+ for record in kept_records:
2692
+ _update_record_with_resources(record)
2665
2693
  return kept_records
2666
2694
 
2667
2695
 
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -35,6 +35,7 @@ from sky import optimizer
35
35
  from sky import provision as provision_lib
36
36
  from sky import resources as resources_lib
37
37
  from sky import sky_logging
38
+ from sky import skypilot_config
38
39
  from sky import task as task_lib
39
40
  from sky.backends import backend_utils
40
41
  from sky.backends import wheel_utils
@@ -439,18 +440,7 @@ class RayCodeGen:
439
440
  pg = ray_util.placement_group({json.dumps(bundles)}, 'STRICT_SPREAD')
440
441
  plural = 's' if {num_nodes} > 1 else ''
441
442
  node_str = f'{num_nodes} node{{plural}}'
442
-
443
- # We have this `INFO: Tip:` message only for backward
444
- # compatibility, because if a cluster has the old SkyPilot version,
445
- # it relies on this message to start log streaming.
446
- # This message will be skipped for new clusters, because we use
447
- # start_streaming_at for the `Waiting for task resources on`
448
- # message.
449
- # TODO: Remove this message in v0.9.0.
450
- message = ('{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}INFO: '
451
- 'Tip: use Ctrl-C to exit log streaming, not kill '
452
- 'the job.{colorama.Style.RESET_ALL}\\n')
453
- message += ('{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}'
443
+ message = ('{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}'
454
444
  'Waiting for task resources on '
455
445
  f'{{node_str}}.{colorama.Style.RESET_ALL}')
456
446
  print(message, flush=True)
@@ -608,9 +598,6 @@ class RayCodeGen:
608
598
  textwrap.dedent(f"""\
609
599
  sky_env_vars_dict = {{}}
610
600
  sky_env_vars_dict['{constants.SKYPILOT_NODE_IPS}'] = job_ip_list_str
611
- # Backward compatibility: Environment starting with `SKY_` is
612
- # deprecated. Remove it in v0.9.0.
613
- sky_env_vars_dict['SKY_NODE_IPS'] = job_ip_list_str
614
601
  sky_env_vars_dict['{constants.SKYPILOT_NUM_NODES}'] = len(job_ip_rank_list)
615
602
  """)
616
603
  ]
@@ -659,9 +646,6 @@ class RayCodeGen:
659
646
  if script is not None:
660
647
  script += rclone_flush_script
661
648
  sky_env_vars_dict['{constants.SKYPILOT_NUM_GPUS_PER_NODE}'] = {int(math.ceil(num_gpus))!r}
662
- # Backward compatibility: Environment starting with `SKY_` is
663
- # deprecated. Remove it in v0.9.0.
664
- sky_env_vars_dict['SKY_NUM_GPUS_PER_NODE'] = {int(math.ceil(num_gpus))!r}
665
649
 
666
650
  ip = gang_scheduling_id_to_ip[{gang_scheduling_id!r}]
667
651
  rank = job_ip_rank_map[ip]
@@ -678,14 +662,8 @@ class RayCodeGen:
678
662
  name_str = f'{{node_name}}, rank={{rank}},'
679
663
  log_path = os.path.expanduser(os.path.join({log_dir!r}, f'{{rank}}-{{node_name}}.log'))
680
664
  sky_env_vars_dict['{constants.SKYPILOT_NODE_RANK}'] = rank
681
- # Backward compatibility: Environment starting with `SKY_` is
682
- # deprecated. Remove it in v0.9.0.
683
- sky_env_vars_dict['SKY_NODE_RANK'] = rank
684
665
 
685
666
  sky_env_vars_dict['SKYPILOT_INTERNAL_JOB_ID'] = {self.job_id}
686
- # Backward compatibility: Environment starting with `SKY_` is
687
- # deprecated. Remove it in v0.9.0.
688
- sky_env_vars_dict['SKY_INTERNAL_JOB_ID'] = {self.job_id}
689
667
 
690
668
  futures.append(run_bash_command_with_log \\
691
669
  .options(name=name_str, {options_str}) \\
@@ -1473,7 +1451,7 @@ class RetryingVmProvisioner(object):
1473
1451
  f'invalid cloud credentials: '
1474
1452
  f'{common_utils.format_exception(e)}')
1475
1453
  except exceptions.InvalidCloudConfigs as e:
1476
- # Failed due to invalid user configs in ~/.sky/skyconfig.yaml.
1454
+ # Failed due to invalid user configs in ~/.sky/config.yaml.
1477
1455
  logger.warning(f'{common_utils.format_exception(e)}')
1478
1456
  # We should block the entire cloud if the user config is
1479
1457
  # invalid.
@@ -2065,9 +2043,9 @@ class RetryingVmProvisioner(object):
2065
2043
  (clouds.Kubernetes, clouds.RunPod)) and
2066
2044
  controller_utils.Controllers.from_name(cluster_name)
2067
2045
  is not None):
2068
- assert (clouds.CloudImplementationFeatures.AUTOSTOP
2069
- in requested_features), requested_features
2070
- requested_features.remove(
2046
+ # If autostop is disabled in config, the feature may not be
2047
+ # requested, so use discard() instead of remove().
2048
+ requested_features.discard(
2071
2049
  clouds.CloudImplementationFeatures.AUTOSTOP)
2072
2050
 
2073
2051
  # Skip if to_provision.cloud does not support requested features
@@ -4470,7 +4448,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4470
4448
  (clouds.Kubernetes, clouds.RunPod)) and not down and
4471
4449
  idle_minutes_to_autostop >= 0):
4472
4450
  # We should hit this code path only for the controllers on
4473
- # Kubernetes and RunPod clusters.
4451
+ # Kubernetes and RunPod clusters, because autostop() will
4452
+ # skip the supported feature check. Non-controller k8s/runpod
4453
+ # clusters will have already errored out.
4474
4454
  controller = controller_utils.Controllers.from_name(
4475
4455
  handle.cluster_name)
4476
4456
  assert (controller is not None), handle.cluster_name
@@ -4481,6 +4461,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4481
4461
  # For SkyServe controllers on Kubernetes: override autostop
4482
4462
  # behavior to force autodown (instead of no-op)
4483
4463
  # to avoid dangling controllers.
4464
+
4465
+ # down = False is the default, but warn the user in case
4466
+ # they have explicitly specified it.
4467
+ config_override_down = skypilot_config.get_nested(
4468
+ (controller.value.controller_type, 'controller',
4469
+ 'autostop', 'down'), None)
4470
+ if config_override_down is False: # will not match None
4471
+ logger.warning(
4472
+ 'SkyServe controller autodown is disabled in the '
4473
+ '~/.sky/config.yaml configuration file '
4474
+ '(serve.controller.autostop.down_when_idle), but '
4475
+ 'it is force enabled for Kubernetes clusters.')
4476
+
4484
4477
  down = True
4485
4478
  else:
4486
4479
  logger.info('Auto-stop is not supported for Kubernetes '
sky/backends/wheel_utils.py CHANGED
@@ -85,6 +85,15 @@ def _build_sky_wheel() -> pathlib.Path:
85
85
  for f in setup_files_dir.iterdir():
86
86
  if f.is_file() and f.name != 'setup.py':
87
87
  shutil.copy(str(f), str(tmp_dir))
88
+ if f.name == 'MANIFEST.in':
89
+ # Remove the line `sky/dashboard/out`, so we do not
90
+ # include the dashboard files in the internal wheel
91
+ import fileinput # pylint: disable=import-outside-toplevel
92
+ with fileinput.input(tmp_dir / f.name,
93
+ inplace=True) as file:
94
+ for line in file:
95
+ if 'sky/dashboard/out' not in line:
96
+ print(line, end='')
88
97
 
89
98
  init_file_path = SKY_PACKAGE_PATH / '__init__.py'
90
99
  init_file_content = init_file_path.read_text()
sky/check.py CHANGED
@@ -142,7 +142,7 @@ def check_capabilities(
142
142
  if disallowed_cloud_names:
143
143
  disallowed_clouds_hint = (
144
144
  '\nNote: The following clouds were disabled because they were not '
145
- 'included in allowed_clouds in ~/.sky/skyconfig.yaml: '
145
+ 'included in allowed_clouds in ~/.sky/config.yaml: '
146
146
  f'{", ".join([c for c in disallowed_cloud_names])}')
147
147
  if not all_enabled_clouds:
148
148
  echo(