skypilot-nightly 1.0.0.dev20250806__py3-none-any.whl → 1.0.0.dev20250808__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.

Potentially problematic release: this version of skypilot-nightly has been flagged as possibly problematic.

Files changed (137)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +20 -1
  3. sky/backends/cloud_vm_ray_backend.py +42 -6
  4. sky/check.py +11 -1
  5. sky/client/cli/command.py +248 -119
  6. sky/client/sdk.py +146 -66
  7. sky/client/sdk_async.py +5 -1
  8. sky/core.py +5 -2
  9. sky/dashboard/out/404.html +1 -1
  10. sky/dashboard/out/_next/static/-DXZksWqf2waNHeU9YTQe/_buildManifest.js +1 -0
  11. sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +11 -0
  12. sky/dashboard/out/_next/static/chunks/1871-980a395e92633a5c.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/{4937.d6bf67771e353356.js → 4937.a2baa2df5572a276.js} +1 -1
  17. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/{691.6d99cbfba347cebf.js → 691.5eeedf82cc243343.js} +1 -1
  20. sky/dashboard/out/_next/static/chunks/6989-6129c1cfbcf51063.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/8056-34d27f51e6d1c631.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +16 -0
  24. sky/dashboard/out/_next/static/chunks/8969-c9686994ddafcf01.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/9025.a1bef12d672bb66d.js +6 -0
  26. sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +31 -0
  28. sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/{9847.4c46c5e229c78704.js → 9847.757720f3b40c0aa5.js} +1 -1
  30. sky/dashboard/out/_next/static/chunks/pages/{_app-2a43ea3241bbdacd.js → _app-491a4d699d95e808.js} +1 -1
  31. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ae17cec0fc6483d9.js +11 -0
  32. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/pages/{clusters-47f1ddae13a2f8e4.js → clusters-b30460f683e6ba96.js} +1 -1
  34. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-2a44e70b500b6b70.js → [context]-13d53fffc03ccb52.js} +1 -1
  36. sky/dashboard/out/_next/static/chunks/pages/{infra-22faac9325016d83.js → infra-fc9222e26c8e2f0d.js} +1 -1
  37. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +11 -0
  38. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-f5ccf5d39d87aebe.js +21 -0
  39. sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/{users-b90c865a690bfe84.js → users-7ed36e44e779d5c7.js} +1 -1
  41. sky/dashboard/out/_next/static/chunks/pages/{volumes-7af733f5d7b6ed1c.js → volumes-c9695d657f78b5dc.js} +1 -1
  42. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-f72f73bcef9541dc.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/webpack-339efec49c0cc7d0.js +1 -0
  46. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +3 -0
  47. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  48. sky/dashboard/out/clusters/[cluster].html +1 -1
  49. sky/dashboard/out/clusters.html +1 -1
  50. sky/dashboard/out/config.html +1 -1
  51. sky/dashboard/out/index.html +1 -1
  52. sky/dashboard/out/infra/[context].html +1 -1
  53. sky/dashboard/out/infra.html +1 -1
  54. sky/dashboard/out/jobs/[job].html +1 -1
  55. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  56. sky/dashboard/out/jobs.html +1 -1
  57. sky/dashboard/out/users.html +1 -1
  58. sky/dashboard/out/volumes.html +1 -1
  59. sky/dashboard/out/workspace/new.html +1 -1
  60. sky/dashboard/out/workspaces/[name].html +1 -1
  61. sky/dashboard/out/workspaces.html +1 -1
  62. sky/execution.py +6 -4
  63. sky/global_user_state.py +22 -3
  64. sky/jobs/__init__.py +2 -0
  65. sky/jobs/client/sdk.py +67 -19
  66. sky/jobs/controller.py +2 -1
  67. sky/jobs/server/core.py +48 -1
  68. sky/jobs/server/server.py +52 -3
  69. sky/jobs/state.py +5 -1
  70. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  71. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  72. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  73. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  74. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  75. sky/serve/client/impl.py +93 -6
  76. sky/serve/client/sdk.py +22 -53
  77. sky/serve/constants.py +2 -1
  78. sky/serve/controller.py +4 -2
  79. sky/serve/serve_state.py +444 -324
  80. sky/serve/serve_utils.py +77 -46
  81. sky/serve/server/core.py +13 -197
  82. sky/serve/server/impl.py +239 -2
  83. sky/serve/service.py +8 -3
  84. sky/server/common.py +18 -7
  85. sky/server/constants.py +1 -1
  86. sky/server/requests/executor.py +5 -3
  87. sky/server/requests/payloads.py +19 -0
  88. sky/setup_files/alembic.ini +4 -0
  89. sky/task.py +18 -11
  90. sky/templates/kubernetes-ray.yml.j2 +5 -0
  91. sky/templates/sky-serve-controller.yaml.j2 +1 -0
  92. sky/usage/usage_lib.py +8 -6
  93. sky/utils/annotations.py +8 -3
  94. sky/utils/cli_utils/status_utils.py +1 -1
  95. sky/utils/common_utils.py +11 -1
  96. sky/utils/db/db_utils.py +31 -0
  97. sky/utils/db/migration_utils.py +6 -2
  98. sky/utils/kubernetes/deploy_remote_cluster.py +3 -1
  99. sky/utils/resource_checker.py +162 -21
  100. sky/volumes/client/sdk.py +4 -4
  101. sky/workspaces/core.py +210 -6
  102. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/METADATA +19 -14
  103. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/RECORD +109 -103
  104. sky/client/sdk.pyi +0 -301
  105. sky/dashboard/out/_next/static/Gelsd19kVxXcX7aQQGsGu/_buildManifest.js +0 -1
  106. sky/dashboard/out/_next/static/chunks/1043-75af48ca5d5aaf57.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/1141-8678a9102cc5f67e.js +0 -11
  108. sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +0 -1
  109. sky/dashboard/out/_next/static/chunks/1871-ced1c14230cad6e1.js +0 -6
  110. sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/2622-951867535095b0eb.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/3785.0a173cd4393f0fef.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +0 -1
  115. sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +0 -16
  116. sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/6601-2109d22e7861861c.js +0 -1
  118. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/9025.99f29acb7617963e.js +0 -6
  121. sky/dashboard/out/_next/static/chunks/938-bda2685db5eae6cf.js +0 -1
  122. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-7cb24da04ca00956.js +0 -11
  123. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-1e95993124dbfc57.js +0 -1
  124. sky/dashboard/out/_next/static/chunks/pages/config-d56e64f30db7b42e.js +0 -1
  125. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90693cb88b5599a7.js +0 -11
  126. sky/dashboard/out/_next/static/chunks/pages/jobs-ab318e52eb4424a7.js +0 -1
  127. sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +0 -1
  128. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-35e0de5bca55e594.js +0 -1
  129. sky/dashboard/out/_next/static/chunks/pages/workspaces-062525fb5462acb6.js +0 -1
  130. sky/dashboard/out/_next/static/chunks/webpack-387626669badf82e.js +0 -1
  131. sky/dashboard/out/_next/static/css/b3227360726f12eb.css +0 -3
  132. /sky/dashboard/out/_next/static/{Gelsd19kVxXcX7aQQGsGu → -DXZksWqf2waNHeU9YTQe}/_ssgManifest.js +0 -0
  133. /sky/dashboard/out/_next/static/chunks/{6135-2d7ed3350659d073.js → 6135-85426374db04811e.js} +0 -0
  134. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/WHEEL +0 -0
  135. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/entry_points.txt +0 -0
  136. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/licenses/LICENSE +0 -0
  137. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
  import urllib.request

  # Replaced with the current commit when building the wheels.
- _SKYPILOT_COMMIT_SHA = '57784d97d8d63f3c87f4d5d22f3e820b10154241'
+ _SKYPILOT_COMMIT_SHA = 'eb83a691489c0c37aae9c22f607469ff78a74e34'


  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():


  __commit__ = _get_git_commit()
- __version__ = '1.0.0.dev20250806'
+ __version__ = '1.0.0.dev20250808'
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))

sky/backends/backend_utils.py CHANGED
@@ -121,6 +121,7 @@ CLUSTER_STATUS_LOCK_TIMEOUT_SECONDS = 20
  _CLUSTER_STATUS_CACHE_DURATION_SECONDS = 2

  CLUSTER_FILE_MOUNTS_LOCK_TIMEOUT_SECONDS = 10
+ WORKSPACE_LOCK_TIMEOUT_SECONDS = 10

  # Remote dir that holds our runtime files.
  _REMOTE_RUNTIME_FILES_DIR = '~/.sky/.runtime_files'
@@ -2760,6 +2761,9 @@ def get_clusters(
      refresh: common.StatusRefreshMode,
      cluster_names: Optional[Union[str, List[str]]] = None,
      all_users: bool = True,
+     # Internal only:
+     # pylint: disable=invalid-name
+     _include_is_managed: bool = False,
  ) -> List[Dict[str, Any]]:
      """Returns a list of cached or optionally refreshed cluster records.

@@ -2780,6 +2784,8 @@ def get_clusters(
              names.
          all_users: If True, return clusters from all users. If False, only
              return clusters from the current user.
+         _include_is_managed: Whether to force include clusters created by the
+             controller.

      Returns:
          A list of cluster records. If the cluster does not exist or has been
@@ -2788,6 +2794,13 @@ def get_clusters(
      records = global_user_state.get_clusters()
      current_user = common_utils.get_current_user()

+     # Filter out clusters created by the controller.
+     if (not env_options.Options.SHOW_DEBUG_INFO.get() and
+             not _include_is_managed):
+         records = [
+             record for record in records if not record.get('is_managed', False)
+         ]
+
      # Filter by user if requested
      if not all_users:
          records = [
@@ -3221,7 +3234,8 @@ def get_endpoints(cluster: str,
          with ux_utils.print_exception_no_traceback():
              raise ValueError(f'Invalid endpoint {port!r}.') from None
      cluster_records = get_clusters(refresh=common.StatusRefreshMode.NONE,
-                                    cluster_names=[cluster])
+                                    cluster_names=[cluster],
+                                    _include_is_managed=True)
      if not cluster_records:
          with ux_utils.print_exception_no_traceback():
              raise exceptions.ClusterNotUpError(
@@ -3311,3 +3325,8 @@ def cluster_status_lock_id(cluster_name: str) -> str:
  def cluster_file_mounts_lock_id(cluster_name: str) -> str:
      """Get the lock ID for cluster file mounts operations."""
      return f'{cluster_name}_file_mounts'
+
+
+ def workspace_lock_id(workspace_name: str) -> str:
+     """Get the lock ID for workspace operations."""
+     return f'{workspace_name}_workspace'
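For illustration only, not part of the package: the backend_utils.py hunks add an internal-only keyword argument to get_clusters() and a workspace lock-ID helper. A minimal sketch of how an internal caller might use them, assuming it runs inside backend_utils.py where get_clusters, common, and workspace_lock_id are in scope (the cluster and workspace names below are made up):

    # Also return controller-managed clusters, mirroring the new call in
    # get_endpoints() above; external callers would normally omit the flag.
    records = get_clusters(refresh=common.StatusRefreshMode.NONE,
                           cluster_names=['my-cluster'],  # hypothetical name
                           _include_is_managed=True)

    # New lock-ID helper, paired with WORKSPACE_LOCK_TIMEOUT_SECONDS above.
    lock_id = workspace_lock_id('default')  # -> 'default_workspace'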
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -168,6 +168,9 @@ _MAX_INLINE_SCRIPT_LENGTH = 100 * 1024
  _RESOURCES_UNAVAILABLE_LOG = (
      'Reasons for provision failures (for details, please check the log above):')

+ # Number of seconds to wait locking the cluster before communicating with user.
+ _CLUSTER_LOCK_TIMEOUT = 5.0
+

  def _is_command_length_over_limit(command: str) -> bool:
      """Check if the length of the command exceeds the limit.
@@ -1174,7 +1177,8 @@ class RetryingVmProvisioner(object):
                   local_wheel_path: pathlib.Path,
                   wheel_hash: str,
                   blocked_resources: Optional[Iterable[
-                      resources_lib.Resources]] = None):
+                      resources_lib.Resources]] = None,
+                  is_managed: Optional[bool] = None):
          self._blocked_resources: Set[resources_lib.Resources] = set()
          if blocked_resources:
              # blocked_resources is not None and not empty.
@@ -1186,6 +1190,7 @@ class RetryingVmProvisioner(object):
          self._requested_features = requested_features
          self._local_wheel_path = local_wheel_path
          self._wheel_hash = wheel_hash
+         self._is_managed = is_managed

      def _yield_zones(
              self, to_provision: resources_lib.Resources, num_nodes: int,
@@ -1519,6 +1524,7 @@ class RetryingVmProvisioner(object):
                  cluster_handle=handle,
                  requested_resources=requested_resources,
                  ready=False,
+                 is_managed=self._is_managed,
              )

              global_user_state.set_owner_identity_for_cluster(
@@ -2750,6 +2756,8 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
          self._dag = None
          self._optimize_target = None
          self._requested_features = set()
+         self._dump_final_script = False
+         self._is_managed = False

          # Command for running the setup script. It is only set when the
          # setup needs to be run outside the self._setup() and as part of
@@ -2766,6 +2774,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
          self._requested_features = kwargs.pop('requested_features',
                                                self._requested_features)
          self._dump_final_script = kwargs.pop('dump_final_script', False)
+         self._is_managed = kwargs.pop('is_managed', False)
          assert not kwargs, f'Unexpected kwargs: {kwargs}'

      def check_resources_fit_cluster(
@@ -2917,10 +2926,36 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
          # exceptions.ClusterOwnerIdentityMismatchError
          backend_utils.check_owner_identity(cluster_name)
          lock_id = backend_utils.cluster_status_lock_id(cluster_name)
-         with timeline.DistributedLockEvent(lock_id):
-             # Try to launch the exiting cluster first. If no existing cluster,
-             # this function will create a to_provision_config with required
-             # resources.
+         communicated_with_user = False
+
+         while True:
+             try:
+                 return self._locked_provision(lock_id, task, to_provision,
+                                               dryrun, stream_logs, cluster_name,
+                                               retry_until_up,
+                                               skip_unnecessary_provisioning)
+             except locks.LockTimeout:
+                 if not communicated_with_user:
+                     logger.info(f'{colorama.Fore.YELLOW}'
+                                 f'Launching delayed, check concurrent tasks: '
+                                 f'sky api status')
+                     communicated_with_user = True
+
+     def _locked_provision(
+             self,
+             lock_id: str,
+             task: task_lib.Task,
+             to_provision: Optional[resources_lib.Resources],
+             dryrun: bool,
+             stream_logs: bool,
+             cluster_name: str,
+             retry_until_up: bool = False,
+             skip_unnecessary_provisioning: bool = False,
+     ) -> Tuple[Optional[CloudVmRayResourceHandle], bool]:
+         with timeline.DistributedLockEvent(lock_id, _CLUSTER_LOCK_TIMEOUT):
+             # Try to launch the exiting cluster first. If no existing
+             # cluster, this function will create a to_provision_config
+             # with required resources.
              to_provision_config = self._check_existing_cluster(
                  task, to_provision, cluster_name, dryrun)
              assert to_provision_config.resources is not None, (
@@ -2961,7 +2996,8 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
                  self._requested_features,
                  local_wheel_path,
                  wheel_hash,
-                 blocked_resources=task.blocked_resources)
+                 blocked_resources=task.blocked_resources,
+                 is_managed=self._is_managed)
              log_path = os.path.join(self.log_dir, 'provision.log')
              rich_utils.force_update_status(
                  ux_utils.spinner_message('Launching', log_path))
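For illustration only, not part of the package: the largest cloud_vm_ray_backend.py change replaces the single blocking DistributedLockEvent with a 5-second lock timeout plus a retry loop, so a launch waiting on another task's cluster lock notifies the user once instead of hanging silently. A stripped-down, self-contained sketch of that pattern; every name below is hypothetical and only stands in for SkyPilot's real lock and provision helpers:

    class LockTimeout(Exception):
        """Raised when the lock is not acquired within the timeout."""

    _CLUSTER_LOCK_TIMEOUT = 5.0  # seconds, as in the hunk above

    def provision(acquire_lock, locked_provision):
        """Retry provisioning; tell the user once if another task holds the lock."""
        communicated_with_user = False
        while True:
            try:
                # acquire_lock is assumed to return a context manager that
                # raises LockTimeout if the lock is not obtained in time.
                with acquire_lock(timeout=_CLUSTER_LOCK_TIMEOUT):
                    return locked_provision()
            except LockTimeout:
                if not communicated_with_user:
                    print('Launching delayed, check concurrent tasks: sky api status')
                    communicated_with_user = True
                # Keep retrying until the other task releases the lock.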
sky/check.py CHANGED
@@ -467,8 +467,18 @@ def _print_checked_cloud(
          if ok:
              enabled_capabilities.append(capability)
          # `dict` reasons for K8s and SSH will be printed in detail in
-         # _format_enabled_cloud. Skip here.
+         # _format_enabled_cloud. Skip here unless the cloud is disabled.
          if not isinstance(reason, str):
+             if not ok and isinstance(cloud_tuple[1],
+                                      (sky_clouds.SSH, sky_clouds.Kubernetes)):
+                 if reason is not None:
+                     reason_str = _format_context_details(cloud_tuple[1],
+                                                          show_details=True,
+                                                          ctx2text=reason)
+                     reason_str = '\n'.join(
+                         ' ' + line for line in reason_str.splitlines())
+                     reasons_to_capabilities.setdefault(reason_str,
+                                                        []).append(capability)
              continue
          if ok:
              if reason is not None:
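For illustration only, not part of the package: the check.py hunk formats a per-context failure reason for a disabled SSH/Kubernetes cloud and groups capabilities under the identical reason text via dict.setdefault, so each distinct reason is reported once. A tiny sketch of that grouping idiom, with made-up reason and capability strings:

    reasons_to_capabilities = {}

    def add_reason(reason_str, capability):
        # Indent each line of the reason, then group capabilities by the
        # resulting string so identical reasons are printed only once.
        indented = '\n'.join('  ' + line for line in reason_str.splitlines())
        reasons_to_capabilities.setdefault(indented, []).append(capability)

    add_reason('ctx-a: unreachable\nctx-b: no permission', 'compute')
    add_reason('ctx-a: unreachable\nctx-b: no permission', 'storage')
    # -> one entry whose value is ['compute', 'storage']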