skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250526__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +62 -45
  3. sky/backends/cloud_vm_ray_backend.py +3 -1
  4. sky/check.py +335 -170
  5. sky/cli.py +56 -13
  6. sky/client/cli.py +56 -13
  7. sky/client/sdk.py +54 -10
  8. sky/clouds/gcp.py +19 -3
  9. sky/core.py +5 -2
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/480-5a0de8b6570ea105.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
  15. sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
  16. sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
  17. sky/dashboard/out/_next/static/chunks/578-d351125af46c293f.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/pages/_app-96a715a6fb01e228.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
  25. sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
  29. sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/workspace/new-bbf436f41381e169.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7733c960685b4385.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
  34. sky/dashboard/out/_next/static/css/28558d57108b05ae.css +3 -0
  35. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  36. sky/dashboard/out/clusters/[cluster].html +1 -1
  37. sky/dashboard/out/clusters.html +1 -1
  38. sky/dashboard/out/index.html +1 -1
  39. sky/dashboard/out/infra.html +1 -1
  40. sky/dashboard/out/jobs/[job].html +1 -1
  41. sky/dashboard/out/jobs.html +1 -1
  42. sky/dashboard/out/users.html +1 -0
  43. sky/dashboard/out/workspace/new.html +1 -0
  44. sky/dashboard/out/workspaces/[name].html +1 -0
  45. sky/dashboard/out/workspaces.html +1 -0
  46. sky/data/storage.py +1 -1
  47. sky/global_user_state.py +606 -543
  48. sky/jobs/constants.py +1 -1
  49. sky/jobs/server/core.py +72 -56
  50. sky/jobs/state.py +26 -5
  51. sky/jobs/utils.py +65 -13
  52. sky/optimizer.py +6 -3
  53. sky/provision/fluidstack/instance.py +1 -0
  54. sky/serve/server/core.py +9 -6
  55. sky/server/html/token_page.html +6 -1
  56. sky/server/requests/executor.py +1 -0
  57. sky/server/requests/payloads.py +28 -0
  58. sky/server/server.py +59 -5
  59. sky/setup_files/dependencies.py +1 -0
  60. sky/skylet/constants.py +4 -1
  61. sky/skypilot_config.py +107 -11
  62. sky/utils/cli_utils/status_utils.py +18 -8
  63. sky/utils/db_utils.py +53 -0
  64. sky/utils/kubernetes/config_map_utils.py +133 -0
  65. sky/utils/kubernetes/deploy_remote_cluster.py +166 -147
  66. sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
  67. sky/utils/kubernetes/ssh-tunnel.sh +20 -28
  68. sky/utils/log_utils.py +4 -0
  69. sky/utils/schemas.py +54 -0
  70. sky/workspaces/__init__.py +0 -0
  71. sky/workspaces/core.py +295 -0
  72. sky/workspaces/server.py +62 -0
  73. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/METADATA +2 -1
  74. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/RECORD +79 -63
  75. sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
  77. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  78. sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
  79. sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
  80. sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
  81. sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
  82. sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
  83. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
  85. sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
  87. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
  88. sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
  89. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  90. sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
  91. /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → 7GEgRyZKRaSnYZCV1Jwol}/_ssgManifest.js +0 -0
  92. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/WHEEL +0 -0
  93. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/entry_points.txt +0 -0
  94. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/licenses/LICENSE +0 -0
  95. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
  import urllib.request

  # Replaced with the current commit when building the wheels.
- _SKYPILOT_COMMIT_SHA = '8848c5c597089bae431284150803f4e557383b9e'
+ _SKYPILOT_COMMIT_SHA = 'bbcd11ba81d6312ae5e292e891da531024c321aa'


  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():


  __commit__ = _get_git_commit()
- __version__ = '1.0.0.dev20250523'
+ __version__ = '1.0.0.dev20250526'
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))

sky/backends/backend_utils.py CHANGED
@@ -1556,6 +1556,16 @@ def check_owner_identity(cluster_name: str) -> None:
      handle = record['handle']
      if not isinstance(handle, backends.CloudVmRayResourceHandle):
          return
+     active_workspace = skypilot_config.get_active_workspace()
+     cluster_workspace = record.get('workspace',
+                                    constants.SKYPILOT_DEFAULT_WORKSPACE)
+     if active_workspace != cluster_workspace:
+         with ux_utils.print_exception_no_traceback():
+             raise exceptions.ClusterOwnerIdentityMismatchError(
+                 f'{colorama.Fore.YELLOW}'
+                 f'The cluster {cluster_name!r} is in workspace '
+                 f'{cluster_workspace!r}, but the active workspace is '
+                 f'{active_workspace!r}.{colorama.Fore.RESET}')

      launched_resources = handle.launched_resources.assert_launchable()
      cloud = launched_resources.cloud
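The block added above makes ownership checks workspace-aware: if a cluster's recorded workspace differs from the currently active one, the check aborts with ClusterOwnerIdentityMismatchError. A minimal, self-contained sketch of that guard follows; the record shape, helper name, and default value are illustrative, and only the exception name is taken from the diff.

DEFAULT_WORKSPACE = 'default'  # stands in for constants.SKYPILOT_DEFAULT_WORKSPACE


class ClusterOwnerIdentityMismatchError(Exception):
    """Raised when a cluster belongs to a workspace other than the active one."""


def check_cluster_workspace(record: dict, active_workspace: str) -> None:
    # Clusters recorded before workspaces existed fall back to the default.
    cluster_workspace = record.get('workspace', DEFAULT_WORKSPACE)
    if active_workspace != cluster_workspace:
        raise ClusterOwnerIdentityMismatchError(
            f'The cluster {record["name"]!r} is in workspace '
            f'{cluster_workspace!r}, but the active workspace is '
            f'{active_workspace!r}.')


# Example: a cluster launched under 'team-a' is rejected while 'default' is active.
try:
    check_cluster_workspace({'name': 'my-cluster', 'workspace': 'team-a'}, 'default')
except ClusterOwnerIdentityMismatchError as e:
    print(e)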
@@ -2152,57 +2162,64 @@ def refresh_cluster_record(
      record = global_user_state.get_cluster_from_name(cluster_name)
      if record is None:
          return None
-     check_owner_identity(cluster_name)
-
-     if not isinstance(record['handle'], backends.CloudVmRayResourceHandle):
-         return record
-
-     # The loop logic allows us to notice if the status was updated in the
-     # global_user_state by another process and stop trying to get the lock.
-     # The core loop logic is adapted from FileLock's implementation.
-     lock = filelock.FileLock(CLUSTER_STATUS_LOCK_PATH.format(cluster_name))
-     start_time = time.perf_counter()
+     # TODO(zhwu, 05/20): switch to the specific workspace to make sure we are
+     # using the correct cloud credentials.
+     workspace = record.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
+     with skypilot_config.local_active_workspace_ctx(workspace):
+         check_owner_identity(cluster_name)

-     # Loop until we have an up-to-date status or until we acquire the lock.
-     while True:
-         # Check to see if we can return the cached status.
-         if not _must_refresh_cluster_status(record, force_refresh_statuses):
+         if not isinstance(record['handle'], backends.CloudVmRayResourceHandle):
              return record

-         if not acquire_per_cluster_status_lock:
-             return _update_cluster_status(cluster_name)
+         # The loop logic allows us to notice if the status was updated in the
+         # global_user_state by another process and stop trying to get the lock.
+         # The core loop logic is adapted from FileLock's implementation.
+         lock = filelock.FileLock(CLUSTER_STATUS_LOCK_PATH.format(cluster_name))
+         start_time = time.perf_counter()

-     # Try to acquire the lock so we can fetch the status.
-     try:
-         with lock.acquire(blocking=False):
-             # Check the cluster status again, since it could have been
-             # updated between our last check and acquiring the lock.
-             record = global_user_state.get_cluster_from_name(cluster_name)
-             if record is None or not _must_refresh_cluster_status(
-                     record, force_refresh_statuses):
-                 return record
-             # Update and return the cluster status.
+         # Loop until we have an up-to-date status or until we acquire the lock.
+         while True:
+             # Check to see if we can return the cached status.
+             if not _must_refresh_cluster_status(record, force_refresh_statuses):
+                 return record
+
+             if not acquire_per_cluster_status_lock:
                  return _update_cluster_status(cluster_name)
-     except filelock.Timeout:
-         # lock.acquire() will throw a Timeout exception if the lock is not
-         # available and we have blocking=False.
-         pass
-
-     # Logic adapted from FileLock.acquire().
-     # If cluster_status_lock_time is <0, we will never hit this. No timeout.
-     # Otherwise, if we have timed out, return the cached status. This has
-     # the potential to cause correctness issues, but if so it is the
-     # caller's responsibility to set the timeout to -1.
-     if 0 <= cluster_status_lock_timeout < time.perf_counter() - start_time:
-         logger.debug('Refreshing status: Failed get the lock for cluster '
-                      f'{cluster_name!r}. Using the cached status.')
-         return record
-     time.sleep(0.05)

-     # Refresh for next loop iteration.
-     record = global_user_state.get_cluster_from_name(cluster_name)
-     if record is None:
-         return None
+             # Try to acquire the lock so we can fetch the status.
+             try:
+                 with lock.acquire(blocking=False):
+                     # Check the cluster status again, since it could have been
+                     # updated between our last check and acquiring the lock.
+                     record = global_user_state.get_cluster_from_name(
+                         cluster_name)
+                     if record is None or not _must_refresh_cluster_status(
+                             record, force_refresh_statuses):
+                         return record
+                     # Update and return the cluster status.
+                     return _update_cluster_status(cluster_name)
+             except filelock.Timeout:
+                 # lock.acquire() will throw a Timeout exception if the lock is not
+                 # available and we have blocking=False.
+                 pass
+
+             # Logic adapted from FileLock.acquire().
+             # If cluster_status_lock_time is <0, we will never hit this. No timeout.
+             # Otherwise, if we have timed out, return the cached status. This has
+             # the potential to cause correctness issues, but if so it is the
+             # caller's responsibility to set the timeout to -1.
+             if 0 <= cluster_status_lock_timeout < time.perf_counter(
+             ) - start_time:
+                 logger.debug(
+                     'Refreshing status: Failed get the lock for cluster '
+                     f'{cluster_name!r}. Using the cached status.')
+                 return record
+             time.sleep(0.05)
+
+             # Refresh for next loop iteration.
+             record = global_user_state.get_cluster_from_name(cluster_name)
+             if record is None:
+                 return None


  @timeline.event
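Aside from nesting everything under the workspace context, the hunk above keeps the existing refresh pattern: try to take a per-cluster file lock without blocking, fall back to the cached record once the configured timeout elapses, and otherwise keep retrying. A standalone sketch of that pattern using only the filelock package follows; the lock path, timeout constant, and callback names are placeholders, not SkyPilot APIs.

import time

import filelock

LOCK_PATH = '/tmp/demo_cluster_status.lock'  # placeholder; the real code uses a per-cluster path
LOCK_TIMEOUT_SECONDS = 10.0                  # a negative value would mean 'never fall back'


def refresh_with_lock(refresh_fn, read_cached_fn):
    """Refresh under the lock if possible; otherwise return the cached record."""
    lock = filelock.FileLock(LOCK_PATH)
    start_time = time.perf_counter()
    while True:
        try:
            # blocking=False makes acquire() raise filelock.Timeout immediately
            # if another process currently holds the lock.
            with lock.acquire(blocking=False):
                return refresh_fn()
        except filelock.Timeout:
            pass
        # Timed out waiting for the lock: serve the (possibly stale) cached record.
        if 0 <= LOCK_TIMEOUT_SECONDS < time.perf_counter() - start_time:
            return read_cached_fn()
        time.sleep(0.05)


print(refresh_with_lock(lambda: 'refreshed record', lambda: 'cached record'))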
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -3507,7 +3507,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
              # Add the managed job to job queue database.
              managed_job_codegen = managed_jobs.ManagedJobCodeGen()
              managed_job_code = managed_job_codegen.set_pending(
-                 job_id, managed_job_dag)
+                 job_id, managed_job_dag,
+                 skypilot_config.get_active_workspace(
+                     force_user_workspace=True))
              # Set the managed job to PENDING state to make sure that this
              # managed job appears in the `sky jobs queue`, even if it needs
              # to wait to be submitted.