skypilot-nightly 1.0.0.dev20250804__py3-none-any.whl → 1.0.0.dev20250807__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +33 -4
- sky/catalog/kubernetes_catalog.py +8 -0
- sky/catalog/nebius_catalog.py +0 -1
- sky/check.py +11 -1
- sky/client/cli/command.py +234 -100
- sky/client/sdk.py +30 -9
- sky/client/sdk_async.py +815 -0
- sky/clouds/kubernetes.py +6 -1
- sky/clouds/nebius.py +1 -4
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-980a395e92633a5c.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3698-7874720877646365.js → 3850-ff4a9a69d978632b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +1 -0
- sky/dashboard/out/_next/static/chunks/{4937.d6bf67771e353356.js → 4937.a2baa2df5572a276.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/6601-3e21152fe16da09c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{691.6d99cbfba347cebf.js → 691.5eeedf82cc243343.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6989-6129c1cfbcf51063.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +1 -0
- sky/dashboard/out/_next/static/chunks/8056-019615038d6ce427.js +1 -0
- sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-318c3dca725e8e5d.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9025.7937c16bc8623516.js → 9025.a1bef12d672bb66d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +31 -0
- sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9847.4c46c5e229c78704.js → 9847.757720f3b40c0aa5.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{9984.78ee6d2c6fa4b0e8.js → 9984.c5564679e467d245.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{_app-a67ae198457b9886.js → _app-1e6de35d15a8d432.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6fd1d2d8441aa54b.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-b30460f683e6ba96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-8620d099cbef8608.js → config-dfb9bf07b13045f4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-13d53fffc03ccb52.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-fc9222e26c8e2f0d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-f5ccf5d39d87aebe.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-7ed36e44e779d5c7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-f72f73bcef9541dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-76efbdad99742559.js +1 -0
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +14 -2
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +43 -2
- sky/jobs/client/sdk_async.py +135 -0
- sky/jobs/server/core.py +48 -1
- sky/jobs/server/server.py +52 -3
- sky/jobs/state.py +5 -1
- sky/jobs/utils.py +3 -1
- sky/provision/kubernetes/utils.py +30 -4
- sky/provision/nebius/instance.py +1 -0
- sky/provision/nebius/utils.py +9 -1
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/serve/client/impl.py +85 -1
- sky/serve/client/sdk.py +16 -47
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +3 -1
- sky/serve/controller.py +6 -3
- sky/serve/load_balancer.py +3 -1
- sky/serve/serve_state.py +93 -5
- sky/serve/serve_utils.py +200 -67
- sky/serve/server/core.py +13 -197
- sky/serve/server/impl.py +261 -23
- sky/serve/service.py +15 -3
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +46 -0
- sky/server/auth/oauth2_proxy.py +185 -0
- sky/server/common.py +119 -21
- sky/server/constants.py +1 -1
- sky/server/daemons.py +60 -11
- sky/server/requests/executor.py +5 -3
- sky/server/requests/payloads.py +19 -0
- sky/server/rest.py +114 -0
- sky/server/server.py +44 -40
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +1 -1
- sky/skylet/events.py +5 -1
- sky/skylet/skylet.py +3 -1
- sky/task.py +61 -21
- sky/templates/kubernetes-ray.yml.j2 +9 -0
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/templates/sky-serve-controller.yaml.j2 +1 -0
- sky/usage/usage_lib.py +8 -6
- sky/utils/annotations.py +8 -3
- sky/utils/common_utils.py +11 -1
- sky/utils/controller_utils.py +7 -0
- sky/utils/db/migration_utils.py +2 -2
- sky/utils/rich_utils.py +120 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/METADATA +22 -13
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/RECORD +120 -112
- sky/client/sdk.pyi +0 -300
- sky/dashboard/out/_next/static/KiGGm4fK0CpmN6BT17jkh/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +0 -11
- sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +0 -1
- sky/dashboard/out/_next/static/chunks/1871-7e17c195296e2ea9.js +0 -6
- sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +0 -16
- sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9e7df5fc761c95a7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-6c5af4c86e6ab3d3.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-13145516b19858fb.js +0 -1
- sky/dashboard/out/_next/static/css/b3227360726f12eb.css +0 -3
- /sky/dashboard/out/_next/static/{KiGGm4fK0CpmN6BT17jkh → YAirOGsV1z6B2RJ0VIUmD}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{6135-d0e285ac5f3f2485.js → 6135-85426374db04811e.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/top_level.txt +0 -0
sky/server/requests/payloads.py
CHANGED
|
@@ -707,6 +707,25 @@ class JobsPoolStatusBody(RequestBody):
|
|
|
707
707
|
pool_names: Optional[Union[str, List[str]]]
|
|
708
708
|
|
|
709
709
|
|
|
710
|
+
class JobsPoolLogsBody(RequestBody):
|
|
711
|
+
"""The request body for the jobs pool logs endpoint."""
|
|
712
|
+
pool_name: str
|
|
713
|
+
target: Union[str, serve.ServiceComponent]
|
|
714
|
+
worker_id: Optional[int] = None
|
|
715
|
+
follow: bool = True
|
|
716
|
+
tail: Optional[int] = None
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
class JobsPoolDownloadLogsBody(RequestBody):
|
|
720
|
+
"""The request body for the jobs pool download logs endpoint."""
|
|
721
|
+
pool_name: str
|
|
722
|
+
local_dir: str
|
|
723
|
+
targets: Optional[Union[str, serve.ServiceComponent,
|
|
724
|
+
List[Union[str, serve.ServiceComponent]]]]
|
|
725
|
+
worker_ids: Optional[List[int]] = None
|
|
726
|
+
tail: Optional[int] = None
|
|
727
|
+
|
|
728
|
+
|
|
710
729
|
class UploadZipFileResponse(pydantic.BaseModel):
|
|
711
730
|
"""The response body for the upload zip file endpoint."""
|
|
712
731
|
status: str
|
sky/server/rest.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""REST API client of SkyPilot API server"""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import contextlib
|
|
4
5
|
import contextvars
|
|
5
6
|
import functools
|
|
@@ -21,9 +22,11 @@ from sky.utils import ux_utils
|
|
|
21
22
|
logger = sky_logging.init_logger(__name__)
|
|
22
23
|
|
|
23
24
|
if typing.TYPE_CHECKING:
|
|
25
|
+
import aiohttp
|
|
24
26
|
import requests
|
|
25
27
|
|
|
26
28
|
else:
|
|
29
|
+
aiohttp = adaptors_common.LazyImport('aiohttp')
|
|
27
30
|
requests = adaptors_common.LazyImport('requests')
|
|
28
31
|
|
|
29
32
|
F = TypeVar('F', bound=Callable[..., Any])
|
|
@@ -204,3 +207,114 @@ def request_without_retry(method, url, **kwargs) -> 'requests.Response':
|
|
|
204
207
|
if remote_version is not None:
|
|
205
208
|
versions.set_remote_version(remote_version)
|
|
206
209
|
return response
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# Async versions of the above functions
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
async def request_async(session: 'aiohttp.ClientSession', method: str, url: str,
|
|
216
|
+
**kwargs) -> 'aiohttp.ClientResponse':
|
|
217
|
+
"""Send an async request to the API server, retry on server temporarily
|
|
218
|
+
unavailable."""
|
|
219
|
+
max_retries = 3
|
|
220
|
+
initial_backoff = 1.0
|
|
221
|
+
max_backoff_factor = 5
|
|
222
|
+
|
|
223
|
+
backoff = common_utils.Backoff(initial_backoff, max_backoff_factor)
|
|
224
|
+
last_exception = Exception('Uknown Exception') # this will be replaced by e
|
|
225
|
+
|
|
226
|
+
for retry_count in range(max_retries):
|
|
227
|
+
try:
|
|
228
|
+
return await request_without_retry_async(session, method, url,
|
|
229
|
+
**kwargs)
|
|
230
|
+
except exceptions.RequestInterruptedError:
|
|
231
|
+
logger.debug('Request interrupted. Retry immediately.')
|
|
232
|
+
continue
|
|
233
|
+
except Exception as e: # pylint: disable=broad-except
|
|
234
|
+
last_exception = e
|
|
235
|
+
if retry_count >= max_retries - 1:
|
|
236
|
+
# Retries exhausted
|
|
237
|
+
raise
|
|
238
|
+
|
|
239
|
+
# Check if this is a transient error (similar to sync version logic)
|
|
240
|
+
is_transient = _is_transient_error_async(e)
|
|
241
|
+
if not is_transient:
|
|
242
|
+
# Permanent error, no need to retry
|
|
243
|
+
raise
|
|
244
|
+
|
|
245
|
+
logger.debug(f'Retry async request due to {e}, '
|
|
246
|
+
f'attempt {retry_count + 1}/{max_retries}')
|
|
247
|
+
await asyncio.sleep(backoff.current_backoff())
|
|
248
|
+
|
|
249
|
+
# This should never be reached, but just in case
|
|
250
|
+
raise last_exception
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
async def request_without_retry_async(session: 'aiohttp.ClientSession',
|
|
254
|
+
method: str, url: str,
|
|
255
|
+
**kwargs) -> 'aiohttp.ClientResponse':
|
|
256
|
+
"""Send an async request to the API server without retry."""
|
|
257
|
+
# Add API version headers for compatibility (like sync version does)
|
|
258
|
+
if 'headers' not in kwargs:
|
|
259
|
+
kwargs['headers'] = {}
|
|
260
|
+
kwargs['headers'][constants.API_VERSION_HEADER] = str(constants.API_VERSION)
|
|
261
|
+
kwargs['headers'][constants.VERSION_HEADER] = (
|
|
262
|
+
versions.get_local_readable_version())
|
|
263
|
+
|
|
264
|
+
try:
|
|
265
|
+
response = await session.request(method, url, **kwargs)
|
|
266
|
+
|
|
267
|
+
# Handle server unavailability (503 status) - same as sync version
|
|
268
|
+
if response.status == 503:
|
|
269
|
+
with ux_utils.print_exception_no_traceback():
|
|
270
|
+
raise exceptions.ServerTemporarilyUnavailableError(
|
|
271
|
+
'SkyPilot API server is temporarily unavailable. '
|
|
272
|
+
'Please try again later.')
|
|
273
|
+
|
|
274
|
+
# Set remote API version and version from headers - same as sync version
|
|
275
|
+
remote_api_version = response.headers.get(constants.API_VERSION_HEADER)
|
|
276
|
+
remote_version = response.headers.get(constants.VERSION_HEADER)
|
|
277
|
+
if remote_api_version is not None:
|
|
278
|
+
versions.set_remote_api_version(int(remote_api_version))
|
|
279
|
+
if remote_version is not None:
|
|
280
|
+
versions.set_remote_version(remote_version)
|
|
281
|
+
|
|
282
|
+
return response
|
|
283
|
+
|
|
284
|
+
except aiohttp.ClientError as e:
|
|
285
|
+
# Convert aiohttp errors to appropriate SkyPilot exceptions
|
|
286
|
+
if isinstance(e, aiohttp.ClientConnectorError):
|
|
287
|
+
raise exceptions.RequestInterruptedError(
|
|
288
|
+
f'Connection failed: {e}') from e
|
|
289
|
+
elif isinstance(e, aiohttp.ClientTimeout):
|
|
290
|
+
raise exceptions.RequestInterruptedError(
|
|
291
|
+
f'Request timeout: {e}') from e
|
|
292
|
+
else:
|
|
293
|
+
raise
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _is_transient_error_async(e: Exception) -> bool:
|
|
297
|
+
"""Check if an exception from async request is transient and should be
|
|
298
|
+
retried.
|
|
299
|
+
|
|
300
|
+
Mirrors the logic from the sync version's is_transient_error().
|
|
301
|
+
"""
|
|
302
|
+
if isinstance(e, aiohttp.ClientError):
|
|
303
|
+
# For response errors, check status code if available
|
|
304
|
+
if isinstance(e, aiohttp.ClientResponseError):
|
|
305
|
+
# Only server error is considered as transient (same as sync
|
|
306
|
+
# version)
|
|
307
|
+
return e.status >= 500
|
|
308
|
+
# Consider connection errors and timeouts as transient
|
|
309
|
+
if isinstance(e, (aiohttp.ClientConnectorError, aiohttp.ClientTimeout)):
|
|
310
|
+
return True
|
|
311
|
+
|
|
312
|
+
# Consider server temporarily unavailable as transient
|
|
313
|
+
if isinstance(e, exceptions.ServerTemporarilyUnavailableError):
|
|
314
|
+
return True
|
|
315
|
+
|
|
316
|
+
# It is hard to enumerate all other errors that are transient, e.g.
|
|
317
|
+
# broken pipe, connection refused, etc. Instead, it is safer to assume
|
|
318
|
+
# all other errors might be transient since we only retry for 3 times
|
|
319
|
+
# by default. (Same comment as in sync version)
|
|
320
|
+
return True
|
sky/server/server.py
CHANGED
|
@@ -51,6 +51,8 @@ from sky.server import metrics
|
|
|
51
51
|
from sky.server import state
|
|
52
52
|
from sky.server import stream_utils
|
|
53
53
|
from sky.server import versions
|
|
54
|
+
from sky.server.auth import authn
|
|
55
|
+
from sky.server.auth import oauth2_proxy
|
|
54
56
|
from sky.server.requests import executor
|
|
55
57
|
from sky.server.requests import payloads
|
|
56
58
|
from sky.server.requests import preconditions
|
|
@@ -120,41 +122,6 @@ def _basic_auth_401_response(content: str):
|
|
|
120
122
|
content=content)
|
|
121
123
|
|
|
122
124
|
|
|
123
|
-
# TODO(hailong): Remove this function and use request.state.auth_user instead.
|
|
124
|
-
async def _override_user_info_in_request_body(request: fastapi.Request,
|
|
125
|
-
auth_user: Optional[models.User]):
|
|
126
|
-
if auth_user is None:
|
|
127
|
-
return
|
|
128
|
-
|
|
129
|
-
body = await request.body()
|
|
130
|
-
if body:
|
|
131
|
-
try:
|
|
132
|
-
original_json = await request.json()
|
|
133
|
-
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
|
134
|
-
logger.error(f'Error parsing request JSON: {e}')
|
|
135
|
-
else:
|
|
136
|
-
logger.debug(f'Overriding user for {request.state.request_id}: '
|
|
137
|
-
f'{auth_user.name}, {auth_user.id}')
|
|
138
|
-
if 'env_vars' in original_json:
|
|
139
|
-
if isinstance(original_json.get('env_vars'), dict):
|
|
140
|
-
original_json['env_vars'][
|
|
141
|
-
constants.USER_ID_ENV_VAR] = auth_user.id
|
|
142
|
-
original_json['env_vars'][
|
|
143
|
-
constants.USER_ENV_VAR] = auth_user.name
|
|
144
|
-
else:
|
|
145
|
-
logger.warning(
|
|
146
|
-
f'"env_vars" in request body is not a dictionary '
|
|
147
|
-
f'for request {request.state.request_id}. '
|
|
148
|
-
'Skipping user info injection into body.')
|
|
149
|
-
else:
|
|
150
|
-
original_json['env_vars'] = {}
|
|
151
|
-
original_json['env_vars'][
|
|
152
|
-
constants.USER_ID_ENV_VAR] = auth_user.id
|
|
153
|
-
original_json['env_vars'][
|
|
154
|
-
constants.USER_ENV_VAR] = auth_user.name
|
|
155
|
-
request._body = json.dumps(original_json).encode('utf-8') # pylint: disable=protected-access
|
|
156
|
-
|
|
157
|
-
|
|
158
125
|
def _try_set_basic_auth_user(request: fastapi.Request):
|
|
159
126
|
auth_header = request.headers.get('authorization')
|
|
160
127
|
if not auth_header or not auth_header.lower().startswith('basic '):
|
|
@@ -281,7 +248,7 @@ class BasicAuthMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
|
|
|
281
248
|
apr_md5_crypt.verify(password, user.password)):
|
|
282
249
|
valid_user = True
|
|
283
250
|
request.state.auth_user = user
|
|
284
|
-
await
|
|
251
|
+
await authn.override_user_info_in_request_body(request, user)
|
|
285
252
|
break
|
|
286
253
|
if not valid_user:
|
|
287
254
|
return _basic_auth_401_response('Invalid credentials')
|
|
@@ -400,7 +367,7 @@ class BearerTokenMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
|
|
|
400
367
|
request.state.auth_user = auth_user
|
|
401
368
|
|
|
402
369
|
# Override user info in request body for service account requests
|
|
403
|
-
await
|
|
370
|
+
await authn.override_user_info_in_request_body(request, auth_user)
|
|
404
371
|
|
|
405
372
|
logger.debug(f'Authenticated service account: {user_id}')
|
|
406
373
|
|
|
@@ -445,7 +412,7 @@ class AuthProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
|
|
|
445
412
|
if auth_user is not None:
|
|
446
413
|
request.state.auth_user = auth_user
|
|
447
414
|
|
|
448
|
-
await
|
|
415
|
+
await authn.override_user_info_in_request_body(request, auth_user)
|
|
449
416
|
return await call_next(request)
|
|
450
417
|
|
|
451
418
|
|
|
@@ -484,6 +451,8 @@ async def lifespan(app: fastapi.FastAPI): # pylint: disable=redefined-outer-nam
|
|
|
484
451
|
del app # unused
|
|
485
452
|
# Startup: Run background tasks
|
|
486
453
|
for event in daemons.INTERNAL_REQUEST_DAEMONS:
|
|
454
|
+
if event.should_skip():
|
|
455
|
+
continue
|
|
487
456
|
try:
|
|
488
457
|
executor.schedule_request(
|
|
489
458
|
request_id=event.id,
|
|
@@ -625,6 +594,8 @@ app.add_middleware(
|
|
|
625
594
|
# RBACMiddleware must precede all the auth middleware, so it can access
|
|
626
595
|
# request.state.auth_user.
|
|
627
596
|
app.add_middleware(RBACMiddleware)
|
|
597
|
+
# Authentication based on oauth2-proxy.
|
|
598
|
+
app.add_middleware(oauth2_proxy.OAuth2ProxyMiddleware)
|
|
628
599
|
# AuthProxyMiddleware should precede BasicAuthMiddleware and
|
|
629
600
|
# BearerTokenMiddleware, since it should be skipped if either of those set the
|
|
630
601
|
# auth user.
|
|
@@ -1574,9 +1545,42 @@ async def health(request: fastapi.Request) -> Dict[str, Any]:
|
|
|
1574
1545
|
- commit: str; The commit hash of SkyPilot used for API server.
|
|
1575
1546
|
"""
|
|
1576
1547
|
user = request.state.auth_user
|
|
1577
|
-
|
|
1548
|
+
server_status = common.ApiServerStatus.HEALTHY
|
|
1549
|
+
if getattr(request.state, 'anonymous_user', False):
|
|
1550
|
+
# API server authentication is enabled, but the request is not
|
|
1551
|
+
# authenticated. We still have to serve the request because the
|
|
1552
|
+
# /api/health endpoint has two different usages:
|
|
1553
|
+
# 1. For health check from `api start` and external orchestration
|
|
1554
|
+
# tools (k8s), which does not require authentication and user info.
|
|
1555
|
+
# 2. Return server info to client and hint client to login if required.
|
|
1556
|
+
# Separating these two usages into different APIs will break backward
|
|
1557
|
+
# compatibility for existing orchestration solutions (e.g. helm chart).
|
|
1558
|
+
# So we serve these two usages in a backward compatible manner below.
|
|
1559
|
+
client_version = versions.get_remote_api_version()
|
|
1560
|
+
# - For Client with API version >= 14, we return 200 response with
|
|
1561
|
+
# status=NEEDS_AUTH, new client will handle the login process.
|
|
1562
|
+
# - For health check from `sky api start`, the client code always uses
|
|
1563
|
+
# the same API version as the server, thus there is no compatibility
|
|
1564
|
+
# issue.
|
|
1565
|
+
server_status = common.ApiServerStatus.NEEDS_AUTH
|
|
1566
|
+
if client_version is None:
|
|
1567
|
+
# - For health check from orchestration tools (e.g. k8s), we also
|
|
1568
|
+
# return 200 with status=NEEDS_AUTH, which passes HTTP probe
|
|
1569
|
+
# check.
|
|
1570
|
+
# - There is no harm when a malicious client calls /api/health
|
|
1571
|
+
# without authentication since no sensitive information is
|
|
1572
|
+
# returned.
|
|
1573
|
+
return {'status': common.ApiServerStatus.HEALTHY}
|
|
1574
|
+
# TODO(aylei): remove this after min_compatible_api_version >= 14.
|
|
1575
|
+
if client_version < 14:
|
|
1576
|
+
# For Client with API version < 14, the NEEDS_AUTH status is not
|
|
1577
|
+
# honored. Return 401 to trigger the login process.
|
|
1578
|
+
raise fastapi.HTTPException(status_code=401,
|
|
1579
|
+
detail='Authentication required')
|
|
1580
|
+
|
|
1581
|
+
logger.debug(f'Health endpoint: request.state.auth_user = {user}')
|
|
1578
1582
|
return {
|
|
1579
|
-
'status':
|
|
1583
|
+
'status': server_status,
|
|
1580
1584
|
# Kept for backward compatibility, clients before 0.11.0 will read this
|
|
1581
1585
|
# field to check compatibility and hint the user to upgrade the CLI.
|
|
1582
1586
|
# TODO(aylei): remove this field after 0.13.0
|
sky/setup_files/dependencies.py
CHANGED
|
@@ -69,6 +69,7 @@ install_requires = [
|
|
|
69
69
|
'gitpython',
|
|
70
70
|
'types-paramiko',
|
|
71
71
|
'alembic',
|
|
72
|
+
'aiohttp',
|
|
72
73
|
]
|
|
73
74
|
|
|
74
75
|
server_dependencies = [
|
|
@@ -76,6 +77,7 @@ server_dependencies = [
|
|
|
76
77
|
'sqlalchemy_adapter',
|
|
77
78
|
'passlib',
|
|
78
79
|
'pyjwt',
|
|
80
|
+
'aiohttp',
|
|
79
81
|
]
|
|
80
82
|
|
|
81
83
|
local_ray = [
|
sky/skylet/constants.py
CHANGED
|
@@ -406,7 +406,7 @@ PERSISTENT_RUN_SCRIPT_DIR = '~/.sky/.controller_recovery_task_run'
|
|
|
406
406
|
PERSISTENT_RUN_RESTARTING_SIGNAL_FILE = (
|
|
407
407
|
'~/.sky/.controller_recovery_restarting_signal')
|
|
408
408
|
|
|
409
|
-
HA_PERSISTENT_RECOVERY_LOG_PATH = '/tmp/ha_recovery.log'
|
|
409
|
+
HA_PERSISTENT_RECOVERY_LOG_PATH = '/tmp/{}ha_recovery.log'
|
|
410
410
|
|
|
411
411
|
# The placeholder for the local skypilot config path in file mounts for
|
|
412
412
|
# controllers.
|
sky/skylet/events.py
CHANGED
|
@@ -96,8 +96,12 @@ class ServiceUpdateEvent(SkyletEvent):
|
|
|
96
96
|
"""
|
|
97
97
|
EVENT_INTERVAL_SECONDS = 300
|
|
98
98
|
|
|
99
|
+
def __init__(self, pool: bool) -> None:
|
|
100
|
+
super().__init__()
|
|
101
|
+
self._pool = pool
|
|
102
|
+
|
|
99
103
|
def _run(self):
|
|
100
|
-
serve_utils.update_service_status()
|
|
104
|
+
serve_utils.update_service_status(self._pool)
|
|
101
105
|
|
|
102
106
|
|
|
103
107
|
class UsageHeartbeatReportEvent(SkyletEvent):
|
sky/skylet/skylet.py
CHANGED
|
@@ -24,7 +24,9 @@ EVENTS = [
|
|
|
24
24
|
# This is for monitoring controller job status. If it becomes
|
|
25
25
|
# unhealthy, this event will correctly update the controller
|
|
26
26
|
# status to CONTROLLER_FAILED.
|
|
27
|
-
events.ServiceUpdateEvent(),
|
|
27
|
+
events.ServiceUpdateEvent(pool=False),
|
|
28
|
+
# Status refresh for pool.
|
|
29
|
+
events.ServiceUpdateEvent(pool=True),
|
|
28
30
|
# Report usage heartbeat every 10 minutes.
|
|
29
31
|
events.UsageHeartbeatReportEvent(),
|
|
30
32
|
]
|
sky/task.py
CHANGED
|
@@ -241,21 +241,26 @@ class Task:
|
|
|
241
241
|
self,
|
|
242
242
|
name: Optional[str] = None,
|
|
243
243
|
*,
|
|
244
|
-
setup: Optional[str] = None,
|
|
245
|
-
run: Optional[CommandOrCommandGen] = None,
|
|
244
|
+
setup: Optional[Union[str, List[str]]] = None,
|
|
245
|
+
run: Optional[Union[CommandOrCommandGen, List[str]]] = None,
|
|
246
246
|
envs: Optional[Dict[str, str]] = None,
|
|
247
247
|
secrets: Optional[Dict[str, str]] = None,
|
|
248
248
|
workdir: Optional[Union[str, Dict[str, Any]]] = None,
|
|
249
249
|
num_nodes: Optional[int] = None,
|
|
250
|
+
file_mounts: Optional[Dict[str, str]] = None,
|
|
251
|
+
storage_mounts: Optional[Dict[str, storage_lib.Storage]] = None,
|
|
250
252
|
volumes: Optional[Dict[str, str]] = None,
|
|
253
|
+
resources: Optional[Union['resources_lib.Resources',
|
|
254
|
+
List['resources_lib.Resources'],
|
|
255
|
+
Set['resources_lib.Resources']]] = None,
|
|
251
256
|
# Advanced:
|
|
252
257
|
docker_image: Optional[str] = None,
|
|
253
258
|
event_callback: Optional[str] = None,
|
|
254
259
|
blocked_resources: Optional[Iterable['resources_lib.Resources']] = None,
|
|
255
260
|
# Internal use only.
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
261
|
+
_file_mounts_mapping: Optional[Dict[str, str]] = None,
|
|
262
|
+
_volume_mounts: Optional[List[volume_lib.VolumeMount]] = None,
|
|
263
|
+
_metadata: Optional[Dict[str, Any]] = None,
|
|
259
264
|
_user_specified_yaml: Optional[str] = None,
|
|
260
265
|
):
|
|
261
266
|
"""Initializes a Task.
|
|
@@ -288,15 +293,15 @@ class Task:
|
|
|
288
293
|
|
|
289
294
|
Args:
|
|
290
295
|
name: A string name for the Task for display purposes.
|
|
291
|
-
setup: A setup command, which will be run before executing the run
|
|
296
|
+
setup: A setup command(s), which will be run before executing the run
|
|
292
297
|
commands ``run``, and executed under ``workdir``.
|
|
293
298
|
run: The actual command for the task. If not None, either a shell
|
|
294
|
-
command (str) or a command generator (callable).
|
|
295
|
-
must take a node rank and a list of node addresses as
|
|
296
|
-
return a shell command (str) (valid to return None for
|
|
297
|
-
in which case no commands are run on them).
|
|
298
|
-
run under ``workdir``. Note the command generator
|
|
299
|
-
self-contained lambda.
|
|
299
|
+
command(s) (str, list(str)) or a command generator (callable). If
|
|
300
|
+
latter, it must take a node rank and a list of node addresses as
|
|
301
|
+
input and return a shell command (str) (valid to return None for
|
|
302
|
+
some nodes, in which case no commands are run on them). Run
|
|
303
|
+
commands will be run under ``workdir``. Note the command generator
|
|
304
|
+
should be a self-contained lambda.
|
|
300
305
|
envs: A dictionary of environment variables to set before running the
|
|
301
306
|
setup and run commands.
|
|
302
307
|
secrets: A dictionary of secret environment variables to set before
|
|
@@ -315,22 +320,49 @@ class Task:
|
|
|
315
320
|
setup/run command, where ``run`` can either be a str, meaning all
|
|
316
321
|
nodes get the same command, or a lambda, with the semantics
|
|
317
322
|
documented above.
|
|
323
|
+
file_mounts: An optional dict of ``{remote_path: (local_path|cloud
|
|
324
|
+
URI)}``, where remote means the VM(s) on which this Task will
|
|
325
|
+
eventually run on, and local means the node from which the task is
|
|
326
|
+
launched.
|
|
327
|
+
storage_mounts: an optional dict of ``{mount_path: sky.Storage
|
|
328
|
+
object}``, where mount_path is the path inside the remote VM(s)
|
|
329
|
+
where the Storage object will be mounted on.
|
|
330
|
+
volumes: A dict of volumes to be mounted for the task. The dict has
|
|
331
|
+
the form of ``{mount_path: volume_name}``.
|
|
332
|
+
resources: either a sky.Resources, a set of them, or a list of them.
|
|
333
|
+
A set or a list of resources asks the optimizer to "pick the
|
|
334
|
+
best of these resources" to run this task.
|
|
318
335
|
docker_image: (EXPERIMENTAL: Only in effect when LocalDockerBackend
|
|
319
336
|
is used.) The base docker image that this Task will be built on.
|
|
320
337
|
Defaults to 'gpuci/miniforge-cuda:11.4-devel-ubuntu18.04'.
|
|
338
|
+
event_callback: A bash script that will be executed when the task
|
|
339
|
+
changes state.
|
|
321
340
|
blocked_resources: A set of resources that this task cannot run on.
|
|
322
|
-
|
|
341
|
+
_file_mounts_mapping: (Internal use only) A dictionary of file mounts
|
|
342
|
+
mapping.
|
|
343
|
+
_volume_mounts: (Internal use only) A list of volume mounts.
|
|
344
|
+
_metadata: (Internal use only) A dictionary of metadata to be added to
|
|
345
|
+
the task.
|
|
346
|
+
_user_specified_yaml: (Internal use only) A string of user-specified
|
|
347
|
+
YAML config.
|
|
323
348
|
"""
|
|
324
349
|
self.name = name
|
|
325
|
-
self.run = run
|
|
326
350
|
self.storage_mounts: Dict[str, storage_lib.Storage] = {}
|
|
327
351
|
self.storage_plans: Dict[storage_lib.Storage,
|
|
328
352
|
storage_lib.StoreType] = {}
|
|
329
|
-
self.setup = setup
|
|
330
353
|
self._envs = envs or {}
|
|
331
354
|
self._secrets = secrets or {}
|
|
332
355
|
self._volumes = volumes or {}
|
|
333
356
|
|
|
357
|
+
# concatenate commands if given as list
|
|
358
|
+
def _concat(commands):
|
|
359
|
+
if isinstance(commands, list):
|
|
360
|
+
return '\n'.join(commands)
|
|
361
|
+
return commands
|
|
362
|
+
|
|
363
|
+
self.run = _concat(run)
|
|
364
|
+
self.setup = _concat(setup)
|
|
365
|
+
|
|
334
366
|
# Validate Docker login configuration early if both envs and secrets
|
|
335
367
|
# contain Docker variables
|
|
336
368
|
if self._envs or self._secrets:
|
|
@@ -372,11 +404,19 @@ class Task:
|
|
|
372
404
|
self.best_resources: Optional[sky.Resources] = None
|
|
373
405
|
|
|
374
406
|
# For internal use only.
|
|
375
|
-
self.file_mounts_mapping: Optional[Dict[str,
|
|
407
|
+
self.file_mounts_mapping: Optional[Dict[str,
|
|
408
|
+
str]] = _file_mounts_mapping
|
|
376
409
|
self.volume_mounts: Optional[List[volume_lib.VolumeMount]] = (
|
|
377
|
-
|
|
410
|
+
_volume_mounts)
|
|
378
411
|
|
|
379
|
-
self._metadata =
|
|
412
|
+
self._metadata = _metadata if _metadata is not None else {}
|
|
413
|
+
|
|
414
|
+
if resources is not None:
|
|
415
|
+
self.set_resources(resources)
|
|
416
|
+
if storage_mounts is not None:
|
|
417
|
+
self.set_storage_mounts(storage_mounts)
|
|
418
|
+
if file_mounts is not None:
|
|
419
|
+
self.set_file_mounts(file_mounts)
|
|
380
420
|
|
|
381
421
|
dag = sky.dag.get_current_dag()
|
|
382
422
|
if dag is not None:
|
|
@@ -621,10 +661,10 @@ class Task:
|
|
|
621
661
|
num_nodes=config.pop('num_nodes', None),
|
|
622
662
|
envs=config.pop('envs', None),
|
|
623
663
|
secrets=config.pop('secrets', None),
|
|
624
|
-
event_callback=config.pop('event_callback', None),
|
|
625
|
-
file_mounts_mapping=config.pop('file_mounts_mapping', None),
|
|
626
664
|
volumes=config.pop('volumes', None),
|
|
627
|
-
|
|
665
|
+
event_callback=config.pop('event_callback', None),
|
|
666
|
+
_file_mounts_mapping=config.pop('file_mounts_mapping', None),
|
|
667
|
+
_metadata=config.pop('_metadata', None),
|
|
628
668
|
_user_specified_yaml=user_specified_yaml,
|
|
629
669
|
)
|
|
630
670
|
|
|
@@ -777,6 +777,15 @@ available_node_types:
|
|
|
777
777
|
{{ ray_installation_commands }}
|
|
778
778
|
|
|
779
779
|
VIRTUAL_ENV=~/skypilot-runtime ~/.local/bin/uv pip install skypilot[kubernetes,remote]
|
|
780
|
+
# Wait for `patch` package to be installed before applying ray patches
|
|
781
|
+
until dpkg -l | grep -q "^ii patch "; do
|
|
782
|
+
sleep 0.1
|
|
783
|
+
echo "Waiting for patch package to be installed..."
|
|
784
|
+
done
|
|
785
|
+
# Apply Ray patches for progress bar fix
|
|
786
|
+
~/.local/bin/uv pip list | grep "ray " | grep 2.9.3 2>&1 > /dev/null && {
|
|
787
|
+
VIRTUAL_ENV=~/skypilot-runtime python -c "from sky.skylet.ray_patches import patch; patch()" || exit 1;
|
|
788
|
+
}
|
|
780
789
|
touch /tmp/ray_skypilot_installation_complete
|
|
781
790
|
echo "=== Ray and skypilot installation completed ==="
|
|
782
791
|
|
sky/templates/nebius-ray.yml.j2
CHANGED
sky/usage/usage_lib.py
CHANGED
|
@@ -10,6 +10,8 @@ import traceback
|
|
|
10
10
|
import typing
|
|
11
11
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
12
12
|
|
|
13
|
+
from typing_extensions import ParamSpec
|
|
14
|
+
|
|
13
15
|
import sky
|
|
14
16
|
from sky import sky_logging
|
|
15
17
|
from sky.adaptors import common as adaptors_common
|
|
@@ -517,26 +519,26 @@ def entrypoint_context(name: str, fallback: bool = False):
|
|
|
517
519
|
|
|
518
520
|
|
|
519
521
|
T = typing.TypeVar('T')
|
|
522
|
+
P = ParamSpec('P')
|
|
520
523
|
|
|
521
524
|
|
|
522
525
|
@typing.overload
|
|
523
526
|
def entrypoint(
|
|
524
527
|
name_or_fn: str,
|
|
525
|
-
fallback: bool = False
|
|
526
|
-
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
528
|
+
fallback: bool = False) -> Callable[[Callable[P, T]], Callable[P, T]]:
|
|
527
529
|
...
|
|
528
530
|
|
|
529
531
|
|
|
530
532
|
@typing.overload
|
|
531
|
-
def entrypoint(name_or_fn: Callable[
|
|
532
|
-
fallback: bool = False) -> Callable[
|
|
533
|
+
def entrypoint(name_or_fn: Callable[P, T],
|
|
534
|
+
fallback: bool = False) -> Callable[P, T]:
|
|
533
535
|
...
|
|
534
536
|
|
|
535
537
|
|
|
536
538
|
def entrypoint(
|
|
537
|
-
name_or_fn: Union[str, Callable[
|
|
539
|
+
name_or_fn: Union[str, Callable[P, T]],
|
|
538
540
|
fallback: bool = False
|
|
539
|
-
) -> Union[Callable[
|
|
541
|
+
) -> Union[Callable[P, T], Callable[[Callable[P, T]], Callable[P, T]]]:
|
|
540
542
|
return common_utils.make_decorator(entrypoint_context,
|
|
541
543
|
name_or_fn,
|
|
542
544
|
fallback=fallback)
|
sky/utils/annotations.py
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
"""Annotations for public APIs."""
|
|
2
2
|
|
|
3
3
|
import functools
|
|
4
|
-
from typing import Callable, Literal
|
|
4
|
+
from typing import Callable, Literal, TypeVar
|
|
5
|
+
|
|
6
|
+
from typing_extensions import ParamSpec
|
|
5
7
|
|
|
6
8
|
# Whether the current process is a SkyPilot API server process.
|
|
7
9
|
is_on_api_server = True
|
|
8
10
|
FUNCTIONS_NEED_RELOAD_CACHE = []
|
|
9
11
|
|
|
12
|
+
T = TypeVar('T')
|
|
13
|
+
P = ParamSpec('P')
|
|
14
|
+
|
|
10
15
|
|
|
11
|
-
def client_api(func):
|
|
16
|
+
def client_api(func: Callable[P, T]) -> Callable[P, T]:
|
|
12
17
|
"""Mark a function as a client-side API.
|
|
13
18
|
|
|
14
19
|
Code invoked by server-side functions will find annotations.is_on_api_server
|
|
@@ -38,7 +43,7 @@ def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
|
38
43
|
lru_cache_kwargs: Keyword arguments for functools.lru_cache.
|
|
39
44
|
"""
|
|
40
45
|
|
|
41
|
-
def decorator(func: Callable) -> Callable:
|
|
46
|
+
def decorator(func: Callable[P, T]) -> Callable[P, T]:
|
|
42
47
|
if scope == 'global':
|
|
43
48
|
return functools.lru_cache(*lru_cache_args,
|
|
44
49
|
**lru_cache_kwargs)(func)
|
sky/utils/common_utils.py
CHANGED
|
@@ -271,12 +271,13 @@ _current_command: Optional[str] = None
|
|
|
271
271
|
_current_client_entrypoint: Optional[str] = None
|
|
272
272
|
_using_remote_api_server: Optional[bool] = None
|
|
273
273
|
_current_user: Optional['models.User'] = None
|
|
274
|
+
_current_request_id: Optional[str] = None
|
|
274
275
|
|
|
275
276
|
|
|
276
277
|
def set_request_context(client_entrypoint: Optional[str],
|
|
277
278
|
client_command: Optional[str],
|
|
278
279
|
using_remote_api_server: bool,
|
|
279
|
-
user: Optional['models.User']):
|
|
280
|
+
user: Optional['models.User'], request_id: str) -> None:
|
|
280
281
|
"""Override the current client entrypoint and command.
|
|
281
282
|
|
|
282
283
|
This is useful when we are on the SkyPilot API server side and we have a
|
|
@@ -286,10 +287,19 @@ def set_request_context(client_entrypoint: Optional[str],
|
|
|
286
287
|
global _current_client_entrypoint
|
|
287
288
|
global _using_remote_api_server
|
|
288
289
|
global _current_user
|
|
290
|
+
global _current_request_id
|
|
289
291
|
_current_command = client_command
|
|
290
292
|
_current_client_entrypoint = client_entrypoint
|
|
291
293
|
_using_remote_api_server = using_remote_api_server
|
|
292
294
|
_current_user = user
|
|
295
|
+
_current_request_id = request_id
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def get_current_request_id() -> str:
|
|
299
|
+
"""Returns the current request id."""
|
|
300
|
+
if _current_request_id is not None:
|
|
301
|
+
return _current_request_id
|
|
302
|
+
return 'dummy-request-id'
|
|
293
303
|
|
|
294
304
|
|
|
295
305
|
def get_current_command() -> str:
|
sky/utils/controller_utils.py
CHANGED
|
@@ -209,6 +209,13 @@ class Controllers(enum.Enum):
|
|
|
209
209
|
return None
|
|
210
210
|
|
|
211
211
|
|
|
212
|
+
def get_controller_for_pool(pool: bool) -> Controllers:
|
|
213
|
+
"""Get the controller type."""
|
|
214
|
+
if pool:
|
|
215
|
+
return Controllers.JOBS_CONTROLLER
|
|
216
|
+
return Controllers.SKY_SERVE_CONTROLLER
|
|
217
|
+
|
|
218
|
+
|
|
212
219
|
def high_availability_specified(cluster_name: Optional[str]) -> bool:
|
|
213
220
|
"""Check if the controller high availability is specified in user config.
|
|
214
221
|
"""
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -19,11 +19,11 @@ logger = sky_logging.init_logger(__name__)
|
|
|
19
19
|
DB_INIT_LOCK_TIMEOUT_SECONDS = 10
|
|
20
20
|
|
|
21
21
|
GLOBAL_USER_STATE_DB_NAME = 'state_db'
|
|
22
|
-
GLOBAL_USER_STATE_VERSION = '
|
|
22
|
+
GLOBAL_USER_STATE_VERSION = '002'
|
|
23
23
|
GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
|
|
24
24
|
|
|
25
25
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
|
26
|
-
SPOT_JOBS_VERSION = '
|
|
26
|
+
SPOT_JOBS_VERSION = '003'
|
|
27
27
|
SPOT_JOBS_LOCK_PATH = '~/.sky/locks/.spot_jobs_db.lock'
|
|
28
28
|
|
|
29
29
|
|