skypilot-nightly 1.0.0.dev20251009__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +6 -2
- sky/adaptors/aws.py +25 -7
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +59 -149
- sky/backends/backend_utils.py +104 -63
- sky/backends/cloud_vm_ray_backend.py +84 -39
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +25 -13
- sky/client/cli/command.py +335 -86
- sky/client/cli/flags.py +4 -2
- sky/client/cli/table_utils.py +17 -9
- sky/client/sdk.py +59 -12
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +71 -16
- sky/clouds/azure.py +12 -5
- sky/clouds/cloud.py +19 -9
- sky/clouds/cudo.py +12 -5
- sky/clouds/do.py +4 -1
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +12 -5
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +62 -25
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +12 -5
- sky/clouds/oci.py +12 -5
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +4 -1
- sky/clouds/runpod.py +12 -5
- sky/clouds/scp.py +12 -5
- sky/clouds/seeweb.py +4 -1
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/vast.py +12 -5
- sky/clouds/vsphere.py +4 -1
- sky/core.py +12 -11
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-74503c8e80fd253b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.ad6adaa2a0fa9768.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{4725.10f7a9a5d3ea8208.js → 4725.a830b5c9e7867c92.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-a37d2063af475a1c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-d44859594e6f8064.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-6edeb7d06032adfc.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-479dde13399cf270.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-5ab3b907622cf0fe.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-b84b948ff357c43e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-c5a3eeee1c218af1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-22b23febb3e89ce1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +143 -19
- sky/data/storage.py +168 -11
- sky/exceptions.py +13 -1
- sky/execution.py +13 -0
- sky/global_user_state.py +189 -113
- sky/jobs/client/sdk.py +32 -10
- sky/jobs/client/sdk_async.py +9 -3
- sky/jobs/constants.py +3 -1
- sky/jobs/controller.py +164 -192
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +59 -82
- sky/jobs/scheduler.py +20 -9
- sky/jobs/server/core.py +105 -23
- sky/jobs/server/server.py +40 -28
- sky/jobs/server/utils.py +32 -11
- sky/jobs/state.py +588 -110
- sky/jobs/utils.py +442 -209
- sky/logs/agent.py +1 -1
- sky/metrics/utils.py +45 -6
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +7 -0
- sky/provision/aws/instance.py +2 -1
- sky/provision/azure/instance.py +2 -1
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +2 -1
- sky/provision/do/instance.py +2 -1
- sky/provision/fluidstack/instance.py +4 -3
- sky/provision/gcp/instance.py +2 -1
- sky/provision/hyperbolic/instance.py +2 -1
- sky/provision/instance_setup.py +10 -2
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +222 -89
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/utils.py +114 -53
- sky/provision/kubernetes/volume.py +5 -4
- sky/provision/lambda_cloud/instance.py +2 -1
- sky/provision/nebius/instance.py +2 -1
- sky/provision/oci/instance.py +2 -1
- sky/provision/paperspace/instance.py +2 -1
- sky/provision/provisioner.py +11 -2
- sky/provision/runpod/instance.py +2 -1
- sky/provision/scp/instance.py +2 -1
- sky/provision/seeweb/instance.py +3 -3
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +2 -1
- sky/provision/vsphere/instance.py +2 -1
- sky/resources.py +1 -1
- sky/schemas/api/responses.py +9 -5
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/jobsv1_pb2.py +52 -52
- sky/schemas/generated/jobsv1_pb2.pyi +4 -2
- sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
- sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
- sky/serve/client/impl.py +11 -3
- sky/serve/replica_managers.py +5 -2
- sky/serve/serve_utils.py +9 -2
- sky/serve/server/impl.py +7 -2
- sky/serve/server/server.py +18 -15
- sky/serve/service.py +2 -2
- sky/server/auth/oauth2_proxy.py +2 -5
- sky/server/common.py +31 -28
- sky/server/constants.py +5 -1
- sky/server/daemons.py +27 -19
- sky/server/requests/executor.py +138 -74
- sky/server/requests/payloads.py +9 -1
- sky/server/requests/preconditions.py +13 -10
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +485 -153
- sky/server/requests/serializers/decoders.py +26 -13
- sky/server/requests/serializers/encoders.py +56 -11
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +70 -18
- sky/server/server.py +283 -104
- sky/server/stream_utils.py +233 -59
- sky/server/uvicorn.py +18 -17
- sky/setup_files/alembic.ini +4 -0
- sky/setup_files/dependencies.py +32 -13
- sky/sky_logging.py +0 -2
- sky/skylet/constants.py +30 -7
- sky/skylet/events.py +7 -0
- sky/skylet/log_lib.py +8 -2
- sky/skylet/log_lib.pyi +1 -1
- sky/skylet/services.py +26 -13
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +87 -75
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +67 -54
- sky/templates/kubernetes-ray.yml.j2 +8 -1
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/websocket_proxy.py +142 -12
- sky/users/permission.py +8 -1
- sky/utils/admin_policy_utils.py +16 -3
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +8 -2
- sky/utils/command_runner.py +11 -0
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +7 -4
- sky/utils/context.py +57 -51
- sky/utils/context_utils.py +30 -12
- sky/utils/controller_utils.py +35 -8
- sky/utils/db/db_utils.py +37 -10
- sky/utils/db/migration_utils.py +8 -4
- sky/utils/locks.py +24 -6
- sky/utils/resource_checker.py +4 -1
- sky/utils/resources_utils.py +53 -29
- sky/utils/schemas.py +23 -4
- sky/utils/subprocess_utils.py +17 -4
- sky/volumes/server/server.py +7 -6
- sky/workspaces/server.py +13 -12
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/METADATA +306 -55
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/RECORD +215 -195
- sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
- sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
- sky/dashboard/out/_next/static/chunks/3015-8d748834fcc60b46.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.1fafbf42b3bcebff.js +0 -1
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-f6818c84ed8f1c86.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8f058b0346db2aff.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4f7079dcab6ed653.js +0 -16
- sky/dashboard/out/_next/static/chunks/webpack-6a5ddd0184bfa22c.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js +0 -1
- /sky/dashboard/out/_next/static/{hIViZcQBkn0HE8SpaSsUU → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/global_user_state.py
CHANGED
|
@@ -32,6 +32,7 @@ from sky import sky_logging
|
|
|
32
32
|
from sky import skypilot_config
|
|
33
33
|
from sky.metrics import utils as metrics_lib
|
|
34
34
|
from sky.skylet import constants
|
|
35
|
+
from sky.utils import annotations
|
|
35
36
|
from sky.utils import common_utils
|
|
36
37
|
from sky.utils import context_utils
|
|
37
38
|
from sky.utils import registry
|
|
@@ -342,6 +343,10 @@ def initialize_and_get_db() -> sqlalchemy.engine.Engine:
|
|
|
342
343
|
|
|
343
344
|
# return engine
|
|
344
345
|
_SQLALCHEMY_ENGINE = engine
|
|
346
|
+
# Cache the result of _sqlite_supports_returning()
|
|
347
|
+
# ahead of time, as it won't change throughout
|
|
348
|
+
# the lifetime of the engine.
|
|
349
|
+
_sqlite_supports_returning()
|
|
345
350
|
return _SQLALCHEMY_ENGINE
|
|
346
351
|
|
|
347
352
|
|
|
@@ -372,19 +377,51 @@ def _init_db(func):
|
|
|
372
377
|
return wrapper
|
|
373
378
|
|
|
374
379
|
|
|
380
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
|
381
|
+
def _sqlite_supports_returning() -> bool:
|
|
382
|
+
"""Check if SQLite (3.35.0+) and SQLAlchemy (2.0+) support RETURNING.
|
|
383
|
+
|
|
384
|
+
See https://sqlite.org/lang_returning.html and
|
|
385
|
+
https://docs.sqlalchemy.org/en/20/dialects/sqlite.html#insert-update-delete-returning # pylint: disable=line-too-long
|
|
386
|
+
"""
|
|
387
|
+
sqlalchemy_version_parts = sqlalchemy.__version__.split('.')
|
|
388
|
+
assert len(sqlalchemy_version_parts) >= 1, \
|
|
389
|
+
f'Invalid SQLAlchemy version: {sqlalchemy.__version__}'
|
|
390
|
+
sqlalchemy_major = int(sqlalchemy_version_parts[0])
|
|
391
|
+
if sqlalchemy_major < 2:
|
|
392
|
+
return False
|
|
393
|
+
|
|
394
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
395
|
+
if (_SQLALCHEMY_ENGINE.dialect.name !=
|
|
396
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
397
|
+
return False
|
|
398
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
399
|
+
result = session.execute(sqlalchemy.text('SELECT sqlite_version()'))
|
|
400
|
+
version_str = result.scalar()
|
|
401
|
+
version_parts = version_str.split('.')
|
|
402
|
+
assert len(version_parts) >= 2, \
|
|
403
|
+
f'Invalid version string: {version_str}'
|
|
404
|
+
major, minor = int(version_parts[0]), int(version_parts[1])
|
|
405
|
+
return (major > 3) or (major == 3 and minor >= 35)
|
|
406
|
+
|
|
407
|
+
|
|
375
408
|
@_init_db
|
|
376
409
|
@metrics_lib.time_me
|
|
377
|
-
def add_or_update_user(
|
|
378
|
-
|
|
410
|
+
def add_or_update_user(
|
|
411
|
+
user: models.User,
|
|
412
|
+
allow_duplicate_name: bool = True,
|
|
413
|
+
return_user: bool = False
|
|
414
|
+
) -> typing.Union[bool, typing.Tuple[bool, models.User]]:
|
|
379
415
|
"""Store the mapping from user hash to user name for display purposes.
|
|
380
416
|
|
|
381
417
|
Returns:
|
|
382
|
-
|
|
418
|
+
If return_user=False: bool (whether the user is newly added)
|
|
419
|
+
If return_user=True: Tuple[bool, models.User]
|
|
383
420
|
"""
|
|
384
421
|
assert _SQLALCHEMY_ENGINE is not None
|
|
385
422
|
|
|
386
423
|
if user.name is None:
|
|
387
|
-
return False
|
|
424
|
+
return (False, user) if return_user else False
|
|
388
425
|
|
|
389
426
|
# Set created_at if not already set
|
|
390
427
|
created_at = user.created_at
|
|
@@ -396,7 +433,7 @@ def add_or_update_user(user: models.User,
|
|
|
396
433
|
existing_user = session.query(user_table).filter(
|
|
397
434
|
user_table.c.name == user.name).first()
|
|
398
435
|
if existing_user is not None:
|
|
399
|
-
return False
|
|
436
|
+
return (False, user) if return_user else False
|
|
400
437
|
|
|
401
438
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
402
439
|
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
@@ -410,24 +447,57 @@ def add_or_update_user(user: models.User,
|
|
|
410
447
|
name=user.name,
|
|
411
448
|
password=user.password,
|
|
412
449
|
created_at=created_at)
|
|
450
|
+
use_returning = return_user and _sqlite_supports_returning()
|
|
451
|
+
if use_returning:
|
|
452
|
+
insert_stmnt = insert_stmnt.returning(
|
|
453
|
+
user_table.c.id,
|
|
454
|
+
user_table.c.name,
|
|
455
|
+
user_table.c.password,
|
|
456
|
+
user_table.c.created_at,
|
|
457
|
+
)
|
|
413
458
|
result = session.execute(insert_stmnt)
|
|
414
459
|
|
|
415
|
-
|
|
416
|
-
|
|
460
|
+
row = None
|
|
461
|
+
if use_returning:
|
|
462
|
+
# With RETURNING, check if we got a row back.
|
|
463
|
+
row = result.fetchone()
|
|
464
|
+
was_inserted = row is not None
|
|
465
|
+
else:
|
|
466
|
+
# Without RETURNING, use rowcount.
|
|
467
|
+
was_inserted = result.rowcount > 0
|
|
417
468
|
|
|
418
469
|
if not was_inserted:
|
|
419
470
|
# User existed, so update it (but don't update created_at)
|
|
471
|
+
update_values = {user_table.c.name: user.name}
|
|
420
472
|
if user.password:
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
473
|
+
update_values[user_table.c.password] = user.password
|
|
474
|
+
|
|
475
|
+
update_stmnt = sqlalchemy.update(user_table).where(
|
|
476
|
+
user_table.c.id == user.id).values(update_values)
|
|
477
|
+
if use_returning:
|
|
478
|
+
update_stmnt = update_stmnt.returning(
|
|
479
|
+
user_table.c.id, user_table.c.name,
|
|
480
|
+
user_table.c.password, user_table.c.created_at)
|
|
481
|
+
|
|
482
|
+
result = session.execute(update_stmnt)
|
|
483
|
+
if use_returning:
|
|
484
|
+
row = result.fetchone()
|
|
428
485
|
|
|
429
486
|
session.commit()
|
|
430
|
-
|
|
487
|
+
|
|
488
|
+
if return_user:
|
|
489
|
+
if row is None:
|
|
490
|
+
# row=None means the sqlite used has no RETURNING support,
|
|
491
|
+
# so we need to do a separate query
|
|
492
|
+
row = session.query(user_table).filter_by(
|
|
493
|
+
id=user.id).first()
|
|
494
|
+
updated_user = models.User(id=row.id,
|
|
495
|
+
name=row.name,
|
|
496
|
+
password=row.password,
|
|
497
|
+
created_at=row.created_at)
|
|
498
|
+
return was_inserted, updated_user
|
|
499
|
+
else:
|
|
500
|
+
return was_inserted
|
|
431
501
|
|
|
432
502
|
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
433
503
|
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
@@ -452,6 +522,9 @@ def add_or_update_user(user: models.User,
|
|
|
452
522
|
upsert_stmnt = insert_stmnt.on_conflict_do_update(
|
|
453
523
|
index_elements=[user_table.c.id], set_=set_).returning(
|
|
454
524
|
user_table.c.id,
|
|
525
|
+
user_table.c.name,
|
|
526
|
+
user_table.c.password,
|
|
527
|
+
user_table.c.created_at,
|
|
455
528
|
# This will be True for INSERT, False for UPDATE
|
|
456
529
|
sqlalchemy.literal_column('(xmax = 0)').label('was_inserted'
|
|
457
530
|
))
|
|
@@ -459,10 +532,17 @@ def add_or_update_user(user: models.User,
|
|
|
459
532
|
result = session.execute(upsert_stmnt)
|
|
460
533
|
row = result.fetchone()
|
|
461
534
|
|
|
462
|
-
|
|
535
|
+
was_inserted = bool(row.was_inserted) if row else False
|
|
463
536
|
session.commit()
|
|
464
537
|
|
|
465
|
-
|
|
538
|
+
if return_user:
|
|
539
|
+
updated_user = models.User(id=row.id,
|
|
540
|
+
name=row.name,
|
|
541
|
+
password=row.password,
|
|
542
|
+
created_at=row.created_at)
|
|
543
|
+
return was_inserted, updated_user
|
|
544
|
+
else:
|
|
545
|
+
return was_inserted
|
|
466
546
|
else:
|
|
467
547
|
raise ValueError('Unsupported database dialect')
|
|
468
548
|
|
|
@@ -1081,6 +1161,26 @@ def get_handles_from_cluster_names(
|
|
|
1081
1161
|
}
|
|
1082
1162
|
|
|
1083
1163
|
|
|
1164
|
+
@_init_db
|
|
1165
|
+
@metrics_lib.time_me
|
|
1166
|
+
def get_cluster_name_to_handle_map(
|
|
1167
|
+
is_managed: Optional[bool] = None,
|
|
1168
|
+
) -> Dict[str, Optional['backends.ResourceHandle']]:
|
|
1169
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1170
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1171
|
+
query = session.query(cluster_table.c.name, cluster_table.c.handle)
|
|
1172
|
+
if is_managed is not None:
|
|
1173
|
+
query = query.filter(cluster_table.c.is_managed == int(is_managed))
|
|
1174
|
+
rows = query.all()
|
|
1175
|
+
name_to_handle = {}
|
|
1176
|
+
for row in rows:
|
|
1177
|
+
if row.handle and len(row.handle) > 0:
|
|
1178
|
+
name_to_handle[row.name] = pickle.loads(row.handle)
|
|
1179
|
+
else:
|
|
1180
|
+
name_to_handle[row.name] = None
|
|
1181
|
+
return name_to_handle
|
|
1182
|
+
|
|
1183
|
+
|
|
1084
1184
|
@_init_db_async
|
|
1085
1185
|
@metrics_lib.time_me
|
|
1086
1186
|
async def get_status_from_cluster_name_async(
|
|
@@ -1494,41 +1594,31 @@ def get_cluster_from_name(
|
|
|
1494
1594
|
include_user_info: bool = True,
|
|
1495
1595
|
summary_response: bool = False) -> Optional[Dict[str, Any]]:
|
|
1496
1596
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1597
|
+
query_fields = [
|
|
1598
|
+
cluster_table.c.name,
|
|
1599
|
+
cluster_table.c.launched_at,
|
|
1600
|
+
cluster_table.c.handle,
|
|
1601
|
+
cluster_table.c.last_use,
|
|
1602
|
+
cluster_table.c.status,
|
|
1603
|
+
cluster_table.c.autostop,
|
|
1604
|
+
cluster_table.c.to_down,
|
|
1605
|
+
cluster_table.c.owner,
|
|
1606
|
+
cluster_table.c.metadata,
|
|
1607
|
+
cluster_table.c.cluster_hash,
|
|
1608
|
+
cluster_table.c.cluster_ever_up,
|
|
1609
|
+
cluster_table.c.status_updated_at,
|
|
1610
|
+
cluster_table.c.user_hash,
|
|
1611
|
+
cluster_table.c.config_hash,
|
|
1612
|
+
cluster_table.c.workspace,
|
|
1613
|
+
cluster_table.c.is_managed,
|
|
1614
|
+
]
|
|
1615
|
+
if not summary_response:
|
|
1616
|
+
query_fields.extend([
|
|
1617
|
+
cluster_table.c.last_creation_yaml,
|
|
1618
|
+
cluster_table.c.last_creation_command,
|
|
1619
|
+
])
|
|
1497
1620
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1498
|
-
|
|
1499
|
-
query = session.query(
|
|
1500
|
-
cluster_table.c.name, cluster_table.c.launched_at,
|
|
1501
|
-
cluster_table.c.handle, cluster_table.c.last_use,
|
|
1502
|
-
cluster_table.c.status, cluster_table.c.autostop,
|
|
1503
|
-
cluster_table.c.to_down, cluster_table.c.owner,
|
|
1504
|
-
cluster_table.c.metadata, cluster_table.c.cluster_hash,
|
|
1505
|
-
cluster_table.c.storage_mounts_metadata,
|
|
1506
|
-
cluster_table.c.cluster_ever_up,
|
|
1507
|
-
cluster_table.c.status_updated_at, cluster_table.c.user_hash,
|
|
1508
|
-
cluster_table.c.config_hash, cluster_table.c.workspace,
|
|
1509
|
-
cluster_table.c.is_managed)
|
|
1510
|
-
else:
|
|
1511
|
-
query = session.query(
|
|
1512
|
-
cluster_table.c.name,
|
|
1513
|
-
cluster_table.c.launched_at,
|
|
1514
|
-
cluster_table.c.handle,
|
|
1515
|
-
cluster_table.c.last_use,
|
|
1516
|
-
cluster_table.c.status,
|
|
1517
|
-
cluster_table.c.autostop,
|
|
1518
|
-
cluster_table.c.to_down,
|
|
1519
|
-
cluster_table.c.owner,
|
|
1520
|
-
cluster_table.c.metadata,
|
|
1521
|
-
cluster_table.c.cluster_hash,
|
|
1522
|
-
cluster_table.c.storage_mounts_metadata,
|
|
1523
|
-
cluster_table.c.cluster_ever_up,
|
|
1524
|
-
cluster_table.c.status_updated_at,
|
|
1525
|
-
cluster_table.c.user_hash,
|
|
1526
|
-
cluster_table.c.config_hash,
|
|
1527
|
-
cluster_table.c.workspace,
|
|
1528
|
-
cluster_table.c.is_managed,
|
|
1529
|
-
# extra fields compared to above query
|
|
1530
|
-
cluster_table.c.last_creation_yaml,
|
|
1531
|
-
cluster_table.c.last_creation_command)
|
|
1621
|
+
query = session.query(*query_fields)
|
|
1532
1622
|
row = query.filter_by(name=cluster_name).first()
|
|
1533
1623
|
if row is None:
|
|
1534
1624
|
return None
|
|
@@ -1551,8 +1641,6 @@ def get_cluster_from_name(
|
|
|
1551
1641
|
'owner': _load_owner(row.owner),
|
|
1552
1642
|
'metadata': json.loads(row.metadata),
|
|
1553
1643
|
'cluster_hash': row.cluster_hash,
|
|
1554
|
-
'storage_mounts_metadata': _load_storage_mounts_metadata(
|
|
1555
|
-
row.storage_mounts_metadata),
|
|
1556
1644
|
'cluster_ever_up': bool(row.cluster_ever_up),
|
|
1557
1645
|
'status_updated_at': row.status_updated_at,
|
|
1558
1646
|
'workspace': row.workspace,
|
|
@@ -1609,41 +1697,34 @@ def get_clusters(
|
|
|
1609
1697
|
# we treat it as belonging to the current user.
|
|
1610
1698
|
current_user_hash = common_utils.get_user_hash()
|
|
1611
1699
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
cluster_table.c.user_hash,
|
|
1641
|
-
cluster_table.c.config_hash,
|
|
1642
|
-
cluster_table.c.workspace,
|
|
1643
|
-
cluster_table.c.is_managed,
|
|
1644
|
-
# extra fields compared to above query
|
|
1645
|
-
cluster_table.c.last_creation_yaml,
|
|
1646
|
-
cluster_table.c.last_creation_command)
|
|
1700
|
+
query_fields = [
|
|
1701
|
+
cluster_table.c.name,
|
|
1702
|
+
cluster_table.c.launched_at,
|
|
1703
|
+
cluster_table.c.handle,
|
|
1704
|
+
cluster_table.c.status,
|
|
1705
|
+
cluster_table.c.autostop,
|
|
1706
|
+
cluster_table.c.to_down,
|
|
1707
|
+
cluster_table.c.cluster_hash,
|
|
1708
|
+
cluster_table.c.cluster_ever_up,
|
|
1709
|
+
cluster_table.c.user_hash,
|
|
1710
|
+
cluster_table.c.workspace,
|
|
1711
|
+
user_table.c.name.label('user_name'),
|
|
1712
|
+
]
|
|
1713
|
+
if not summary_response:
|
|
1714
|
+
query_fields.extend([
|
|
1715
|
+
cluster_table.c.last_creation_yaml,
|
|
1716
|
+
cluster_table.c.last_creation_command,
|
|
1717
|
+
cluster_table.c.config_hash,
|
|
1718
|
+
cluster_table.c.owner,
|
|
1719
|
+
cluster_table.c.metadata,
|
|
1720
|
+
cluster_table.c.last_use,
|
|
1721
|
+
cluster_table.c.status_updated_at,
|
|
1722
|
+
])
|
|
1723
|
+
if not exclude_managed_clusters:
|
|
1724
|
+
query_fields.append(cluster_table.c.is_managed)
|
|
1725
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1726
|
+
query = session.query(*query_fields).outerjoin(
|
|
1727
|
+
user_table, cluster_table.c.user_hash == user_table.c.id)
|
|
1647
1728
|
if exclude_managed_clusters:
|
|
1648
1729
|
query = query.filter(cluster_table.c.is_managed == int(False))
|
|
1649
1730
|
if workspaces_filter is not None:
|
|
@@ -1666,55 +1747,50 @@ def get_clusters(
|
|
|
1666
1747
|
rows = query.all()
|
|
1667
1748
|
records = []
|
|
1668
1749
|
|
|
1669
|
-
#
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
user_hashes = set(row_to_user_hash.values())
|
|
1678
|
-
user_hash_to_user = get_users(user_hashes)
|
|
1750
|
+
# Check if we need to fetch the current user's name,
|
|
1751
|
+
# for backwards compatibility, if user_hash is None.
|
|
1752
|
+
current_user_name = None
|
|
1753
|
+
needs_current_user = any(row.user_hash is None for row in rows)
|
|
1754
|
+
if needs_current_user:
|
|
1755
|
+
current_user = get_user(current_user_hash)
|
|
1756
|
+
current_user_name = (current_user.name
|
|
1757
|
+
if current_user is not None else None)
|
|
1679
1758
|
|
|
1680
1759
|
# get last cluster event for each row
|
|
1681
|
-
cluster_hashes = set(row_to_user_hash.keys())
|
|
1682
1760
|
if not summary_response:
|
|
1761
|
+
cluster_hashes = {row.cluster_hash for row in rows}
|
|
1683
1762
|
last_cluster_event_dict = _get_last_cluster_event_multiple(
|
|
1684
1763
|
cluster_hashes, ClusterEventType.STATUS_CHANGE)
|
|
1685
1764
|
|
|
1686
|
-
# get user for each row
|
|
1687
1765
|
for row in rows:
|
|
1688
|
-
user_hash = row_to_user_hash[row.cluster_hash]
|
|
1689
|
-
user = user_hash_to_user.get(user_hash, None)
|
|
1690
|
-
user_name = user.name if user is not None else None
|
|
1691
1766
|
# TODO: use namedtuple instead of dict
|
|
1692
1767
|
record = {
|
|
1693
1768
|
'name': row.name,
|
|
1694
1769
|
'launched_at': row.launched_at,
|
|
1695
1770
|
'handle': pickle.loads(row.handle),
|
|
1696
|
-
'last_use': row.last_use,
|
|
1697
1771
|
'status': status_lib.ClusterStatus[row.status],
|
|
1698
1772
|
'autostop': row.autostop,
|
|
1699
1773
|
'to_down': bool(row.to_down),
|
|
1700
|
-
'owner': _load_owner(row.owner),
|
|
1701
|
-
'metadata': json.loads(row.metadata),
|
|
1702
1774
|
'cluster_hash': row.cluster_hash,
|
|
1703
|
-
'storage_mounts_metadata': _load_storage_mounts_metadata(
|
|
1704
|
-
row.storage_mounts_metadata),
|
|
1705
1775
|
'cluster_ever_up': bool(row.cluster_ever_up),
|
|
1706
|
-
'
|
|
1707
|
-
|
|
1708
|
-
'user_name': user_name
|
|
1776
|
+
'user_hash': (row.user_hash
|
|
1777
|
+
if row.user_hash is not None else current_user_hash),
|
|
1778
|
+
'user_name': (row.user_name
|
|
1779
|
+
if row.user_name is not None else current_user_name),
|
|
1709
1780
|
'workspace': row.workspace,
|
|
1710
|
-
'is_managed':
|
|
1711
|
-
|
|
1781
|
+
'is_managed': False
|
|
1782
|
+
if exclude_managed_clusters else bool(row.is_managed),
|
|
1712
1783
|
}
|
|
1713
1784
|
if not summary_response:
|
|
1714
1785
|
record['last_creation_yaml'] = row.last_creation_yaml
|
|
1715
1786
|
record['last_creation_command'] = row.last_creation_command
|
|
1716
1787
|
record['last_event'] = last_cluster_event_dict.get(
|
|
1717
1788
|
row.cluster_hash, None)
|
|
1789
|
+
record['config_hash'] = row.config_hash
|
|
1790
|
+
record['owner'] = _load_owner(row.owner)
|
|
1791
|
+
record['metadata'] = json.loads(row.metadata)
|
|
1792
|
+
record['last_use'] = row.last_use
|
|
1793
|
+
record['status_updated_at'] = row.status_updated_at
|
|
1718
1794
|
|
|
1719
1795
|
records.append(record)
|
|
1720
1796
|
return records
|
sky/jobs/client/sdk.py
CHANGED
|
@@ -15,6 +15,7 @@ from sky.server import common as server_common
|
|
|
15
15
|
from sky.server import rest
|
|
16
16
|
from sky.server import versions
|
|
17
17
|
from sky.server.requests import payloads
|
|
18
|
+
from sky.server.requests import request_names
|
|
18
19
|
from sky.skylet import constants
|
|
19
20
|
from sky.usage import usage_lib
|
|
20
21
|
from sky.utils import admin_policy_utils
|
|
@@ -84,7 +85,9 @@ def launch(
|
|
|
84
85
|
|
|
85
86
|
dag = dag_utils.convert_entrypoint_to_dag(task)
|
|
86
87
|
with admin_policy_utils.apply_and_use_config_in_current_request(
|
|
87
|
-
dag,
|
|
88
|
+
dag,
|
|
89
|
+
request_name=request_names.AdminPolicyRequestName.JOBS_LAUNCH,
|
|
90
|
+
at_client_side=True) as dag:
|
|
88
91
|
sdk.validate(dag)
|
|
89
92
|
if _need_confirmation:
|
|
90
93
|
job_identity = 'a managed job'
|
|
@@ -130,8 +133,11 @@ def queue(
|
|
|
130
133
|
refresh: bool,
|
|
131
134
|
skip_finished: bool = False,
|
|
132
135
|
all_users: bool = False,
|
|
133
|
-
job_ids: Optional[List[int]] = None
|
|
134
|
-
|
|
136
|
+
job_ids: Optional[List[int]] = None,
|
|
137
|
+
limit: Optional[int] = None,
|
|
138
|
+
fields: Optional[List[str]] = None,
|
|
139
|
+
) -> server_common.RequestId[Union[List[responses.ManagedJobRecord], Tuple[
|
|
140
|
+
List[responses.ManagedJobRecord], int, Dict[str, int], int]]]:
|
|
135
141
|
"""Gets statuses of managed jobs.
|
|
136
142
|
|
|
137
143
|
Please refer to sky.cli.job_queue for documentation.
|
|
@@ -141,6 +147,8 @@ def queue(
|
|
|
141
147
|
skip_finished: Whether to skip finished jobs.
|
|
142
148
|
all_users: Whether to show all users' jobs.
|
|
143
149
|
job_ids: IDs of the managed jobs to show.
|
|
150
|
+
limit: Number of jobs to show.
|
|
151
|
+
fields: Fields to get for the managed jobs.
|
|
144
152
|
|
|
145
153
|
Returns:
|
|
146
154
|
The request ID of the queue request.
|
|
@@ -173,15 +181,29 @@ def queue(
|
|
|
173
181
|
does not exist.
|
|
174
182
|
RuntimeError: if failed to get the managed jobs with ssh.
|
|
175
183
|
"""
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
184
|
+
remote_api_version = versions.get_remote_api_version()
|
|
185
|
+
if remote_api_version and remote_api_version >= 18:
|
|
186
|
+
body = payloads.JobsQueueV2Body(
|
|
187
|
+
refresh=refresh,
|
|
188
|
+
skip_finished=skip_finished,
|
|
189
|
+
all_users=all_users,
|
|
190
|
+
job_ids=job_ids,
|
|
191
|
+
limit=limit,
|
|
192
|
+
fields=fields,
|
|
193
|
+
)
|
|
194
|
+
path = '/jobs/queue/v2'
|
|
195
|
+
else:
|
|
196
|
+
body = payloads.JobsQueueBody(
|
|
197
|
+
refresh=refresh,
|
|
198
|
+
skip_finished=skip_finished,
|
|
199
|
+
all_users=all_users,
|
|
200
|
+
job_ids=job_ids,
|
|
201
|
+
)
|
|
202
|
+
path = '/jobs/queue'
|
|
203
|
+
|
|
182
204
|
response = server_common.make_authenticated_request(
|
|
183
205
|
'POST',
|
|
184
|
-
|
|
206
|
+
path,
|
|
185
207
|
json=json.loads(body.model_dump_json()),
|
|
186
208
|
timeout=(5, None))
|
|
187
209
|
return server_common.get_request_id(response=response)
|
sky/jobs/client/sdk_async.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
"""Async SDK functions for managed jobs."""
|
|
2
2
|
import typing
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from sky import backends
|
|
6
6
|
from sky import sky_logging
|
|
7
7
|
from sky.adaptors import common as adaptors_common
|
|
8
8
|
from sky.client import sdk_async
|
|
9
9
|
from sky.jobs.client import sdk
|
|
10
|
+
from sky.schemas.api import responses
|
|
10
11
|
from sky.skylet import constants
|
|
11
12
|
from sky.usage import usage_lib
|
|
12
13
|
from sky.utils import common_utils
|
|
@@ -50,12 +51,17 @@ async def queue(
|
|
|
50
51
|
refresh: bool,
|
|
51
52
|
skip_finished: bool = False,
|
|
52
53
|
all_users: bool = False,
|
|
54
|
+
job_ids: Optional[List[int]] = None,
|
|
55
|
+
limit: Optional[int] = None,
|
|
56
|
+
fields: Optional[List[str]] = None,
|
|
53
57
|
stream_logs: Optional[
|
|
54
58
|
sdk_async.StreamConfig] = sdk_async.DEFAULT_STREAM_CONFIG
|
|
55
|
-
) -> List[
|
|
59
|
+
) -> Union[List[responses.ManagedJobRecord], Tuple[
|
|
60
|
+
List[responses.ManagedJobRecord], int, Dict[str, int], int]]:
|
|
56
61
|
"""Async version of queue() that gets statuses of managed jobs."""
|
|
57
62
|
request_id = await context_utils.to_thread(sdk.queue, refresh,
|
|
58
|
-
skip_finished, all_users
|
|
63
|
+
skip_finished, all_users,
|
|
64
|
+
job_ids, limit, fields)
|
|
59
65
|
if stream_logs is not None:
|
|
60
66
|
return await sdk_async._stream_and_get(request_id, stream_logs) # pylint: disable=protected-access
|
|
61
67
|
else:
|
sky/jobs/constants.py
CHANGED
|
@@ -46,7 +46,9 @@ JOBS_CLUSTER_NAME_PREFIX_LENGTH = 25
|
|
|
46
46
|
# The version of the lib files that jobs/utils use. Whenever there is an API
|
|
47
47
|
# change for the jobs/utils, we need to bump this version and update
|
|
48
48
|
# job.utils.ManagedJobCodeGen to handle the version update.
|
|
49
|
-
|
|
49
|
+
# WARNING: If you update this due to a codegen change, make sure to make the
|
|
50
|
+
# corresponding change in the ManagedJobsService AND bump the SKYLET_VERSION.
|
|
51
|
+
MANAGED_JOBS_VERSION = 12
|
|
50
52
|
|
|
51
53
|
# The command for setting up the jobs dashboard on the controller. It firstly
|
|
52
54
|
# checks if the systemd services are available, and if not (e.g., Kubernetes
|