skypilot-nightly 1.0.0.dev20250603__py3-none-any.whl → 1.0.0.dev20250605__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- sky/__init__.py +3 -3
- sky/adaptors/kubernetes.py +8 -0
- sky/admin_policy.py +5 -0
- sky/backends/backend_utils.py +1 -0
- sky/backends/cloud_vm_ray_backend.py +8 -4
- sky/{clouds/service_catalog → catalog}/__init__.py +6 -17
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +3 -3
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +2 -2
- sky/{clouds/service_catalog → catalog}/common.py +2 -2
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +2 -2
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +2 -2
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/ssh_catalog.py +3 -3
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +1 -1
- sky/cli.py +16 -13
- sky/client/cli.py +16 -13
- sky/client/sdk.py +30 -12
- sky/clouds/aws.py +41 -40
- sky/clouds/azure.py +31 -34
- sky/clouds/cloud.py +8 -8
- sky/clouds/cudo.py +26 -26
- sky/clouds/do.py +24 -24
- sky/clouds/fluidstack.py +27 -29
- sky/clouds/gcp.py +42 -42
- sky/clouds/ibm.py +26 -26
- sky/clouds/kubernetes.py +24 -12
- sky/clouds/lambda_cloud.py +28 -30
- sky/clouds/nebius.py +26 -28
- sky/clouds/oci.py +32 -32
- sky/clouds/paperspace.py +24 -26
- sky/clouds/runpod.py +26 -28
- sky/clouds/scp.py +37 -36
- sky/clouds/utils/gcp_utils.py +3 -2
- sky/clouds/vast.py +27 -27
- sky/clouds/vsphere.py +12 -15
- sky/core.py +2 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/614-635a84e87800f99e.js +66 -0
- sky/dashboard/out/_next/static/chunks/{856-f1b1f7f47edde2e8.js → 856-3a32da4b84176f6d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-1a1eeb949dab8897.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/users-262aab38b9baaf3a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js +1 -0
- sky/dashboard/out/_next/static/chunks/{webpack-f27c9a32aa3d9c6d.js → webpack-65d465f948974c0d.js} +1 -1
- sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +3 -0
- sky/dashboard/out/_next/static/qjhIe-yC6nHcLKBqpzO1M/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +5 -2
- sky/execution.py +44 -46
- sky/global_user_state.py +119 -86
- sky/jobs/client/sdk.py +4 -1
- sky/jobs/server/core.py +6 -2
- sky/models.py +1 -0
- sky/optimizer.py +1 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/kubernetes/utils.py +35 -22
- sky/provision/vast/utils.py +1 -1
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +1 -1
- sky/provision/vsphere/vsphere_utils.py +7 -11
- sky/resources.py +24 -3
- sky/serve/server/core.py +1 -1
- sky/server/constants.py +3 -1
- sky/server/requests/executor.py +4 -1
- sky/server/requests/payloads.py +25 -0
- sky/server/requests/serializers/decoders.py +1 -1
- sky/server/server.py +33 -12
- sky/server/stream_utils.py +2 -38
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +10 -4
- sky/skypilot_config.py +92 -39
- sky/templates/websocket_proxy.py +11 -1
- sky/usage/usage_lib.py +4 -3
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +178 -0
- sky/users/rbac.py +86 -0
- sky/users/server.py +66 -0
- sky/utils/accelerator_registry.py +3 -3
- sky/utils/kubernetes/deploy_remote_cluster.py +2 -1
- sky/utils/schemas.py +20 -10
- sky/workspaces/core.py +2 -2
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/METADATA +3 -1
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/RECORD +134 -130
- sky/clouds/service_catalog/constants.py +0 -8
- sky/dashboard/out/_next/static/chunks/614-3d29f98e0634b179.js +0 -66
- sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-35383adcb0edb5e2.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/users-07b523ccb19317ad.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/workspaces-f54921ec9eb20965.js +0 -1
- sky/dashboard/out/_next/static/css/63d3995d8b528eb1.css +0 -3
- sky/dashboard/out/_next/static/zTAFq_Iv6_yxQj3fXvJWR/_buildManifest.js +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +0 -0
- /sky/dashboard/out/_next/static/chunks/{121-8f55ee3fa6301784.js → 121-865d2bf8a3b84c6a.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{236-fef38aa6e5639300.js → 236-4c0dc6f63ccc6319.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{37-947904ccc5687bac.js → 37-beedd583fea84cc8.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{682-2be9b0f169727f2f.js → 682-6647f0417d5662f0.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{843-a097338acb89b7d7.js → 843-c296541442d4af88.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{969-d7b6fb7f602bfcb3.js → 969-c7abda31c10440ac.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-67925f5e6382e22f.js → _app-cb81dc4d27f4d009.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-158b70da336d8607.js → [job]-65d04d5d77cbb6b6.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-62c9982dc3675725.js → [cluster]-beabbcd7606c1a23.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-a62a3c65dc9bc57c.js → [job]-86c47edc500f15f9.js} +0 -0
- /sky/dashboard/out/_next/static/{zTAFq_Iv6_yxQj3fXvJWR → qjhIe-yC6nHcLKBqpzO1M}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/top_level.txt +0 -0
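Most of the renamed files above come from moving the `sky/clouds/service_catalog` package to `sky/catalog`. A minimal sketch of what this means for imports: the new-path import and the `service_catalog_common` alias are taken from the `sky/jobs/server/core.py` diff below, while the try/except fallback to the old path is purely illustrative and not part of the package.

```python
# Illustrative only: importing the catalog helpers across the rename.
try:
    # 1.0.0.dev20250605 and later nightlies.
    from sky.catalog import common as service_catalog_common
except ImportError:
    # Older nightlies that still ship sky.clouds.service_catalog.
    from sky.clouds.service_catalog import common as service_catalog_common
```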
sky/global_user_state.py
CHANGED
@@ -26,6 +26,7 @@ import yaml
 
 from sky import models
 from sky import sky_logging
+from sky import skypilot_config
 from sky.skylet import constants
 from sky.utils import common_utils
 from sky.utils import context_utils
@@ -43,18 +44,6 @@ logger = sky_logging.init_logger(__name__)
 
 _ENABLED_CLOUDS_KEY_PREFIX = 'enabled_clouds_'
 
-_DB_PATH = os.path.expanduser('~/.sky/state.db')
-pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
-
-if os.environ.get(constants.SKYPILOT_API_SERVER_DB_URL_ENV_VAR):
-    # If SKYPILOT_API_SERVER_DB_URL_ENV_VAR is set, use it as the database URI.
-    logger.debug(
-        f'using db URI from {constants.SKYPILOT_API_SERVER_DB_URL_ENV_VAR}')
-    _SQLALCHEMY_ENGINE = sqlalchemy.create_engine(
-        os.environ.get(constants.SKYPILOT_API_SERVER_DB_URL_ENV_VAR))
-else:
-    _SQLALCHEMY_ENGINE = sqlalchemy.create_engine('sqlite:///' + _DB_PATH)
-
 Base = declarative.declarative_base()
 
 config_table = sqlalchemy.Table(
@@ -182,11 +171,11 @@ def create_table():
     # https://github.com/microsoft/WSL/issues/2395
     # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
     # This may cause the database locked problem from WSL issue #1441.
-    if (_SQLALCHEMY_ENGINE.dialect.name
+    if (SQLALCHEMY_ENGINE.dialect.name
            == db_utils.SQLAlchemyDialect.SQLITE.value and
            not common_utils.is_wsl()):
        try:
-            with orm.Session(_SQLALCHEMY_ENGINE) as session:
+            with orm.Session(SQLALCHEMY_ENGINE) as session:
                session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
                session.commit()
        except sqlalchemy_exc.OperationalError as e:
@@ -196,12 +185,12 @@ def create_table():
     # is not critical and is likely to be enabled by other processes.
 
     # Create tables if they don't exist
-    Base.metadata.create_all(bind=_SQLALCHEMY_ENGINE)
+    Base.metadata.create_all(bind=SQLALCHEMY_ENGINE)
 
     # For backward compatibility.
     # TODO(zhwu): Remove this function after all users have migrated to
     # the latest version of SkyPilot.
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         # Add autostop column to clusters table
         db_utils.add_column_to_table_sqlalchemy(session,
                                                 'clusters',
@@ -308,34 +297,81 @@ def create_table():
             session.commit()
 
 
+conn_string = None
+if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
+    conn_string = skypilot_config.get_nested(('db',), None)
+if conn_string:
+    logger.debug(f'using db URI from {conn_string}')
+    SQLALCHEMY_ENGINE = sqlalchemy.create_engine(conn_string)
+else:
+    _DB_PATH = os.path.expanduser('~/.sky/state.db')
+    pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
+    SQLALCHEMY_ENGINE = sqlalchemy.create_engine('sqlite:///' + _DB_PATH)
 create_table()
 
 
-def add_or_update_user(user: models.User):
-    """Store the mapping from user hash to user name for display purposes."""
+def add_or_update_user(user: models.User) -> bool:
+    """Store the mapping from user hash to user name for display purposes.
+
+    Returns:
+        Boolean: whether the user is newly added
+    """
     if user.name is None:
-        return
+        return False
 
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
+            # For SQLite, use INSERT OR IGNORE followed by UPDATE to detect new
+            # vs existing
             insert_func = sqlite.insert
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+
+            # First try INSERT OR IGNORE - this won't fail if user exists
+            insert_stmnt = insert_func(user_table).prefix_with(
+                'OR IGNORE').values(id=user.id, name=user.name)
+            result = session.execute(insert_stmnt)
+
+            # Check if the INSERT actually inserted a row
+            was_inserted = result.rowcount > 0
+
+            if not was_inserted:
+                # User existed, so update it
+                session.query(user_table).filter_by(id=user.id).update(
+                    {user_table.c.name: user.name})
+
+            session.commit()
+            return was_inserted
+
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # For PostgreSQL, use INSERT ... ON CONFLICT with RETURNING to
+            # detect insert vs update
            insert_func = postgresql.insert
+            insert_stmnt = insert_func(user_table).values(id=user.id,
+                                                          name=user.name)
+
+            # Use a sentinel in the RETURNING clause to detect insert vs update
+            upsert_stmnt = insert_stmnt.on_conflict_do_update(
+                index_elements=[user_table.c.id],
+                set_={
+                    user_table.c.name: user.name
+                }).returning(
+                    user_table.c.id,
+                    # This will be True for INSERT, False for UPDATE
+                    sqlalchemy.literal_column('(xmax = 0)').label('was_inserted'
+                                                                 ))
+
+            result = session.execute(upsert_stmnt)
+            session.commit()
+
+            row = result.fetchone()
+            return bool(row.was_inserted) if row else False
        else:
            raise ValueError('Unsupported database dialect')
-        insert_stmnt = insert_func(user_table).values(id=user.id,
-                                                      name=user.name)
-        do_update_stmt = insert_stmnt.on_conflict_do_update(
-            index_elements=[user_table.c.id],
-            set_={user_table.c.name: user.name})
-        session.execute(do_update_stmt)
-        session.commit()
 
 
 def get_user(user_id: str) -> models.User:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(user_table).filter_by(id=user_id).first()
         if row is None:
             return models.User(id=user_id)
@@ -343,7 +379,7 @@ def get_user(user_id: str) -> models.User:
 
 
 def get_all_users() -> List[models.User]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         rows = session.query(user_table).all()
         return [models.User(id=row.id, name=row.name) for row in rows]
 
@@ -368,9 +404,6 @@ def add_or_update_cluster(cluster_name: str,
         config_hash: Configuration hash for the cluster.
         task_config: The config of the task being launched.
     """
-    # TODO(zhwu): have to be imported here to avoid circular import.
-    from sky import skypilot_config  # pylint: disable=import-outside-toplevel
-
     # FIXME: launched_at will be changed when `sky launch -c` is called.
     handle = pickle.dumps(cluster_handle)
     cluster_launched_at = int(time.time()) if is_launch else None
@@ -423,7 +456,7 @@ def add_or_update_cluster(cluster_name: str,
             'config_hash': config_hash,
         })
 
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         # with_for_update() locks the row until commit() or rollback()
         # is called, or until the code escapes the with block.
         cluster_row = session.query(cluster_table).filter_by(
@@ -450,10 +483,10 @@ def add_or_update_cluster(cluster_name: str,
             'last_creation_command': last_use,
         })
 
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            insert_func = sqlite.insert
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            insert_func = postgresql.insert
        else:
@@ -531,7 +564,7 @@ def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
 def update_cluster_handle(cluster_name: str,
                           cluster_handle: 'backends.ResourceHandle'):
     handle = pickle.dumps(cluster_handle)
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         session.query(cluster_table).filter_by(name=cluster_name).update(
             {cluster_table.c.handle: handle})
         session.commit()
@@ -539,7 +572,7 @@ def update_cluster_handle(cluster_name: str,
 
 def update_last_use(cluster_name: str):
     """Updates the last used command for the cluster."""
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         session.query(cluster_table).filter_by(name=cluster_name).update(
             {cluster_table.c.last_use: common_utils.get_current_command()})
         session.commit()
@@ -550,7 +583,7 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
     cluster_hash = _get_hash_for_existing_cluster(cluster_name)
     usage_intervals = _get_cluster_usage_intervals(cluster_hash)
 
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         # usage_intervals is not None and not empty
         if usage_intervals:
             assert cluster_hash is not None, cluster_name
@@ -583,7 +616,7 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
 def get_handle_from_cluster_name(
         cluster_name: str) -> Optional['backends.ResourceHandle']:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_table).filter_by(name=cluster_name).first()
         if row is None:
             return None
@@ -592,12 +625,12 @@ def get_handle_from_cluster_name(
 
 def get_glob_cluster_names(cluster_name: str) -> List[str]:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            rows = session.query(cluster_table).filter(
                cluster_table.c.name.op('GLOB')(cluster_name)).all()
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            rows = session.query(cluster_table).filter(
                cluster_table.c.name.op('SIMILAR TO')(
@@ -610,7 +643,7 @@ def get_glob_cluster_names(cluster_name: str) -> List[str]:
 def set_cluster_status(cluster_name: str,
                        status: status_lib.ClusterStatus) -> None:
     current_time = int(time.time())
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(cluster_table).filter_by(
             name=cluster_name).update({
                 cluster_table.c.status: status.value,
@@ -624,7 +657,7 @@ def set_cluster_status(cluster_name: str,
 
 def set_cluster_autostop_value(cluster_name: str, idle_minutes: int,
                                to_down: bool) -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(cluster_table).filter_by(
             name=cluster_name).update({
                 cluster_table.c.autostop: idle_minutes,
@@ -637,7 +670,7 @@ def set_cluster_autostop_value(cluster_name: str, idle_minutes: int,
 
 
 def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_table).filter_by(name=cluster_name).first()
         if row is None or row.launched_at is None:
             return None
@@ -645,7 +678,7 @@ def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
 
 
 def get_cluster_info(cluster_name: str) -> Optional[Dict[str, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_table).filter_by(name=cluster_name).first()
         if row is None or row.metadata is None:
             return None
@@ -653,7 +686,7 @@ def get_cluster_info(cluster_name: str) -> Optional[Dict[str, Any]]:
 
 
 def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(cluster_table).filter_by(
             name=cluster_name).update(
                 {cluster_table.c.metadata: json.dumps(metadata)})
@@ -665,7 +698,7 @@ def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
 
 def get_cluster_storage_mounts_metadata(
         cluster_name: str) -> Optional[Dict[str, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_table).filter_by(name=cluster_name).first()
         if row is None or row.storage_mounts_metadata is None:
             return None
@@ -674,7 +707,7 @@ def get_cluster_storage_mounts_metadata(
 
 def set_cluster_storage_mounts_metadata(
         cluster_name: str, storage_mounts_metadata: Dict[str, Any]) -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(cluster_table).filter_by(
             name=cluster_name).update({
                 cluster_table.c.storage_mounts_metadata:
@@ -691,7 +724,7 @@ def _get_cluster_usage_intervals(
 ) -> Optional[List[Tuple[int, Optional[int]]]]:
     if cluster_hash is None:
         return None
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_history_table).filter_by(
             cluster_hash=cluster_hash).first()
         if row is None or row.usage_intervals is None:
@@ -728,7 +761,7 @@ def _get_cluster_duration(cluster_hash: str) -> int:
 def _set_cluster_usage_intervals(
         cluster_hash: str, usage_intervals: List[Tuple[int,
                                                        Optional[int]]]) -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(cluster_history_table).filter_by(
             cluster_hash=cluster_hash).update({
                 cluster_history_table.c.usage_intervals:
@@ -745,7 +778,7 @@ def set_owner_identity_for_cluster(cluster_name: str,
     if owner_identity is None:
         return
     owner_identity_str = json.dumps(owner_identity)
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(cluster_table).filter_by(
             name=cluster_name).update(
                 {cluster_table.c.owner: owner_identity_str})
@@ -756,7 +789,7 @@ def set_owner_identity_for_cluster(cluster_name: str,
 
 
 def _get_hash_for_existing_cluster(cluster_name: str) -> Optional[str]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_table).filter_by(name=cluster_name).first()
         if row is None or row.cluster_hash is None:
             return None
@@ -765,7 +798,7 @@ def _get_hash_for_existing_cluster(cluster_name: str) -> Optional[str]:
 
 def get_launched_resources_from_cluster_hash(
         cluster_hash: str) -> Optional[Tuple[int, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_history_table).filter_by(
             cluster_hash=cluster_hash).first()
         if row is None:
@@ -810,7 +843,7 @@ def _load_storage_mounts_metadata(
 @context_utils.cancellation_guard
 def get_cluster_from_name(
         cluster_name: Optional[str]) -> Optional[Dict[str, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_table).filter_by(name=cluster_name).first()
         if row is None:
             return None
@@ -843,7 +876,7 @@ def get_cluster_from_name(
 
 
 def get_clusters() -> List[Dict[str, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         rows = session.query(cluster_table).order_by(
             sqlalchemy.desc(cluster_table.c.launched_at)).all()
         records = []
@@ -878,7 +911,7 @@ def get_clusters() -> List[Dict[str, Any]]:
 
 
 def get_clusters_from_history() -> List[Dict[str, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         rows = session.query(
             cluster_history_table.join(cluster_table,
                                        cluster_history_table.c.cluster_hash ==
@@ -914,7 +947,7 @@ def get_clusters_from_history() -> List[Dict[str, Any]]:
 
 
 def get_cluster_names_start_with(starts_with: str) -> List[str]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         rows = session.query(cluster_table).filter(
             cluster_table.c.name.like(f'{starts_with}%')).all()
         return [row.name for row in rows]
@@ -922,7 +955,7 @@ def get_cluster_names_start_with(starts_with: str) -> List[str]:
 
 def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
                               workspace: str) -> List['clouds.Cloud']:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(config_table).filter_by(
             key=_get_enabled_clouds_key(cloud_capability, workspace)).first()
         ret = []
@@ -946,11 +979,11 @@ def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
 def set_enabled_clouds(enabled_clouds: List[str],
                        cloud_capability: 'cloud.CloudCapability',
                        workspace: str) -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            insert_func = sqlite.insert
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            insert_func = postgresql.insert
        else:
@@ -983,11 +1016,11 @@ def add_or_update_storage(storage_name: str,
     if not status_check(storage_status):
         raise ValueError(f'Error in updating global state. Storage Status '
                          f'{storage_status} is passed in incorrectly')
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            insert_func = sqlite.insert
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            insert_func = postgresql.insert
        else:
@@ -1012,14 +1045,14 @@ def add_or_update_storage(storage_name: str,
 
 def remove_storage(storage_name: str):
     """Removes Storage from Database"""
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         session.query(storage_table).filter_by(name=storage_name).delete()
         session.commit()
 
 
 def set_storage_status(storage_name: str,
                        status: status_lib.StorageStatus) -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(storage_table).filter_by(
             name=storage_name).update({storage_table.c.status: status.value})
         session.commit()
@@ -1030,7 +1063,7 @@ def set_storage_status(storage_name: str,
 
 def get_storage_status(storage_name: str) -> Optional[status_lib.StorageStatus]:
     assert storage_name is not None, 'storage_name cannot be None'
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(storage_table).filter_by(name=storage_name).first()
         if row:
             return status_lib.StorageStatus[row.status]
@@ -1039,7 +1072,7 @@ def get_storage_status(storage_name: str) -> Optional[status_lib.StorageStatus]:
 
 def set_storage_handle(storage_name: str,
                        handle: 'Storage.StorageMetadata') -> None:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         count = session.query(storage_table).filter_by(
             name=storage_name).update(
                 {storage_table.c.handle: pickle.dumps(handle)})
@@ -1053,7 +1086,7 @@ def get_handle_from_storage_name(
         storage_name: Optional[str]) -> Optional['Storage.StorageMetadata']:
     if storage_name is None:
         return None
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(storage_table).filter_by(name=storage_name).first()
         if row:
             return pickle.loads(row.handle)
@@ -1062,12 +1095,12 @@ def get_handle_from_storage_name(
 
 def get_glob_storage_name(storage_name: str) -> List[str]:
     assert storage_name is not None, 'storage_name cannot be None'
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            rows = session.query(storage_table).filter(
                storage_table.c.name.op('GLOB')(storage_name)).all()
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            rows = session.query(storage_table).filter(
                storage_table.c.name.op('SIMILAR TO')(
@@ -1078,14 +1111,14 @@ def get_glob_storage_name(storage_name: str) -> List[str]:
 
 
 def get_storage_names_start_with(starts_with: str) -> List[str]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         rows = session.query(storage_table).filter(
             storage_table.c.name.like(f'{starts_with}%')).all()
         return [row.name for row in rows]
 
 
 def get_storage() -> List[Dict[str, Any]]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         rows = session.query(storage_table).all()
         records = []
         for row in rows:
@@ -1101,7 +1134,7 @@ def get_storage() -> List[Dict[str, Any]]:
 
 
 def get_ssh_keys(user_hash: str) -> Tuple[str, str, bool]:
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(ssh_key_table).filter_by(
             user_hash=user_hash).first()
         if row:
@@ -1110,11 +1143,11 @@ def get_ssh_keys(user_hash: str) -> Tuple[str, str, bool]:
 
 
 def set_ssh_keys(user_hash: str, ssh_public_key: str, ssh_private_key: str):
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            insert_func = sqlite.insert
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            insert_func = postgresql.insert
        else:
@@ -1144,7 +1177,7 @@ def get_cluster_yaml_str(cluster_yaml_path: Optional[str]) -> Optional[str]:
         raise ValueError('Attempted to read a None YAML.')
     cluster_file_name = os.path.basename(cluster_yaml_path)
     cluster_name, _ = os.path.splitext(cluster_file_name)
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         row = session.query(cluster_yaml_table).filter_by(
             cluster_name=cluster_name).first()
         if row is None:
@@ -1174,11 +1207,11 @@ def get_cluster_yaml_dict(cluster_yaml_path: Optional[str]) -> Dict[str, Any]:
 
 def set_cluster_yaml(cluster_name: str, yaml_str: str) -> None:
     """Set the cluster yaml in the database."""
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        if (_SQLALCHEMY_ENGINE.dialect.name ==
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
+        if (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
            insert_func = sqlite.insert
-        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+        elif (SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.POSTGRESQL.value):
            insert_func = postgresql.insert
        else:
@@ -1193,7 +1226,7 @@ def set_cluster_yaml(cluster_name: str, yaml_str: str) -> None:
 
 
 def remove_cluster_yaml(cluster_name: str):
-    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+    with orm.Session(SQLALCHEMY_ENGINE) as session:
         session.query(cluster_yaml_table).filter_by(
             cluster_name=cluster_name).delete()
         session.commit()
sky/jobs/client/sdk.py
CHANGED
@@ -92,7 +92,8 @@ def launch(
 @server_common.check_server_healthy_or_start
 def queue(refresh: bool,
           skip_finished: bool = False,
-          all_users: bool = False) -> server_common.RequestId:
+          all_users: bool = False,
+          job_ids: Optional[List[int]] = None) -> server_common.RequestId:
    """Gets statuses of managed jobs.

    Please refer to sky.cli.job_queue for documentation.
@@ -101,6 +102,7 @@ def queue(refresh: bool,
         refresh: Whether to restart the jobs controller if it is stopped.
         skip_finished: Whether to skip finished jobs.
         all_users: Whether to show all users' jobs.
+        job_ids: IDs of the managed jobs to show.
 
     Returns:
         The request ID of the queue request.
@@ -135,6 +137,7 @@ def queue(refresh: bool,
         refresh=refresh,
         skip_finished=skip_finished,
         all_users=all_users,
+        job_ids=job_ids,
     )
     response = requests.post(
         f'{server_common.get_server_url()}/jobs/queue',
sky/jobs/server/core.py
CHANGED
@@ -20,7 +20,7 @@ from sky import sky_logging
 from sky import skypilot_config
 from sky import task as task_lib
 from sky.backends import backend_utils
-from sky.clouds.service_catalog import common as service_catalog_common
+from sky.catalog import common as service_catalog_common
 from sky.data import storage as storage_lib
 from sky.jobs import constants as managed_job_constants
 from sky.jobs import utils as managed_job_utils
@@ -378,7 +378,8 @@ def _maybe_restart_controller(
 @usage_lib.entrypoint
 def queue(refresh: bool,
           skip_finished: bool = False,
-          all_users: bool = False) -> List[Dict[str, Any]]:
+          all_users: bool = False,
+          job_ids: Optional[List[int]] = None) -> List[Dict[str, Any]]:
     # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
     """Gets statuses of managed jobs.
 
@@ -450,6 +451,9 @@ def queue(refresh: bool,
         jobs = list(
             filter(lambda job: job['job_id'] in non_finished_job_ids, jobs))
 
+    if job_ids:
+        jobs = [job for job in jobs if job['job_id'] in job_ids]
+
     return jobs
 
 
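On the server side, the new `job_ids` argument is applied as a final in-memory filter after the `skip_finished` handling, so the two compose. A simplified standalone sketch of that ordering follows; the job dicts are fabricated and the `skip_finished` step is condensed compared to the real implementation.

```python
# Illustration of the post-filtering order in queue(): drop finished jobs
# first (when skip_finished=True), then keep only the requested job_ids.
jobs = [
    {'job_id': 1, 'status': 'RUNNING'},
    {'job_id': 2, 'status': 'SUCCEEDED'},
    {'job_id': 3, 'status': 'RUNNING'},
]
skip_finished = True
job_ids = [2, 3]

if skip_finished:
    jobs = [job for job in jobs if job['status'] not in ('SUCCEEDED',)]
if job_ids:
    jobs = [job for job in jobs if job['job_id'] in job_ids]

print(jobs)  # [{'job_id': 3, 'status': 'RUNNING'}]
```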
sky/models.py
CHANGED
sky/optimizer.py
CHANGED
@@ -1313,7 +1313,7 @@ def _fill_in_launchable_resources(
     if feasible_resources.resources_list:
         # Assume feasible_resources is sorted by prices. Guaranteed by
         # the implementation of get_feasible_launchable_resources and
-        # the underlying service_catalog filtering
+        # the underlying catalog filtering
         cheapest = feasible_resources.resources_list[0]
         # Generate region/zone-specified resources.
         launchable[resources].extend(