skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250526__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +62 -45
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/check.py +335 -170
- sky/cli.py +56 -13
- sky/client/cli.py +56 -13
- sky/client/sdk.py +54 -10
- sky/clouds/gcp.py +19 -3
- sky/core.py +5 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +6 -0
- sky/dashboard/out/_next/static/chunks/480-5a0de8b6570ea105.js +1 -0
- sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
- sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
- sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
- sky/dashboard/out/_next/static/chunks/578-d351125af46c293f.js +6 -0
- sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +1 -0
- sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
- sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +1 -0
- sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-96a715a6fb01e228.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-bbf436f41381e169.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7733c960685b4385.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
- sky/dashboard/out/_next/static/css/28558d57108b05ae.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/storage.py +1 -1
- sky/global_user_state.py +606 -543
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +72 -56
- sky/jobs/state.py +26 -5
- sky/jobs/utils.py +65 -13
- sky/optimizer.py +6 -3
- sky/provision/fluidstack/instance.py +1 -0
- sky/serve/server/core.py +9 -6
- sky/server/html/token_page.html +6 -1
- sky/server/requests/executor.py +1 -0
- sky/server/requests/payloads.py +28 -0
- sky/server/server.py +59 -5
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +4 -1
- sky/skypilot_config.py +107 -11
- sky/utils/cli_utils/status_utils.py +18 -8
- sky/utils/db_utils.py +53 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +166 -147
- sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
- sky/utils/kubernetes/ssh-tunnel.sh +20 -28
- sky/utils/log_utils.py +4 -0
- sky/utils/schemas.py +54 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +295 -0
- sky/workspaces/server.py +62 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/RECORD +79 -63
- sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
- sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
- /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → 7GEgRyZKRaSnYZCV1Jwol}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/top_level.txt +0 -0
sky/jobs/constants.py
CHANGED
@@ -47,7 +47,7 @@ JOBS_CLUSTER_NAME_PREFIX_LENGTH = 25
|
|
47
47
|
# The version of the lib files that jobs/utils use. Whenever there is an API
|
48
48
|
# change for the jobs/utils, we need to bump this version and update
|
49
49
|
# job.utils.ManagedJobCodeGen to handle the version update.
|
50
|
-
MANAGED_JOBS_VERSION =
|
50
|
+
MANAGED_JOBS_VERSION = 4
|
51
51
|
|
52
52
|
# The command for setting up the jobs dashboard on the controller. It firstly
|
53
53
|
# checks if the systemd services are available, and if not (e.g., Kubernetes
|
sky/jobs/server/core.py
CHANGED
@@ -17,6 +17,7 @@ from sky import execution
|
|
17
17
|
from sky import global_user_state
|
18
18
|
from sky import provision as provision_lib
|
19
19
|
from sky import sky_logging
|
20
|
+
from sky import skypilot_config
|
20
21
|
from sky import task as task_lib
|
21
22
|
from sky.backends import backend_utils
|
22
23
|
from sky.clouds.service_catalog import common as service_catalog_common
|
@@ -207,7 +208,7 @@ def launch(
|
|
207
208
|
|
208
209
|
controller_task.managed_job_dag = dag
|
209
210
|
|
210
|
-
|
211
|
+
logger.info(
|
211
212
|
f'{colorama.Fore.YELLOW}'
|
212
213
|
f'Launching managed job {dag.name!r} from jobs controller...'
|
213
214
|
f'{colorama.Style.RESET_ALL}')
|
@@ -215,12 +216,20 @@ def launch(
|
|
215
216
|
# Launch with the api server's user hash, so that sky status does not
|
216
217
|
# show the owner of the controller as whatever user launched it first.
|
217
218
|
with common.with_server_user_hash():
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
219
|
+
# Always launch the controller in the default workspace.
|
220
|
+
with skypilot_config.local_active_workspace_ctx(
|
221
|
+
skylet_constants.SKYPILOT_DEFAULT_WORKSPACE):
|
222
|
+
# TODO(zhwu): the buckets need to be correctly handled for
|
223
|
+
# a specific workspace. For example, if a job is launched in
|
224
|
+
# workspace A, but the controller is in workspace B, the
|
225
|
+
# intermediate bucket and newly created bucket should be in
|
226
|
+
# workspace A.
|
227
|
+
return execution.launch(task=controller_task,
|
228
|
+
cluster_name=controller_name,
|
229
|
+
stream_logs=stream_logs,
|
230
|
+
retry_until_up=True,
|
231
|
+
fast=True,
|
232
|
+
_disable_controller_check=True)
|
224
233
|
|
225
234
|
|
226
235
|
def queue_from_kubernetes_pod(
|
@@ -318,14 +327,17 @@ def _maybe_restart_controller(
|
|
318
327
|
if handle is not None:
|
319
328
|
return handle
|
320
329
|
|
321
|
-
|
322
|
-
|
323
|
-
|
330
|
+
logger.info(f'{colorama.Fore.YELLOW}'
|
331
|
+
f'Restarting {jobs_controller_type.value.name}...'
|
332
|
+
f'{colorama.Style.RESET_ALL}')
|
324
333
|
|
325
334
|
rich_utils.force_update_status(
|
326
335
|
ux_utils.spinner_message(f'{spinner_message} - restarting '
|
327
336
|
'controller'))
|
328
|
-
|
337
|
+
with skypilot_config.local_active_workspace_ctx(
|
338
|
+
skylet_constants.SKYPILOT_DEFAULT_WORKSPACE):
|
339
|
+
handle = core.start(
|
340
|
+
cluster_name=jobs_controller_type.value.cluster_name)
|
329
341
|
# Make sure the dashboard is running when the controller is restarted.
|
330
342
|
# We should not directly use execution.launch() and have the dashboard cmd
|
331
343
|
# in the task setup because since we are using detached_setup, it will
|
@@ -440,52 +452,56 @@ def cancel(name: Optional[str] = None,
|
|
440
452
|
sky.exceptions.ClusterNotUpError: the jobs controller is not up.
|
441
453
|
RuntimeError: failed to cancel the job.
|
442
454
|
"""
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
455
|
+
with rich_utils.safe_status(
|
456
|
+
ux_utils.spinner_message('Cancelling managed jobs')):
|
457
|
+
job_ids = [] if job_ids is None else job_ids
|
458
|
+
handle = backend_utils.is_controller_accessible(
|
459
|
+
controller=controller_utils.Controllers.JOBS_CONTROLLER,
|
460
|
+
stopped_message='All managed jobs should have finished.')
|
461
|
+
|
462
|
+
job_id_str = ','.join(map(str, job_ids))
|
463
|
+
if sum([bool(job_ids), name is not None, all or all_users]) != 1:
|
464
|
+
arguments = []
|
465
|
+
arguments += [f'job_ids={job_id_str}'] if job_ids else []
|
466
|
+
arguments += [f'name={name}'] if name is not None else []
|
467
|
+
arguments += ['all'] if all else []
|
468
|
+
arguments += ['all_users'] if all_users else []
|
469
|
+
with ux_utils.print_exception_no_traceback():
|
470
|
+
raise ValueError(
|
471
|
+
'Can only specify one of JOB_IDS, name, or all/'
|
472
|
+
f'all_users. Provided {" ".join(arguments)!r}.')
|
473
|
+
|
474
|
+
backend = backend_utils.get_backend_from_handle(handle)
|
475
|
+
assert isinstance(backend, backends.CloudVmRayBackend)
|
476
|
+
if all_users:
|
477
|
+
code = managed_job_utils.ManagedJobCodeGen.cancel_jobs_by_id(
|
478
|
+
None, all_users=True)
|
479
|
+
elif all:
|
480
|
+
code = managed_job_utils.ManagedJobCodeGen.cancel_jobs_by_id(None)
|
481
|
+
elif job_ids:
|
482
|
+
code = managed_job_utils.ManagedJobCodeGen.cancel_jobs_by_id(
|
483
|
+
job_ids)
|
484
|
+
else:
|
485
|
+
assert name is not None, (job_ids, name, all)
|
486
|
+
code = managed_job_utils.ManagedJobCodeGen.cancel_job_by_name(name)
|
487
|
+
# The stderr is redirected to stdout
|
488
|
+
returncode, stdout, stderr = backend.run_on_head(handle,
|
489
|
+
code,
|
490
|
+
require_outputs=True,
|
491
|
+
stream_logs=False)
|
492
|
+
try:
|
493
|
+
subprocess_utils.handle_returncode(returncode, code,
|
494
|
+
'Failed to cancel managed job',
|
495
|
+
stdout + stderr)
|
496
|
+
except exceptions.CommandError as e:
|
497
|
+
with ux_utils.print_exception_no_traceback():
|
498
|
+
raise RuntimeError(e.error_msg) from e
|
483
499
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
500
|
+
logger.info(stdout)
|
501
|
+
if 'Multiple jobs found with name' in stdout:
|
502
|
+
with ux_utils.print_exception_no_traceback():
|
503
|
+
raise RuntimeError(
|
504
|
+
'Please specify the job ID instead of the job name.')
|
489
505
|
|
490
506
|
|
491
507
|
@usage_lib.entrypoint
|
sky/jobs/state.py
CHANGED
@@ -13,6 +13,7 @@ import colorama
|
|
13
13
|
|
14
14
|
from sky import exceptions
|
15
15
|
from sky import sky_logging
|
16
|
+
from sky.skylet import constants
|
16
17
|
from sky.utils import common_utils
|
17
18
|
from sky.utils import db_utils
|
18
19
|
|
@@ -118,7 +119,8 @@ def create_table(cursor, conn):
|
|
118
119
|
controller_pid INTEGER DEFAULT NULL,
|
119
120
|
dag_yaml_path TEXT,
|
120
121
|
env_file_path TEXT,
|
121
|
-
user_hash TEXT
|
122
|
+
user_hash TEXT,
|
123
|
+
workspace TEXT DEFAULT NULL)""")
|
122
124
|
|
123
125
|
db_utils.add_column_to_table(cursor, conn, 'job_info', 'schedule_state',
|
124
126
|
'TEXT')
|
@@ -134,6 +136,12 @@ def create_table(cursor, conn):
|
|
134
136
|
|
135
137
|
db_utils.add_column_to_table(cursor, conn, 'job_info', 'user_hash', 'TEXT')
|
136
138
|
|
139
|
+
db_utils.add_column_to_table(cursor,
|
140
|
+
conn,
|
141
|
+
'job_info',
|
142
|
+
'workspace',
|
143
|
+
'TEXT DEFAULT NULL',
|
144
|
+
value_to_replace_existing_entries='default')
|
137
145
|
conn.commit()
|
138
146
|
|
139
147
|
|
@@ -190,6 +198,7 @@ columns = [
|
|
190
198
|
'dag_yaml_path',
|
191
199
|
'env_file_path',
|
192
200
|
'user_hash',
|
201
|
+
'workspace',
|
193
202
|
]
|
194
203
|
|
195
204
|
|
@@ -380,14 +389,14 @@ class ManagedJobScheduleState(enum.Enum):
|
|
380
389
|
|
381
390
|
|
382
391
|
# === Status transition functions ===
|
383
|
-
def set_job_info(job_id: int, name: str):
|
392
|
+
def set_job_info(job_id: int, name: str, workspace: str):
|
384
393
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
385
394
|
cursor.execute(
|
386
395
|
"""\
|
387
396
|
INSERT INTO job_info
|
388
|
-
(spot_job_id, name, schedule_state)
|
389
|
-
VALUES (?, ?, ?)""",
|
390
|
-
(job_id, name, ManagedJobScheduleState.INACTIVE.value))
|
397
|
+
(spot_job_id, name, schedule_state, workspace)
|
398
|
+
VALUES (?, ?, ?, ?)""",
|
399
|
+
(job_id, name, ManagedJobScheduleState.INACTIVE.value, workspace))
|
391
400
|
|
392
401
|
|
393
402
|
def set_pending(job_id: int, task_id: int, task_name: str, resources_str: str):
|
@@ -1116,3 +1125,15 @@ def get_waiting_job() -> Optional[Dict[str, Any]]:
|
|
1116
1125
|
'dag_yaml_path': row[2],
|
1117
1126
|
'env_file_path': row[3],
|
1118
1127
|
} if row is not None else None
|
1128
|
+
|
1129
|
+
|
1130
|
+
def get_workspace(job_id: int) -> str:
|
1131
|
+
"""Get the workspace of a job."""
|
1132
|
+
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
1133
|
+
workspace = cursor.execute(
|
1134
|
+
'SELECT workspace FROM job_info WHERE spot_job_id = (?)',
|
1135
|
+
(job_id,)).fetchone()
|
1136
|
+
job_workspace = workspace[0] if workspace else None
|
1137
|
+
if job_workspace is None:
|
1138
|
+
return constants.SKYPILOT_DEFAULT_WORKSPACE
|
1139
|
+
return job_workspace
|
sky/jobs/utils.py
CHANGED
@@ -23,6 +23,7 @@ from sky import backends
|
|
23
23
|
from sky import exceptions
|
24
24
|
from sky import global_user_state
|
25
25
|
from sky import sky_logging
|
26
|
+
from sky import skypilot_config
|
26
27
|
from sky.adaptors import common as adaptors_common
|
27
28
|
from sky.backends import backend_utils
|
28
29
|
from sky.jobs import constants as managed_job_constants
|
@@ -463,7 +464,8 @@ def generate_managed_job_cluster_name(task_name: str, job_id: int) -> str:
|
|
463
464
|
|
464
465
|
|
465
466
|
def cancel_jobs_by_id(job_ids: Optional[List[int]],
|
466
|
-
all_users: bool = False
|
467
|
+
all_users: bool = False,
|
468
|
+
current_workspace: Optional[str] = None) -> str:
|
467
469
|
"""Cancel jobs by id.
|
468
470
|
|
469
471
|
If job_ids is None, cancel all jobs.
|
@@ -474,9 +476,11 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]],
|
|
474
476
|
job_ids = list(set(job_ids))
|
475
477
|
if not job_ids:
|
476
478
|
return 'No job to cancel.'
|
477
|
-
|
478
|
-
|
479
|
+
if current_workspace is None:
|
480
|
+
current_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
|
481
|
+
|
479
482
|
cancelled_job_ids: List[int] = []
|
483
|
+
wrong_workspace_job_ids: List[int] = []
|
480
484
|
for job_id in job_ids:
|
481
485
|
# Check the status of the managed job status. If it is in
|
482
486
|
# terminal state, we can safely skip it.
|
@@ -491,6 +495,11 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]],
|
|
491
495
|
|
492
496
|
update_managed_jobs_statuses(job_id)
|
493
497
|
|
498
|
+
job_workspace = managed_job_state.get_workspace(job_id)
|
499
|
+
if current_workspace is not None and job_workspace != current_workspace:
|
500
|
+
wrong_workspace_job_ids.append(job_id)
|
501
|
+
continue
|
502
|
+
|
494
503
|
# Send the signal to the jobs controller.
|
495
504
|
signal_file = pathlib.Path(SIGNAL_FILE_PREFIX.format(job_id))
|
496
505
|
# Filelock is needed to prevent race condition between signal
|
@@ -501,17 +510,30 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]],
|
|
501
510
|
f.flush()
|
502
511
|
cancelled_job_ids.append(job_id)
|
503
512
|
|
513
|
+
wrong_workspace_job_str = ''
|
514
|
+
if wrong_workspace_job_ids:
|
515
|
+
plural = 's' if len(wrong_workspace_job_ids) > 1 else ''
|
516
|
+
plural_verb = 'are' if len(wrong_workspace_job_ids) > 1 else 'is'
|
517
|
+
wrong_workspace_job_str = (
|
518
|
+
f' Job{plural} with ID{plural}'
|
519
|
+
f' {", ".join(map(str, wrong_workspace_job_ids))} '
|
520
|
+
f'{plural_verb} skipped as they are not in the active workspace '
|
521
|
+
f'{current_workspace!r}. Check the workspace of the job with: '
|
522
|
+
f'sky jobs queue')
|
523
|
+
|
504
524
|
if not cancelled_job_ids:
|
505
|
-
return 'No job to cancel.'
|
525
|
+
return f'No job to cancel.{wrong_workspace_job_str}'
|
506
526
|
identity_str = f'Job with ID {cancelled_job_ids[0]} is'
|
507
527
|
if len(cancelled_job_ids) > 1:
|
508
528
|
cancelled_job_ids_str = ', '.join(map(str, cancelled_job_ids))
|
509
529
|
identity_str = f'Jobs with IDs {cancelled_job_ids_str} are'
|
510
530
|
|
511
|
-
|
531
|
+
msg = f'{identity_str} scheduled to be cancelled.{wrong_workspace_job_str}'
|
532
|
+
return msg
|
512
533
|
|
513
534
|
|
514
|
-
def cancel_job_by_name(job_name: str
|
535
|
+
def cancel_job_by_name(job_name: str,
|
536
|
+
current_workspace: Optional[str] = None) -> str:
|
515
537
|
"""Cancel a job by name."""
|
516
538
|
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(job_name)
|
517
539
|
if not job_ids:
|
@@ -520,8 +542,8 @@ def cancel_job_by_name(job_name: str) -> str:
|
|
520
542
|
return (f'{colorama.Fore.RED}Multiple running jobs found '
|
521
543
|
f'with name {job_name!r}.\n'
|
522
544
|
f'Job IDs: {job_ids}{colorama.Style.RESET_ALL}')
|
523
|
-
cancel_jobs_by_id(job_ids)
|
524
|
-
return f'
|
545
|
+
msg = cancel_jobs_by_id(job_ids, current_workspace=current_workspace)
|
546
|
+
return f'{job_name!r} {msg}'
|
525
547
|
|
526
548
|
|
527
549
|
def stream_logs_by_id(job_id: int, follow: bool = True) -> Tuple[str, int]:
|
@@ -1020,10 +1042,15 @@ def format_job_table(
|
|
1020
1042
|
jobs[get_hash(task)].append(task)
|
1021
1043
|
|
1022
1044
|
status_counts: Dict[str, int] = collections.defaultdict(int)
|
1045
|
+
workspaces = set()
|
1023
1046
|
for job_tasks in jobs.values():
|
1024
1047
|
managed_job_status = _get_job_status_from_tasks(job_tasks)[0]
|
1025
1048
|
if not managed_job_status.is_terminal():
|
1026
1049
|
status_counts[managed_job_status.value] += 1
|
1050
|
+
workspaces.add(job_tasks[0].get('workspace',
|
1051
|
+
constants.SKYPILOT_DEFAULT_WORKSPACE))
|
1052
|
+
|
1053
|
+
show_workspace = len(workspaces) > 1 or show_all
|
1027
1054
|
|
1028
1055
|
user_cols: List[str] = []
|
1029
1056
|
if show_user:
|
@@ -1034,6 +1061,7 @@ def format_job_table(
|
|
1034
1061
|
columns = [
|
1035
1062
|
'ID',
|
1036
1063
|
'TASK',
|
1064
|
+
*(['WORKSPACE'] if show_workspace else []),
|
1037
1065
|
'NAME',
|
1038
1066
|
*user_cols,
|
1039
1067
|
'REQUESTED',
|
@@ -1093,6 +1121,8 @@ def format_job_table(
|
|
1093
1121
|
for job_hash, job_tasks in jobs.items():
|
1094
1122
|
if show_all:
|
1095
1123
|
schedule_state = job_tasks[0]['schedule_state']
|
1124
|
+
workspace = job_tasks[0].get('workspace',
|
1125
|
+
constants.SKYPILOT_DEFAULT_WORKSPACE)
|
1096
1126
|
|
1097
1127
|
if len(job_tasks) > 1:
|
1098
1128
|
# Aggregate the tasks into a new row in the table.
|
@@ -1134,6 +1164,7 @@ def format_job_table(
|
|
1134
1164
|
job_values = [
|
1135
1165
|
job_id,
|
1136
1166
|
'',
|
1167
|
+
*([''] if show_workspace else []),
|
1137
1168
|
job_name,
|
1138
1169
|
*user_values,
|
1139
1170
|
'-',
|
@@ -1163,9 +1194,11 @@ def format_job_table(
|
|
1163
1194
|
0, task['job_duration'], absolute=True)
|
1164
1195
|
submitted = log_utils.readable_time_duration(task['submitted_at'])
|
1165
1196
|
user_values = get_user_column_values(task)
|
1197
|
+
task_workspace = '-' if len(job_tasks) > 1 else workspace
|
1166
1198
|
values = [
|
1167
1199
|
task['job_id'] if len(job_tasks) == 1 else ' \u21B3',
|
1168
1200
|
task['task_id'] if len(job_tasks) > 1 else '-',
|
1201
|
+
*([task_workspace] if show_workspace else []),
|
1169
1202
|
task['task_name'],
|
1170
1203
|
*user_values,
|
1171
1204
|
task['resources'],
|
@@ -1263,22 +1296,36 @@ class ManagedJobCodeGen:
|
|
1263
1296
|
def cancel_jobs_by_id(cls,
|
1264
1297
|
job_ids: Optional[List[int]],
|
1265
1298
|
all_users: bool = False) -> str:
|
1299
|
+
active_workspace = skypilot_config.get_active_workspace()
|
1266
1300
|
code = textwrap.dedent(f"""\
|
1267
1301
|
if managed_job_version < 2:
|
1268
1302
|
# For backward compatibility, since all_users is not supported
|
1269
|
-
# before #4787.
|
1303
|
+
# before #4787.
|
1270
1304
|
# TODO(cooperc): Remove compatibility before 0.12.0
|
1271
1305
|
msg = utils.cancel_jobs_by_id({job_ids})
|
1272
|
-
|
1306
|
+
elif managed_job_version < 4:
|
1307
|
+
# For backward compatibility, since current_workspace is not
|
1308
|
+
# supported before #5660. Don't check the workspace.
|
1309
|
+
# TODO(zhwu): Remove compatibility before 0.12.0
|
1273
1310
|
msg = utils.cancel_jobs_by_id({job_ids}, all_users={all_users})
|
1311
|
+
else:
|
1312
|
+
msg = utils.cancel_jobs_by_id({job_ids}, all_users={all_users},
|
1313
|
+
current_workspace={active_workspace!r})
|
1274
1314
|
print(msg, end="", flush=True)
|
1275
1315
|
""")
|
1276
1316
|
return cls._build(code)
|
1277
1317
|
|
1278
1318
|
@classmethod
|
1279
1319
|
def cancel_job_by_name(cls, job_name: str) -> str:
|
1320
|
+
active_workspace = skypilot_config.get_active_workspace()
|
1280
1321
|
code = textwrap.dedent(f"""\
|
1281
|
-
|
1322
|
+
if managed_job_version < 4:
|
1323
|
+
# For backward compatibility, since current_workspace is not
|
1324
|
+
# supported before #5660. Don't check the workspace.
|
1325
|
+
# TODO(zhwu): Remove compatibility before 0.12.0
|
1326
|
+
msg = utils.cancel_job_by_name({job_name!r})
|
1327
|
+
else:
|
1328
|
+
msg = utils.cancel_job_by_name({job_name!r}, {active_workspace!r})
|
1282
1329
|
print(msg, end="", flush=True)
|
1283
1330
|
""")
|
1284
1331
|
return cls._build(code)
|
@@ -1314,11 +1361,16 @@ class ManagedJobCodeGen:
|
|
1314
1361
|
return cls._build(code)
|
1315
1362
|
|
1316
1363
|
@classmethod
|
1317
|
-
def set_pending(cls, job_id: int, managed_job_dag: 'dag_lib.Dag'
|
1364
|
+
def set_pending(cls, job_id: int, managed_job_dag: 'dag_lib.Dag',
|
1365
|
+
workspace) -> str:
|
1318
1366
|
dag_name = managed_job_dag.name
|
1319
1367
|
# Add the managed job to queue table.
|
1320
1368
|
code = textwrap.dedent(f"""\
|
1321
|
-
|
1369
|
+
set_job_info_kwargs = {{'workspace': {workspace!r}}}
|
1370
|
+
if managed_job_version < 4:
|
1371
|
+
set_job_info_kwargs = {{}}
|
1372
|
+
managed_job_state.set_job_info(
|
1373
|
+
{job_id}, {dag_name!r}, **set_job_info_kwargs)
|
1322
1374
|
""")
|
1323
1375
|
for task_id, task in enumerate(managed_job_dag.tasks):
|
1324
1376
|
resources_str = backend_utils.get_task_resources_str(
|
sky/optimizer.py
CHANGED
@@ -14,6 +14,7 @@ from sky import clouds
|
|
14
14
|
from sky import exceptions
|
15
15
|
from sky import resources as resources_lib
|
16
16
|
from sky import sky_logging
|
17
|
+
from sky import skypilot_config
|
17
18
|
from sky import task as task_lib
|
18
19
|
from sky.adaptors import common as adaptors_common
|
19
20
|
from sky.clouds import cloud as sky_cloud
|
@@ -1217,9 +1218,11 @@ def _check_specified_clouds(dag: 'dag_lib.Dag') -> None:
|
|
1217
1218
|
clouds_to_check_again = list(clouds_need_recheck -
|
1218
1219
|
global_disabled_clouds)
|
1219
1220
|
if len(clouds_to_check_again) > 0:
|
1220
|
-
sky_check.check_capability(
|
1221
|
-
|
1222
|
-
|
1221
|
+
sky_check.check_capability(
|
1222
|
+
sky_cloud.CloudCapability.COMPUTE,
|
1223
|
+
quiet=True,
|
1224
|
+
clouds=clouds_to_check_again,
|
1225
|
+
workspace=skypilot_config.get_active_workspace())
|
1223
1226
|
enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
|
1224
1227
|
capability=sky_cloud.CloudCapability.COMPUTE,
|
1225
1228
|
raise_if_no_cloud_access=True)
|
@@ -26,6 +26,7 @@ logger = sky_logging.init_logger(__name__)
|
|
26
26
|
|
27
27
|
def get_internal_ip(node_info: Dict[str, Any]) -> None:
|
28
28
|
node_info['internal_ip'] = node_info['ip_address']
|
29
|
+
|
29
30
|
private_key_path, _ = auth.get_or_generate_keys()
|
30
31
|
runner = command_runner.SSHCommandRunner(
|
31
32
|
(node_info['ip_address'], 22),
|
sky/serve/server/core.py
CHANGED
@@ -14,6 +14,7 @@ from sky import backends
|
|
14
14
|
from sky import exceptions
|
15
15
|
from sky import execution
|
16
16
|
from sky import sky_logging
|
17
|
+
from sky import skypilot_config
|
17
18
|
from sky import task as task_lib
|
18
19
|
from sky.backends import backend_utils
|
19
20
|
from sky.clouds.service_catalog import common as service_catalog_common
|
@@ -221,12 +222,14 @@ def up(
|
|
221
222
|
# Since the controller may be shared among multiple users, launch the
|
222
223
|
# controller with the API server's user hash.
|
223
224
|
with common.with_server_user_hash():
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
225
|
+
with skypilot_config.local_active_workspace_ctx(
|
226
|
+
constants.SKYPILOT_DEFAULT_WORKSPACE):
|
227
|
+
controller_job_id, controller_handle = execution.launch(
|
228
|
+
task=controller_task,
|
229
|
+
cluster_name=controller_name,
|
230
|
+
retry_until_up=True,
|
231
|
+
_disable_controller_check=True,
|
232
|
+
)
|
230
233
|
|
231
234
|
style = colorama.Style
|
232
235
|
fore = colorama.Fore
|
sky/server/html/token_page.html
CHANGED
@@ -49,6 +49,11 @@
|
|
49
49
|
margin-bottom: 20px;
|
50
50
|
color: #5f6368;
|
51
51
|
}
|
52
|
+
.user-identifier {
|
53
|
+
font-size: 12px; /* Smaller font size */
|
54
|
+
color: #80868b; /* Lighter color */
|
55
|
+
margin-bottom: 8px; /* Adjusted margin */
|
56
|
+
}
|
52
57
|
.code-block {
|
53
58
|
background-color: #f1f3f4;
|
54
59
|
border: 1px solid #dadce0;
|
@@ -110,8 +115,8 @@
|
|
110
115
|
</svg>
|
111
116
|
</div>
|
112
117
|
<h1>Sign in to SkyPilot CLI</h1>
|
118
|
+
<p class="user-identifier">USER_PLACEHOLDER</p>
|
113
119
|
<p>You are seeing this page because a SkyPilot command requires authentication.</p>
|
114
|
-
|
115
120
|
<p>Please copy the following token and paste it into your SkyPilot CLI prompt:</p>
|
116
121
|
<div id="token-box" class="code-block">SKYPILOT_API_SERVER_USER_TOKEN_PLACEHOLDER</div>
|
117
122
|
<button id="copy-btn" class="copy-button">Copy Token</button>
|
sky/server/requests/executor.py
CHANGED
@@ -228,6 +228,7 @@ def override_request_env_and_config(
|
|
228
228
|
"""Override the environment and SkyPilot config for a request."""
|
229
229
|
original_env = os.environ.copy()
|
230
230
|
os.environ.update(request_body.env_vars)
|
231
|
+
# Note: may be overridden by AuthProxyMiddleware.
|
231
232
|
user = models.User(id=request_body.env_vars[constants.USER_ID_ENV_VAR],
|
232
233
|
name=request_body.env_vars[constants.USER_ENV_VAR])
|
233
234
|
global_user_state.add_or_update_user(user)
|
sky/server/requests/payloads.py
CHANGED
@@ -88,6 +88,11 @@ class RequestBody(pydantic.BaseModel):
|
|
88
88
|
using_remote_api_server: bool = False
|
89
89
|
override_skypilot_config: Optional[Dict[str, Any]] = {}
|
90
90
|
|
91
|
+
# Allow extra fields in the request body, which is useful for backward
|
92
|
+
# compatibility, i.e., we can add new fields to the request body without
|
93
|
+
# breaking the existing old API server.
|
94
|
+
model_config = pydantic.ConfigDict(extra='allow')
|
95
|
+
|
91
96
|
def __init__(self, **data):
|
92
97
|
data['env_vars'] = data.get('env_vars', request_body_env_vars())
|
93
98
|
usage_lib_entrypoint = usage_lib.messages.usage.entrypoint
|
@@ -126,6 +131,7 @@ class CheckBody(RequestBody):
|
|
126
131
|
"""The request body for the check endpoint."""
|
127
132
|
clouds: Optional[Tuple[str, ...]] = None
|
128
133
|
verbose: bool = False
|
134
|
+
workspace: Optional[str] = None
|
129
135
|
|
130
136
|
|
131
137
|
class DagRequestBody(RequestBody):
|
@@ -525,3 +531,25 @@ class UploadZipFileResponse(pydantic.BaseModel):
|
|
525
531
|
"""The response body for the upload zip file endpoint."""
|
526
532
|
status: str
|
527
533
|
missing_chunks: Optional[List[str]] = None
|
534
|
+
|
535
|
+
|
536
|
+
class EnabledCloudsBody(RequestBody):
|
537
|
+
"""The request body for the enabled clouds endpoint."""
|
538
|
+
workspace: Optional[str] = None
|
539
|
+
|
540
|
+
|
541
|
+
class UpdateWorkspaceBody(RequestBody):
|
542
|
+
"""The request body for updating a specific workspace configuration."""
|
543
|
+
workspace_name: str = '' # Will be set from path parameter
|
544
|
+
config: Dict[str, Any]
|
545
|
+
|
546
|
+
|
547
|
+
class CreateWorkspaceBody(RequestBody):
|
548
|
+
"""The request body for creating a new workspace."""
|
549
|
+
workspace_name: str = '' # Will be set from path parameter
|
550
|
+
config: Dict[str, Any]
|
551
|
+
|
552
|
+
|
553
|
+
class DeleteWorkspaceBody(RequestBody):
|
554
|
+
"""The request body for deleting a workspace."""
|
555
|
+
workspace_name: str
|