skypilot-nightly 1.0.0.dev20250528__py3-none-any.whl → 1.0.0.dev20250529__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. sky/__init__.py +2 -2
  2. sky/cli.py +13 -3
  3. sky/client/cli.py +13 -3
  4. sky/client/oauth.py +82 -0
  5. sky/client/sdk.py +60 -10
  6. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +3 -3
  7. sky/dashboard/out/404.html +1 -1
  8. sky/dashboard/out/_next/static/HvNkg7hqKM1p0ptAcdDcF/_buildManifest.js +1 -0
  9. sky/dashboard/out/_next/static/chunks/236-90e5498a5b00ec29.js +6 -0
  10. sky/dashboard/out/_next/static/chunks/303-2c7b0f7af571710b.js +6 -0
  11. sky/dashboard/out/_next/static/chunks/{856-62b87c68917b08ed.js → 856-59a1760784c9e770.js} +1 -1
  12. sky/dashboard/out/_next/static/chunks/pages/config-7c48919fe030bc43.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-909f1ceb0fcf1b99.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/pages/infra-d4c6875c88771e17.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-6b80e9e0c6aa16a1.js +6 -0
  16. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  17. sky/dashboard/out/clusters/[cluster].html +1 -1
  18. sky/dashboard/out/clusters.html +1 -1
  19. sky/dashboard/out/config.html +1 -1
  20. sky/dashboard/out/index.html +1 -1
  21. sky/dashboard/out/infra/[context].html +1 -0
  22. sky/dashboard/out/infra.html +1 -1
  23. sky/dashboard/out/jobs/[job].html +1 -1
  24. sky/dashboard/out/jobs.html +1 -1
  25. sky/dashboard/out/users.html +1 -1
  26. sky/dashboard/out/workspace/new.html +1 -1
  27. sky/dashboard/out/workspaces/[name].html +1 -1
  28. sky/dashboard/out/workspaces.html +1 -1
  29. sky/exceptions.py +1 -1
  30. sky/jobs/client/sdk.py +1 -0
  31. sky/jobs/constants.py +2 -0
  32. sky/jobs/controller.py +3 -5
  33. sky/jobs/recovery_strategy.py +148 -102
  34. sky/jobs/scheduler.py +23 -8
  35. sky/jobs/server/core.py +16 -0
  36. sky/jobs/state.py +130 -35
  37. sky/jobs/utils.py +30 -4
  38. sky/resources.py +16 -1
  39. sky/server/common.py +6 -2
  40. sky/server/html/token_page.html +32 -6
  41. sky/server/server.py +3 -1
  42. sky/setup_files/dependencies.py +7 -1
  43. sky/skylet/constants.py +1 -1
  44. sky/task.py +26 -0
  45. sky/templates/jobs-controller.yaml.j2 +2 -1
  46. sky/utils/schemas.py +12 -0
  47. {skypilot_nightly-1.0.0.dev20250528.dist-info → skypilot_nightly-1.0.0.dev20250529.dist-info}/METADATA +3 -1
  48. {skypilot_nightly-1.0.0.dev20250528.dist-info → skypilot_nightly-1.0.0.dev20250529.dist-info}/RECORD +53 -49
  49. sky/dashboard/out/_next/static/Mx1iAbDQn1jMHh3UHmK3R/_buildManifest.js +0 -1
  50. sky/dashboard/out/_next/static/chunks/236-d6900c828331f664.js +0 -6
  51. sky/dashboard/out/_next/static/chunks/pages/config-41738d1896fc02fe.js +0 -6
  52. sky/dashboard/out/_next/static/chunks/pages/infra-881fcd902fbbd0e5.js +0 -6
  53. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-2c29e97a6aa50dd4.js +0 -6
  54. /sky/dashboard/out/_next/static/{Mx1iAbDQn1jMHh3UHmK3R → HvNkg7hqKM1p0ptAcdDcF}/_ssgManifest.js +0 -0
  55. {skypilot_nightly-1.0.0.dev20250528.dist-info → skypilot_nightly-1.0.0.dev20250529.dist-info}/WHEEL +0 -0
  56. {skypilot_nightly-1.0.0.dev20250528.dist-info → skypilot_nightly-1.0.0.dev20250529.dist-info}/entry_points.txt +0 -0
  57. {skypilot_nightly-1.0.0.dev20250528.dist-info → skypilot_nightly-1.0.0.dev20250529.dist-info}/licenses/LICENSE +0 -0
  58. {skypilot_nightly-1.0.0.dev20250528.dist-info → skypilot_nightly-1.0.0.dev20250529.dist-info}/top_level.txt +0 -0
sky/jobs/recovery_strategy.py CHANGED
@@ -18,6 +18,7 @@ from sky import global_user_state
  from sky import sky_logging
  from sky.backends import backend_utils
  from sky.jobs import scheduler
+ from sky.jobs import state
  from sky.jobs import utils as managed_job_utils
  from sky.skylet import job_lib
  from sky.usage import usage_lib
@@ -49,7 +50,7 @@ class StrategyExecutor:

  def __init__(self, cluster_name: str, backend: 'backends.Backend',
  task: 'task_lib.Task', max_restarts_on_errors: int,
- job_id: int) -> None:
+ job_id: int, task_id: int) -> None:
  """Initialize the strategy executor.

  Args:
@@ -65,11 +66,13 @@ class StrategyExecutor:
  self.backend = backend
  self.max_restarts_on_errors = max_restarts_on_errors
  self.job_id = job_id
+ self.task_id = task_id
  self.restart_cnt_on_failure = 0

  @classmethod
  def make(cls, cluster_name: str, backend: 'backends.Backend',
- task: 'task_lib.Task', job_id: int) -> 'StrategyExecutor':
+ task: 'task_lib.Task', job_id: int,
+ task_id: int) -> 'StrategyExecutor':
  """Create a strategy from a task."""

  resource_list = list(task.resources)
@@ -100,7 +103,7 @@ class StrategyExecutor:
  from_str(job_recovery_name))
  assert job_recovery_strategy is not None, job_recovery_name
  return job_recovery_strategy(cluster_name, backend, task,
- max_restarts_on_errors, job_id)
+ max_restarts_on_errors, job_id, task_id)

  def launch(self) -> float:
  """Launch the cluster for the first time.
@@ -235,7 +238,8 @@ class StrategyExecutor:

  def _launch(self,
  max_retry: Optional[int] = 3,
- raise_on_failure: bool = True) -> Optional[float]:
+ raise_on_failure: bool = True,
+ recovery: bool = False) -> Optional[float]:
  """Implementation of launch().

  The function will wait until the job starts running, but will leave the
@@ -275,98 +279,134 @@ class StrategyExecutor:
  backoff = common_utils.Backoff(self.RETRY_INIT_GAP_SECONDS)
  while True:
  retry_cnt += 1
- with scheduler.scheduled_launch(self.job_id):
- try:
- usage_lib.messages.usage.set_internal()
- # Detach setup, so that the setup failure can be detected
- # by the controller process (job_status -> FAILED_SETUP).
- execution.launch(
- self.dag,
- cluster_name=self.cluster_name,
- # We expect to tear down the cluster as soon as the job
- # is finished. However, in case the controller dies, set
- # autodown to try and avoid a resource leak.
- idle_minutes_to_autostop=_AUTODOWN_MINUTES,
- down=True,
- _is_launched_by_jobs_controller=True)
- logger.info('Managed job cluster launched.')
- except (exceptions.InvalidClusterNameError,
- exceptions.NoCloudAccessError,
- exceptions.ResourcesMismatchError) as e:
- logger.error('Failure happened before provisioning. '
- f'{common_utils.format_exception(e)}')
- if raise_on_failure:
- raise exceptions.ProvisionPrechecksError(reasons=[e])
- return None
- except exceptions.ResourcesUnavailableError as e:
- # This is raised when the launch fails due to prechecks or
- # after failing over through all the candidates.
- # Please refer to the docstring of `sky.launch` for more
- # details of how the exception will be structured.
- if not any(
- isinstance(err,
- exceptions.ResourcesUnavailableError)
- for err in e.failover_history):
- # _launch() (this function) should fail/exit directly,
- # if none of the failover reasons were because of
- # resource unavailability or no failover was attempted
- # (the optimizer cannot find feasible resources for
- # requested resources), i.e., e.failover_history is
- # empty. Failing directly avoids the infinite loop of
- # retrying the launch when, e.g., an invalid cluster
- # name is used and --retry-until-up is specified.
- reasons = (e.failover_history
- if e.failover_history else [e])
- reasons_str = '; '.join(
- common_utils.format_exception(err)
- for err in reasons)
- logger.error(
- 'Failure happened before provisioning. Failover '
- f'reasons: {reasons_str}')
+ try:
+ with scheduler.scheduled_launch(self.job_id):
+ # The job state may have been PENDING during backoff -
+ # update to STARTING or RECOVERING.
+ # On the first attempt (when retry_cnt is 1), we should
+ # already be in STARTING or RECOVERING.
+ if retry_cnt > 1:
+ state.set_restarting(self.job_id, self.task_id,
+ recovery)
+ try:
+ usage_lib.messages.usage.set_internal()
+ # Detach setup, so that the setup failure can be
+ # detected by the controller process (job_status ->
+ # FAILED_SETUP).
+ execution.launch(
+ self.dag,
+ cluster_name=self.cluster_name,
+ # We expect to tear down the cluster as soon as the
+ # job is finished. However, in case the controller
+ # dies, set autodown to try and avoid a resource
+ # leak.
+ idle_minutes_to_autostop=_AUTODOWN_MINUTES,
+ down=True,
+ _is_launched_by_jobs_controller=True)
+ logger.info('Managed job cluster launched.')
+ except (exceptions.InvalidClusterNameError,
+ exceptions.NoCloudAccessError,
+ exceptions.ResourcesMismatchError) as e:
+ logger.error('Failure happened before provisioning. '
+ f'{common_utils.format_exception(e)}')
  if raise_on_failure:
- raise exceptions.ProvisionPrechecksError(reasons)
- return None
- logger.info('Failed to launch a cluster with error: '
- f'{common_utils.format_exception(e)})')
- except Exception as e: # pylint: disable=broad-except
- # If the launch fails, it will be recovered by the following
- # code.
- logger.info('Failed to launch a cluster with error: '
- f'{common_utils.format_exception(e)})')
- with ux_utils.enable_traceback():
- logger.info(f' Traceback: {traceback.format_exc()}')
- else: # No exception, the launch succeeds.
- # At this point, a sky.launch() has succeeded. Cluster may
- # be UP (no preemption since) or DOWN (newly preempted).
- job_submitted_at = self._wait_until_job_starts_on_cluster()
- if job_submitted_at is not None:
- return job_submitted_at
- # The job fails to start on the cluster, retry the launch.
- # TODO(zhwu): log the unexpected error to usage collection
- # for future debugging.
- logger.info(
- 'Failed to successfully submit the job to the '
- 'launched cluster, due to unexpected submission errors '
- 'or the cluster being preempted during job submission.')
-
- # If we get here, the launch did not succeed. Tear down the
- # cluster and retry.
- managed_job_utils.terminate_cluster(self.cluster_name)
- if max_retry is not None and retry_cnt >= max_retry:
- # Retry forever if max_retry is None.
- if raise_on_failure:
- with ux_utils.print_exception_no_traceback():
- raise exceptions.ManagedJobReachedMaxRetriesError(
- 'Resources unavailable: failed to launch '
- f'clusters after {max_retry} retries.')
- else:
+ raise exceptions.ProvisionPrechecksError(
+ reasons=[e])
  return None
- # Exit the scheduled_launch context so that the scheulde state is
- # ALIVE during the backoff. This allows other jobs to launch.
- gap_seconds = backoff.current_backoff()
- logger.info('Retrying to launch the cluster in '
- f'{gap_seconds:.1f} seconds.')
- time.sleep(gap_seconds)
+ except exceptions.ResourcesUnavailableError as e:
+ # This is raised when the launch fails due to prechecks
+ # or after failing over through all the candidates.
+ # Please refer to the docstring of `sky.launch` for more
+ # details of how the exception will be structured.
+ if not any(
+ isinstance(err,
+ exceptions.ResourcesUnavailableError)
+ for err in e.failover_history):
+ # _launch() (this function) should fail/exit
+ # directly, if none of the failover reasons were
+ # because of resource unavailability or no failover
+ # was attempted (the optimizer cannot find feasible
+ # resources for requested resources), i.e.,
+ # e.failover_history is empty. Failing directly
+ # avoids the infinite loop of retrying the launch
+ # when, e.g., an invalid cluster name is used and
+ # --retry-until-up is specified.
+ reasons = (e.failover_history
+ if e.failover_history else [e])
+ reasons_str = '; '.join(
+ common_utils.format_exception(err)
+ for err in reasons)
+ logger.error(
+ 'Failure happened before provisioning. '
+ f'Failover reasons: {reasons_str}')
+ if raise_on_failure:
+ raise exceptions.ProvisionPrechecksError(
+ reasons)
+ return None
+ logger.info('Failed to launch a cluster with error: '
+ f'{common_utils.format_exception(e)})')
+ except Exception as e: # pylint: disable=broad-except
+ # If the launch fails, it will be recovered by the
+ # following code.
+ logger.info('Failed to launch a cluster with error: '
+ f'{common_utils.format_exception(e)})')
+ with ux_utils.enable_traceback():
+ logger.info(
+ f' Traceback: {traceback.format_exc()}')
+ else: # No exception, the launch succeeds.
+ # At this point, a sky.launch() has succeeded. Cluster
+ # may be UP (no preemption since) or DOWN (newly
+ # preempted).
+ job_submitted_at = (
+ self._wait_until_job_starts_on_cluster())
+ if job_submitted_at is not None:
+ return job_submitted_at
+ # The job fails to start on the cluster, retry the
+ # launch.
+ # TODO(zhwu): log the unexpected error to usage
+ # collection for future debugging.
+ logger.info(
+ 'Failed to successfully submit the job to the '
+ 'launched cluster, due to unexpected submission '
+ 'errors or the cluster being preempted during '
+ 'job submission.')
+
+ # If we get here, the launch did not succeed. Tear down the
+ # cluster and retry.
+ managed_job_utils.terminate_cluster(self.cluster_name)
+ if max_retry is not None and retry_cnt >= max_retry:
+ # Retry forever if max_retry is None.
+ if raise_on_failure:
+ with ux_utils.print_exception_no_traceback():
+ raise (
+ exceptions.ManagedJobReachedMaxRetriesError(
+ 'Resources unavailable: failed to '
+ f'launch clusters after {max_retry} '
+ 'retries.'))
+ else:
+ return None
+
+ # Raise NoClusterLaunchedError to indicate that the job is
+ # in retry backoff. This will trigger special handling in
+ # scheduler.schedule_launched().
+ # We will exit the scheduled_launch context so that the
+ # schedule state is ALIVE_BACKOFF during the backoff. This
+ # allows other jobs to launch.
+ raise exceptions.NoClusterLaunchedError()
+
+ except exceptions.NoClusterLaunchedError:
+ # Update the status to PENDING during backoff.
+ state.set_backoff_pending(self.job_id, self.task_id)
+ # Calculate the backoff time and sleep.
+ gap_seconds = backoff.current_backoff()
+ logger.info('Retrying to launch the cluster in '
+ f'{gap_seconds:.1f} seconds.')
+ time.sleep(gap_seconds)
+ continue
+ else:
+ # The inner loop should either return or throw
+ # NoClusterLaunchedError.
+ assert False, 'Unreachable'

  def should_restart_on_failure(self) -> bool:
  """Increments counter & checks if job should be restarted on a failure.
@@ -389,9 +429,9 @@ class FailoverStrategyExecutor(StrategyExecutor):

  def __init__(self, cluster_name: str, backend: 'backends.Backend',
  task: 'task_lib.Task', max_restarts_on_errors: int,
- job_id: int) -> None:
+ job_id: int, task_id: int) -> None:
  super().__init__(cluster_name, backend, task, max_restarts_on_errors,
- job_id)
+ job_id, task_id)
  # Note down the cloud/region of the launched cluster, so that we can
  # first retry in the same cloud/region. (Inside recover() we may not
  # rely on cluster handle, as it can be None if the cluster is
@@ -400,8 +440,10 @@ class FailoverStrategyExecutor(StrategyExecutor):

  def _launch(self,
  max_retry: Optional[int] = 3,
- raise_on_failure: bool = True) -> Optional[float]:
- job_submitted_at = super()._launch(max_retry, raise_on_failure)
+ raise_on_failure: bool = True,
+ recovery: bool = False) -> Optional[float]:
+ job_submitted_at = super()._launch(max_retry, raise_on_failure,
+ recovery)
  if job_submitted_at is not None:
  # Only record the cloud/region if the launch is successful.
  handle = global_user_state.get_handle_from_cluster_name(
@@ -436,7 +478,8 @@ class FailoverStrategyExecutor(StrategyExecutor):
  cloud=launched_cloud, region=launched_region, zone=None)
  task.set_resources({new_resources})
  # Not using self.launch to avoid the retry until up logic.
- job_submitted_at = self._launch(raise_on_failure=False)
+ job_submitted_at = self._launch(raise_on_failure=False,
+ recovery=True)
  # Restore the original dag, i.e. reset the region constraint.
  task.set_resources(original_resources)
  if job_submitted_at is not None:
@@ -452,7 +495,8 @@ class FailoverStrategyExecutor(StrategyExecutor):
  'cloud/region.')
  # Not using self.launch to avoid the retry until up logic.
  job_submitted_at = self._launch(max_retry=self._MAX_RETRY_CNT,
- raise_on_failure=False)
+ raise_on_failure=False,
+ recovery=True)
  if job_submitted_at is None:
  # Failed to launch the cluster.
  gap_seconds = self.RETRY_INIT_GAP_SECONDS
@@ -524,7 +568,8 @@ class EagerFailoverStrategyExecutor(FailoverStrategyExecutor):
  region=launched_region)
  }
  # Not using self.launch to avoid the retry until up logic.
- job_submitted_at = self._launch(raise_on_failure=False)
+ job_submitted_at = self._launch(raise_on_failure=False,
+ recovery=True)
  task.blocked_resources = None
  if job_submitted_at is not None:
  return job_submitted_at
@@ -535,7 +580,8 @@ class EagerFailoverStrategyExecutor(FailoverStrategyExecutor):
  'cloud/region.')
  # Not using self.launch to avoid the retry until up logic.
  job_submitted_at = self._launch(max_retry=self._MAX_RETRY_CNT,
- raise_on_failure=False)
+ raise_on_failure=False,
+ recovery=True)
  if job_submitted_at is None:
  # Failed to launch the cluster.
  gap_seconds = self.RETRY_INIT_GAP_SECONDS
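Note on the recovery_strategy.py diff above: _launch() now wraps each attempt in an outer try so that a failed attempt raises exceptions.NoClusterLaunchedError out of the scheduler.scheduled_launch() context, marks the task as PENDING, sleeps for the backoff gap, and retries. Below is a minimal, self-contained sketch of that control flow; scheduled_launch, Backoff, and launch_with_backoff here are simplified stand-ins, not the actual SkyPilot APIs.

# Sketch (assumed names, not SkyPilot's real APIs) of the retry-with-backoff
# shape introduced in _launch() above.
import time
from contextlib import contextmanager


class NoClusterLaunchedError(Exception):
    """Sentinel: this attempt launched nothing and the job is backing off."""


class Backoff:
    """Exponential backoff helper, a stand-in for common_utils.Backoff."""

    def __init__(self, initial: float, factor: float = 2.0, cap: float = 60.0):
        self._gap = initial
        self._factor = factor
        self._cap = cap

    def current_backoff(self) -> float:
        gap = self._gap
        self._gap = min(self._gap * self._factor, self._cap)
        return gap


@contextmanager
def scheduled_launch(job_id: int):
    # Stand-in for scheduler.scheduled_launch(): hold a launch slot while the
    # body runs, then record the resulting schedule state on the way out.
    try:
        yield
    except NoClusterLaunchedError:
        print(f'job {job_id}: schedule state -> ALIVE_BACKOFF')
        raise
    else:
        print(f'job {job_id}: schedule state -> ALIVE')


def launch_with_backoff(job_id: int, succeed_on_attempt: int = 3) -> float:
    backoff = Backoff(initial=1.0)
    retry_cnt = 0
    while True:
        retry_cnt += 1
        try:
            with scheduled_launch(job_id):
                if retry_cnt < succeed_on_attempt:
                    # No capacity this round: leave the context via the
                    # exception path so the scheduler records a backoff,
                    # not ALIVE.
                    raise NoClusterLaunchedError()
                return time.time()  # stands in for job_submitted_at
        except NoClusterLaunchedError:
            gap = backoff.current_backoff()
            print(f'job {job_id}: retrying in {gap:.1f}s')
            time.sleep(gap)


if __name__ == '__main__':
    launch_with_backoff(job_id=1)

Using a sentinel exception lets the scheduler distinguish "backing off" from "launched" without threading an extra return value through the scheduled_launch context.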
sky/jobs/scheduler.py CHANGED
@@ -45,6 +45,7 @@ import typing

  import filelock

+ from sky import exceptions
  from sky import sky_logging
  from sky.adaptors import common as adaptors_common
  from sky.jobs import constants as managed_job_constants
@@ -190,7 +191,8 @@ def maybe_schedule_next_jobs() -> None:
  pass


- def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str) -> None:
+ def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str,
+ priority: int) -> None:
  """Submit an existing job to the scheduler.

  This should be called after a job is created in the `spot` table as
@@ -202,7 +204,7 @@ def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str) -> None:
  """
  with filelock.FileLock(_get_lock_path()):
  state.scheduler_set_waiting(job_id, dag_yaml_path, env_file_path,
- common_utils.get_user_hash())
+ common_utils.get_user_hash(), priority)
  maybe_schedule_next_jobs()


@@ -240,11 +242,19 @@ def scheduled_launch(job_id: int):
  state.ManagedJobScheduleState.LAUNCHING):
  time.sleep(_ALIVE_JOB_LAUNCH_WAIT_INTERVAL)

- yield
-
- with filelock.FileLock(_get_lock_path()):
- state.scheduler_set_alive(job_id)
- maybe_schedule_next_jobs()
+ try:
+ yield
+ except exceptions.NoClusterLaunchedError:
+ # NoClusterLaunchedError is indicates that the job is in retry backoff.
+ # We should transition to ALIVE_BACKOFF instead of ALIVE.
+ with filelock.FileLock(_get_lock_path()):
+ state.scheduler_set_alive_backoff(job_id)
+ raise
+ else:
+ with filelock.FileLock(_get_lock_path()):
+ state.scheduler_set_alive(job_id)
+ finally:
+ maybe_schedule_next_jobs()


  def job_done(job_id: int, idempotent: bool = False) -> None:
@@ -309,5 +319,10 @@ if __name__ == '__main__':
  parser.add_argument('--env-file',
  type=str,
  help='The path to the controller env file.')
+ parser.add_argument(
+ '--priority',
+ type=int,
+ default=500,
+ help='Job priority (0-1000, lower is higher). Default: 500.')
  args = parser.parse_args()
- submit_job(args.job_id, args.dag_yaml, args.env_file)
+ submit_job(args.job_id, args.dag_yaml, args.env_file, args.priority)
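Note on the scheduler.py diff above: scheduled_launch() is a generator-based context manager, so an exception raised inside the `with` block is re-raised at the yield, which is what lets it branch to scheduler_set_alive_backoff while maybe_schedule_next_jobs() still runs in the finally clause on every path. A small sketch of that try/yield/except/else/finally ordering; set_alive, set_alive_backoff, and reschedule are hypothetical stand-ins for the state/scheduler calls.

# Sketch of the try/yield/except/else/finally ordering used by
# scheduled_launch() above (stand-in names, not SkyPilot's real APIs).
import contextlib


class NoClusterLaunchedError(Exception):
    pass


def set_alive(job_id: int) -> None:
    print(f'job {job_id}: ALIVE')


def set_alive_backoff(job_id: int) -> None:
    print(f'job {job_id}: ALIVE_BACKOFF')


def reschedule() -> None:
    print('maybe_schedule_next_jobs()')


@contextlib.contextmanager
def scheduled_launch(job_id: int):
    try:
        # An exception raised in the `with` body re-raises here, at the yield.
        yield
    except NoClusterLaunchedError:
        set_alive_backoff(job_id)  # backoff path
        raise                      # the caller handles the sleep/retry
    else:
        set_alive(job_id)          # normal path: launch finished
    finally:
        reschedule()               # runs on both paths


if __name__ == '__main__':
    with scheduled_launch(1):
        pass  # prints ALIVE, then maybe_schedule_next_jobs()
    with contextlib.suppress(NoClusterLaunchedError):
        with scheduled_launch(2):
            raise NoClusterLaunchedError()  # prints ALIVE_BACKOFF, then reschedule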
sky/jobs/server/core.py CHANGED
@@ -91,6 +91,7 @@ def launch(
  dag_utils.maybe_infer_and_fill_dag_and_task_names(dag)

  task_names = set()
+ priority = None
  for task_ in dag.tasks:
  if task_.name in task_names:
  with ux_utils.print_exception_no_traceback():
@@ -100,6 +101,20 @@ def launch(
  'name only and comment out the task names (so that they '
  'will be auto-generated) .')
  task_names.add(task_.name)
+ if task_.job_priority is not None:
+ if (priority is not None and priority != task_.job_priority):
+ with ux_utils.print_exception_no_traceback():
+ raise ValueError(
+ 'Multiple tasks in the DAG have different priorities. '
+ 'Either specify a priority in only one task, or set '
+ 'the same priority for each task.')
+ priority = task_.job_priority
+
+ if priority is None:
+ priority = managed_job_constants.DEFAULT_PRIORITY
+
+ if priority < 0 or priority > 1000:
+ raise ValueError(f'Priority must be between 0 and 1000, got {priority}')

  dag_utils.fill_default_config_in_dag_for_job_launch(dag)

@@ -186,6 +201,7 @@ def launch(
  service_catalog_common.get_modified_catalog_file_mounts(),
  'dashboard_setup_cmd': managed_job_constants.DASHBOARD_SETUP_CMD,
  'dashboard_user_id': common.SERVER_ID,
+ 'priority': priority,
  **controller_utils.shared_controller_vars_to_fill(
  controller,
  remote_user_config_path=remote_user_config_path,
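Note on the jobs/server/core.py diff above: the new block resolves a single job priority for the whole DAG (every task that sets one must agree, otherwise the default applies) and range-checks it before it is passed to the controller template vars. A minimal sketch of that resolution rule; Task and DEFAULT_PRIORITY here are simplified stand-ins for the SkyPilot class and constant.

# Sketch of the DAG-wide priority resolution applied in launch() above
# (stand-in types and constant, not SkyPilot's real ones).
from dataclasses import dataclass
from typing import Optional, Sequence

DEFAULT_PRIORITY = 500


@dataclass
class Task:
    name: str
    job_priority: Optional[int] = None


def resolve_priority(tasks: Sequence[Task]) -> int:
    priority: Optional[int] = None
    for task in tasks:
        if task.job_priority is not None:
            if priority is not None and priority != task.job_priority:
                # Tasks in one DAG must not disagree on priority.
                raise ValueError(
                    'Multiple tasks in the DAG have different priorities.')
            priority = task.job_priority
    if priority is None:
        priority = DEFAULT_PRIORITY
    if not 0 <= priority <= 1000:
        raise ValueError(f'Priority must be between 0 and 1000, got {priority}')
    return priority


if __name__ == '__main__':
    print(resolve_priority([Task('a'), Task('b', job_priority=250)]))  # 250
    print(resolve_priority([Task('a'), Task('b')]))                    # 500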