dstack 0.18.40rc1__py3-none-any.whl → 0.18.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. dstack/_internal/cli/commands/apply.py +8 -5
  2. dstack/_internal/cli/services/configurators/base.py +4 -2
  3. dstack/_internal/cli/services/configurators/fleet.py +21 -9
  4. dstack/_internal/cli/services/configurators/gateway.py +15 -0
  5. dstack/_internal/cli/services/configurators/run.py +6 -5
  6. dstack/_internal/cli/services/configurators/volume.py +15 -0
  7. dstack/_internal/cli/services/repos.py +3 -3
  8. dstack/_internal/cli/utils/fleet.py +44 -33
  9. dstack/_internal/cli/utils/run.py +27 -7
  10. dstack/_internal/cli/utils/volume.py +30 -9
  11. dstack/_internal/core/backends/aws/compute.py +94 -53
  12. dstack/_internal/core/backends/aws/resources.py +22 -12
  13. dstack/_internal/core/backends/azure/compute.py +2 -0
  14. dstack/_internal/core/backends/base/compute.py +20 -2
  15. dstack/_internal/core/backends/gcp/compute.py +32 -24
  16. dstack/_internal/core/backends/gcp/resources.py +0 -15
  17. dstack/_internal/core/backends/oci/compute.py +10 -5
  18. dstack/_internal/core/backends/oci/resources.py +23 -26
  19. dstack/_internal/core/backends/remote/provisioning.py +65 -27
  20. dstack/_internal/core/backends/runpod/compute.py +1 -0
  21. dstack/_internal/core/models/backends/azure.py +3 -1
  22. dstack/_internal/core/models/configurations.py +24 -1
  23. dstack/_internal/core/models/fleets.py +46 -0
  24. dstack/_internal/core/models/instances.py +5 -1
  25. dstack/_internal/core/models/pools.py +4 -1
  26. dstack/_internal/core/models/profiles.py +10 -4
  27. dstack/_internal/core/models/runs.py +23 -3
  28. dstack/_internal/core/models/volumes.py +26 -0
  29. dstack/_internal/core/services/ssh/attach.py +92 -53
  30. dstack/_internal/core/services/ssh/tunnel.py +58 -31
  31. dstack/_internal/proxy/gateway/routers/registry.py +2 -0
  32. dstack/_internal/proxy/gateway/schemas/registry.py +2 -0
  33. dstack/_internal/proxy/gateway/services/registry.py +4 -0
  34. dstack/_internal/proxy/lib/models.py +3 -0
  35. dstack/_internal/proxy/lib/services/service_connection.py +8 -1
  36. dstack/_internal/server/background/tasks/process_instances.py +73 -35
  37. dstack/_internal/server/background/tasks/process_metrics.py +9 -9
  38. dstack/_internal/server/background/tasks/process_running_jobs.py +77 -26
  39. dstack/_internal/server/background/tasks/process_runs.py +2 -12
  40. dstack/_internal/server/background/tasks/process_submitted_jobs.py +121 -49
  41. dstack/_internal/server/background/tasks/process_terminating_jobs.py +14 -3
  42. dstack/_internal/server/background/tasks/process_volumes.py +11 -1
  43. dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
  44. dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
  45. dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
  46. dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
  47. dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
  48. dstack/_internal/server/models.py +27 -23
  49. dstack/_internal/server/routers/runs.py +1 -0
  50. dstack/_internal/server/schemas/runner.py +1 -0
  51. dstack/_internal/server/services/backends/configurators/azure.py +34 -8
  52. dstack/_internal/server/services/config.py +9 -0
  53. dstack/_internal/server/services/fleets.py +32 -3
  54. dstack/_internal/server/services/gateways/client.py +9 -1
  55. dstack/_internal/server/services/jobs/__init__.py +217 -45
  56. dstack/_internal/server/services/jobs/configurators/base.py +47 -2
  57. dstack/_internal/server/services/offers.py +96 -10
  58. dstack/_internal/server/services/pools.py +98 -14
  59. dstack/_internal/server/services/proxy/repo.py +17 -3
  60. dstack/_internal/server/services/runner/client.py +9 -6
  61. dstack/_internal/server/services/runner/ssh.py +33 -5
  62. dstack/_internal/server/services/runs.py +48 -179
  63. dstack/_internal/server/services/services/__init__.py +9 -1
  64. dstack/_internal/server/services/volumes.py +68 -9
  65. dstack/_internal/server/statics/index.html +1 -1
  66. dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js → main-2ac66bfcbd2e39830b88.js} +30 -31
  67. dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js.map → main-2ac66bfcbd2e39830b88.js.map} +1 -1
  68. dstack/_internal/server/statics/{main-fc56d1f4af8e57522a1c.css → main-ad5150a441de98cd8987.css} +1 -1
  69. dstack/_internal/server/testing/common.py +130 -61
  70. dstack/_internal/utils/common.py +22 -8
  71. dstack/_internal/utils/env.py +14 -0
  72. dstack/_internal/utils/ssh.py +1 -1
  73. dstack/api/server/_fleets.py +25 -1
  74. dstack/api/server/_runs.py +23 -2
  75. dstack/api/server/_volumes.py +12 -1
  76. dstack/version.py +1 -1
  77. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/METADATA +1 -1
  78. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/RECORD +104 -93
  79. tests/_internal/cli/services/configurators/test_profile.py +3 -3
  80. tests/_internal/core/services/ssh/test_tunnel.py +56 -4
  81. tests/_internal/proxy/gateway/routers/test_registry.py +30 -7
  82. tests/_internal/server/background/tasks/test_process_instances.py +138 -20
  83. tests/_internal/server/background/tasks/test_process_metrics.py +12 -0
  84. tests/_internal/server/background/tasks/test_process_running_jobs.py +193 -0
  85. tests/_internal/server/background/tasks/test_process_runs.py +27 -3
  86. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +53 -6
  87. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +135 -17
  88. tests/_internal/server/routers/test_fleets.py +15 -2
  89. tests/_internal/server/routers/test_pools.py +6 -0
  90. tests/_internal/server/routers/test_runs.py +27 -0
  91. tests/_internal/server/routers/test_volumes.py +9 -2
  92. tests/_internal/server/services/jobs/__init__.py +0 -0
  93. tests/_internal/server/services/jobs/configurators/__init__.py +0 -0
  94. tests/_internal/server/services/jobs/configurators/test_base.py +72 -0
  95. tests/_internal/server/services/runner/test_client.py +22 -3
  96. tests/_internal/server/services/test_offers.py +167 -0
  97. tests/_internal/server/services/test_pools.py +109 -1
  98. tests/_internal/server/services/test_runs.py +5 -41
  99. tests/_internal/utils/test_common.py +21 -0
  100. tests/_internal/utils/test_env.py +38 -0
  101. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/LICENSE.md +0 -0
  102. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/WHEEL +0 -0
  103. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/entry_points.txt +0 -0
  104. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/top_level.txt +0 -0
@@ -3,18 +3,19 @@ import json
3
3
  from typing import Dict, List, Optional
4
4
 
5
5
  from sqlalchemy import delete, select
6
- from sqlalchemy.orm import selectinload
6
+ from sqlalchemy.orm import joinedload
7
7
 
8
8
  from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT
9
9
  from dstack._internal.core.models.runs import JobStatus
10
10
  from dstack._internal.server import settings
11
11
  from dstack._internal.server.db import get_session_ctx
12
- from dstack._internal.server.models import JobMetricsPoint, JobModel
12
+ from dstack._internal.server.models import InstanceModel, JobMetricsPoint, JobModel
13
13
  from dstack._internal.server.schemas.runner import MetricsResponse
14
14
  from dstack._internal.server.services.jobs import get_job_provisioning_data, get_job_runtime_data
15
+ from dstack._internal.server.services.pools import get_instance_ssh_private_keys
15
16
  from dstack._internal.server.services.runner import client
16
17
  from dstack._internal.server.services.runner.ssh import runner_ssh_tunnel
17
- from dstack._internal.utils.common import batched, get_current_datetime, run_async
18
+ from dstack._internal.utils.common import batched, get_current_datetime, get_or_error, run_async
18
19
  from dstack._internal.utils.logging import get_logger
19
20
 
20
21
  logger = get_logger(__name__)
@@ -29,14 +30,12 @@ async def collect_metrics():
29
30
  async with get_session_ctx() as session:
30
31
  res = await session.execute(
31
32
  select(JobModel)
32
- .where(
33
- JobModel.status.in_([JobStatus.RUNNING]),
34
- )
35
- .options(selectinload(JobModel.project))
33
+ .where(JobModel.status.in_([JobStatus.RUNNING]))
34
+ .options(joinedload(JobModel.instance).joinedload(InstanceModel.project))
36
35
  .order_by(JobModel.last_processed_at.asc())
37
36
  .limit(MAX_JOBS_FETCHED)
38
37
  )
39
- job_models = res.scalars().all()
38
+ job_models = res.unique().scalars().all()
40
39
 
41
40
  for batch in batched(job_models, BATCH_SIZE):
42
41
  await _collect_jobs_metrics(batch)
@@ -87,6 +86,7 @@ def _get_recently_collected_metric_cutoff() -> int:
87
86
 
88
87
 
89
88
  async def _collect_job_metrics(job_model: JobModel) -> Optional[JobMetricsPoint]:
89
+ ssh_private_keys = get_instance_ssh_private_keys(get_or_error(job_model.instance))
90
90
  jpd = get_job_provisioning_data(job_model)
91
91
  jrd = get_job_runtime_data(job_model)
92
92
  if jpd is None:
@@ -94,7 +94,7 @@ async def _collect_job_metrics(job_model: JobModel) -> Optional[JobMetricsPoint]
94
94
  try:
95
95
  res = await run_async(
96
96
  _pull_runner_metrics,
97
- job_model.project.ssh_private_key,
97
+ ssh_private_keys,
98
98
  jpd,
99
99
  jrd,
100
100
  )
@@ -10,7 +10,12 @@ from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT, DSTACK_SHIM_HT
10
10
  from dstack._internal.core.errors import GatewayError
11
11
  from dstack._internal.core.models.backends.base import BackendType
12
12
  from dstack._internal.core.models.common import NetworkMode, RegistryAuth, is_core_model_instance
13
- from dstack._internal.core.models.instances import InstanceStatus, RemoteConnectionInfo
13
+ from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
14
+ from dstack._internal.core.models.instances import (
15
+ InstanceStatus,
16
+ RemoteConnectionInfo,
17
+ SSHConnectionParams,
18
+ )
14
19
  from dstack._internal.core.models.repos import RemoteRepoCreds
15
20
  from dstack._internal.core.models.runs import (
16
21
  ClusterInfo,
@@ -20,10 +25,12 @@ from dstack._internal.core.models.runs import (
20
25
  JobStatus,
21
26
  JobTerminationReason,
22
27
  Run,
28
+ RunSpec,
23
29
  )
24
30
  from dstack._internal.core.models.volumes import InstanceMountPoint, Volume, VolumeMountPoint
25
31
  from dstack._internal.server.db import get_session_ctx
26
32
  from dstack._internal.server.models import (
33
+ InstanceModel,
27
34
  JobModel,
28
35
  ProjectModel,
29
36
  RepoModel,
@@ -34,11 +41,13 @@ from dstack._internal.server.services import logs as logs_services
34
41
  from dstack._internal.server.services import services
35
42
  from dstack._internal.server.services.jobs import (
36
43
  find_job,
44
+ get_job_attached_volumes,
37
45
  get_job_runtime_data,
38
46
  job_model_to_job_submission,
39
47
  )
40
48
  from dstack._internal.server.services.locking import get_locker
41
49
  from dstack._internal.server.services.logging import fmt
50
+ from dstack._internal.server.services.pools import get_instance_ssh_private_keys
42
51
  from dstack._internal.server.services.repos import (
43
52
  get_code_model,
44
53
  get_repo_creds,
@@ -47,7 +56,6 @@ from dstack._internal.server.services.repos import (
47
56
  from dstack._internal.server.services.runner import client
48
57
  from dstack._internal.server.services.runner.ssh import runner_ssh_tunnel
49
58
  from dstack._internal.server.services.runs import (
50
- get_job_volumes,
51
59
  run_model_to_run,
52
60
  )
53
61
  from dstack._internal.server.services.storage import get_default_storage
@@ -81,7 +89,7 @@ async def _process_next_running_job():
81
89
  .limit(1)
82
90
  .with_for_update(skip_locked=True)
83
91
  )
84
- job_model = res.scalar()
92
+ job_model = res.unique().scalar()
85
93
  if job_model is None:
86
94
  return
87
95
  lockset.add(job_model.id)
@@ -99,10 +107,10 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
99
107
  res = await session.execute(
100
108
  select(JobModel)
101
109
  .where(JobModel.id == job_model.id)
102
- .options(joinedload(JobModel.instance))
110
+ .options(joinedload(JobModel.instance).joinedload(InstanceModel.project))
103
111
  .execution_options(populate_existing=True)
104
112
  )
105
- job_model = res.scalar_one()
113
+ job_model = res.unique().scalar_one()
106
114
  res = await session.execute(
107
115
  select(RunModel)
108
116
  .where(RunModel.id == job_model.run_id)
@@ -142,25 +150,17 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
142
150
  job_provisioning_data=job_provisioning_data,
143
151
  )
144
152
 
145
- volumes = await get_job_volumes(
153
+ volumes = await get_job_attached_volumes(
146
154
  session=session,
147
155
  project=project,
148
156
  run_spec=run.run_spec,
157
+ job_num=job.job_spec.job_num,
149
158
  job_provisioning_data=job_provisioning_data,
150
159
  )
151
160
 
152
- server_ssh_private_key = project.ssh_private_key
153
- # TODO: Drop this logic and always use project key once it's safe to assume that most on-prem
154
- # fleets are (re)created after this change: https://github.com/dstackai/dstack/pull/1716
155
- if (
156
- job_model.instance is not None
157
- and job_model.instance.remote_connection_info is not None
158
- and job_provisioning_data.dockerized
159
- ):
160
- remote_conn_info: RemoteConnectionInfo = RemoteConnectionInfo.__response__.parse_raw(
161
- job_model.instance.remote_connection_info
162
- )
163
- server_ssh_private_key = remote_conn_info.ssh_keys[0].private
161
+ server_ssh_private_keys = get_instance_ssh_private_keys(
162
+ common_utils.get_or_error(job_model.instance)
163
+ )
164
164
 
165
165
  secrets = {} # TODO secrets
166
166
 
@@ -200,11 +200,12 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
200
200
  user_ssh_key = ""
201
201
  success = await common_utils.run_async(
202
202
  _process_provisioning_with_shim,
203
- server_ssh_private_key,
203
+ server_ssh_private_keys,
204
204
  job_provisioning_data,
205
205
  None,
206
206
  run,
207
207
  job_model,
208
+ job_provisioning_data,
208
209
  volumes,
209
210
  secrets,
210
211
  job.job_spec.registry_auth,
@@ -226,7 +227,7 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
226
227
  )
227
228
  success = await common_utils.run_async(
228
229
  _submit_job_to_runner,
229
- server_ssh_private_key,
230
+ server_ssh_private_keys,
230
231
  job_provisioning_data,
231
232
  None,
232
233
  run,
@@ -269,7 +270,7 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
269
270
  )
270
271
  success = await common_utils.run_async(
271
272
  _process_pulling_with_shim,
272
- server_ssh_private_key,
273
+ server_ssh_private_keys,
273
274
  job_provisioning_data,
274
275
  None,
275
276
  run,
@@ -279,14 +280,14 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
279
280
  code,
280
281
  secrets,
281
282
  repo_creds,
282
- server_ssh_private_key,
283
+ server_ssh_private_keys,
283
284
  job_provisioning_data,
284
285
  )
285
286
  elif initial_status == JobStatus.RUNNING:
286
287
  logger.debug("%s: process running job, age=%s", fmt(job_model), job_submission.age)
287
288
  success = await common_utils.run_async(
288
289
  _process_running,
289
- server_ssh_private_key,
290
+ server_ssh_private_keys,
290
291
  job_provisioning_data,
291
292
  job_submission.job_runtime_data,
292
293
  run_model,
@@ -312,8 +313,24 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
312
313
  and job_model.job_num == 0 # gateway connects only to the first node
313
314
  and run.run_spec.configuration.type == "service"
314
315
  ):
316
+ ssh_head_proxy: Optional[SSHConnectionParams] = None
317
+ ssh_head_proxy_private_key: Optional[str] = None
318
+ instance = common_utils.get_or_error(job_model.instance)
319
+ if instance.remote_connection_info is not None:
320
+ rci = RemoteConnectionInfo.__response__.parse_raw(instance.remote_connection_info)
321
+ if rci.ssh_proxy is not None:
322
+ ssh_head_proxy = rci.ssh_proxy
323
+ ssh_head_proxy_keys = common_utils.get_or_error(rci.ssh_proxy_keys)
324
+ ssh_head_proxy_private_key = ssh_head_proxy_keys[0].private
315
325
  try:
316
- await services.register_replica(session, run_model.gateway_id, run, job_model)
326
+ await services.register_replica(
327
+ session,
328
+ run_model.gateway_id,
329
+ run,
330
+ job_model,
331
+ ssh_head_proxy,
332
+ ssh_head_proxy_private_key,
333
+ )
317
334
  except GatewayError as e:
318
335
  logger.warning(
319
336
  "%s: failed to register service replica: %s, age=%s",
@@ -360,6 +377,7 @@ def _process_provisioning_with_shim(
360
377
  ports: Dict[int, int],
361
378
  run: Run,
362
379
  job_model: JobModel,
380
+ job_provisioning_data: JobProvisioningData,
363
381
  volumes: List[Volume],
364
382
  secrets: Dict[str, str],
365
383
  registry_auth: Optional[RegistryAuth],
@@ -443,6 +461,7 @@ def _process_provisioning_with_shim(
443
461
  host_ssh_user=ssh_user,
444
462
  host_ssh_keys=[ssh_key] if ssh_key else [],
445
463
  container_ssh_keys=public_keys,
464
+ instance_id=job_provisioning_data.instance_id,
446
465
  )
447
466
  else:
448
467
  submitted = shim_client.submit(
@@ -459,6 +478,7 @@ def _process_provisioning_with_shim(
459
478
  mounts=volume_mounts,
460
479
  volumes=volumes,
461
480
  instance_mounts=instance_mounts,
481
+ instance_id=job_provisioning_data.instance_id,
462
482
  )
463
483
  if not submitted:
464
484
  # This can happen when we lost connection to the runner (e.g., network issues), marked
@@ -490,7 +510,7 @@ def _process_pulling_with_shim(
490
510
  code: bytes,
491
511
  secrets: Dict[str, str],
492
512
  repo_credentials: Optional[RemoteRepoCreds],
493
- server_ssh_private_key: str,
513
+ server_ssh_private_keys: tuple[str, Optional[str]],
494
514
  job_provisioning_data: JobProvisioningData,
495
515
  ) -> bool:
496
516
  """
@@ -555,7 +575,7 @@ def _process_pulling_with_shim(
555
575
  return True
556
576
 
557
577
  return _submit_job_to_runner(
558
- server_ssh_private_key,
578
+ server_ssh_private_keys,
559
579
  job_provisioning_data,
560
580
  job_runtime_data,
561
581
  run=run,
@@ -597,6 +617,7 @@ def _process_running(
597
617
  runner_logs=resp.runner_logs,
598
618
  job_logs=resp.job_logs,
599
619
  )
620
+ previous_status = job_model.status
600
621
  if len(resp.job_states) > 0:
601
622
  latest_state_event = resp.job_states[-1]
602
623
  latest_status = latest_state_event.state
@@ -612,10 +633,40 @@ def _process_running(
612
633
  )
613
634
  if latest_state_event.termination_message:
614
635
  job_model.termination_reason_message = latest_state_event.termination_message
636
+ else:
637
+ _terminate_if_inactivity_duration_exceeded(run_model, job_model, resp.no_connections_secs)
638
+ if job_model.status != previous_status:
615
639
  logger.info("%s: now is %s", fmt(job_model), job_model.status.name)
616
640
  return True
617
641
 
618
642
 
643
+ def _terminate_if_inactivity_duration_exceeded(
644
+ run_model: RunModel, job_model: JobModel, no_connections_secs: Optional[int]
645
+ ) -> None:
646
+ conf = RunSpec.__response__.parse_raw(run_model.run_spec).configuration
647
+ if is_core_model_instance(conf, DevEnvironmentConfiguration) and isinstance(
648
+ conf.inactivity_duration, int
649
+ ):
650
+ logger.debug("%s: no SSH connections for %s seconds", fmt(job_model), no_connections_secs)
651
+ job_model.inactivity_secs = no_connections_secs
652
+ if no_connections_secs is None:
653
+ # TODO(0.19 or earlier): make no_connections_secs required
654
+ job_model.status = JobStatus.TERMINATING
655
+ job_model.termination_reason = JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
656
+ job_model.termination_reason_message = (
657
+ "The selected instance was created before dstack 0.18.41"
658
+ " and does not support inactivity_duration"
659
+ )
660
+ elif no_connections_secs >= conf.inactivity_duration:
661
+ job_model.status = JobStatus.TERMINATING
662
+ # TODO(0.19 or earlier): set JobTerminationReason.INACTIVITY_DURATION_EXCEEDED
663
+ job_model.termination_reason = JobTerminationReason.TERMINATED_BY_SERVER
664
+ job_model.termination_reason_message = (
665
+ f"The job was inactive for {no_connections_secs} seconds,"
666
+ f" exceeding the inactivity_duration of {conf.inactivity_duration} seconds"
667
+ )
668
+
669
+
619
670
  def _get_cluster_info(
620
671
  jobs: List[Job],
621
672
  replica_num: int,
@@ -230,7 +230,8 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
230
230
  # the job is submitted
231
231
  replica_statuses.add(RunStatus.SUBMITTED)
232
232
  elif job_model.status == JobStatus.FAILED or (
233
- job_model.status == JobStatus.TERMINATING
233
+ job_model.status
234
+ in [JobStatus.TERMINATING, JobStatus.TERMINATED, JobStatus.ABORTED]
234
235
  and job_model.termination_reason
235
236
  not in {JobTerminationReason.DONE_BY_RUNNER, JobTerminationReason.SCALED_DOWN}
236
237
  ):
@@ -244,17 +245,6 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
244
245
  run_termination_reasons.add(RunTerminationReason.RETRY_LIMIT_EXCEEDED)
245
246
  else:
246
247
  replica_needs_retry = True
247
- elif job_model.status in {
248
- JobStatus.TERMINATING,
249
- JobStatus.TERMINATED,
250
- JobStatus.ABORTED,
251
- }:
252
- # FIXME: This code does not expect JobStatus.TERMINATED status,
253
- # so if a job transitions from RUNNING to TERMINATED,
254
- # the run will transition to PENDING instead of TERMINATING.
255
- # This may not be observed because process_runs is invoked more frequently
256
- # than process_terminating_jobs and because most jobs usually transition to FAILED.
257
- pass # unexpected, but let's ignore it
258
248
  else:
259
249
  raise ValueError(f"Unexpected job status {job_model.status}")
260
250