dstack 0.18.43__py3-none-any.whl → 0.18.44__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of dstack might be problematic.

Files changed (59)
  1. dstack/_internal/cli/services/configurators/run.py +1 -0
  2. dstack/_internal/cli/utils/run.py +11 -0
  3. dstack/_internal/core/backends/aws/compute.py +1 -0
  4. dstack/_internal/core/backends/azure/compute.py +1 -1
  5. dstack/_internal/core/backends/gcp/compute.py +1 -1
  6. dstack/_internal/core/backends/runpod/compute.py +21 -3
  7. dstack/_internal/core/backends/runpod/config.py +8 -0
  8. dstack/_internal/core/models/backends/runpod.py +2 -0
  9. dstack/_internal/core/models/configurations.py +2 -1
  10. dstack/_internal/core/models/profiles.py +46 -1
  11. dstack/_internal/core/models/runs.py +4 -0
  12. dstack/_internal/server/app.py +11 -1
  13. dstack/_internal/server/background/__init__.py +10 -0
  14. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -0
  15. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
  16. dstack/_internal/server/background/tasks/process_running_jobs.py +66 -19
  17. dstack/_internal/server/background/tasks/process_runs.py +1 -0
  18. dstack/_internal/server/background/tasks/process_submitted_jobs.py +4 -1
  19. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  20. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  21. dstack/_internal/server/models.py +11 -0
  22. dstack/_internal/server/routers/metrics.py +21 -2
  23. dstack/_internal/server/routers/prometheus.py +36 -0
  24. dstack/_internal/server/security/permissions.py +1 -1
  25. dstack/_internal/server/services/backends/configurators/runpod.py +3 -33
  26. dstack/_internal/server/services/config.py +13 -3
  27. dstack/_internal/server/services/fleets.py +1 -0
  28. dstack/_internal/server/services/gateways/__init__.py +1 -0
  29. dstack/_internal/server/services/jobs/configurators/base.py +9 -1
  30. dstack/_internal/server/services/metrics.py +103 -70
  31. dstack/_internal/server/services/prometheus.py +87 -0
  32. dstack/_internal/server/services/runner/client.py +14 -3
  33. dstack/_internal/server/services/runs.py +43 -15
  34. dstack/_internal/server/services/volumes.py +1 -0
  35. dstack/_internal/server/settings.py +3 -0
  36. dstack/_internal/server/statics/index.html +1 -1
  37. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4eb116b97819badd1e2c.js} +66 -13
  38. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4eb116b97819badd1e2c.js.map} +1 -1
  39. dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
  40. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  41. dstack/_internal/server/testing/common.py +17 -0
  42. dstack/api/_public/runs.py +3 -0
  43. dstack/api/server/_fleets.py +2 -0
  44. dstack/api/server/_runs.py +4 -0
  45. dstack/api/utils.py +3 -0
  46. dstack/version.py +1 -1
  47. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/METADATA +10 -1
  48. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/RECORD +59 -50
  49. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +189 -0
  50. tests/_internal/server/background/tasks/test_process_running_jobs.py +125 -0
  51. tests/_internal/server/routers/test_fleets.py +2 -0
  52. tests/_internal/server/routers/test_metrics.py +15 -0
  53. tests/_internal/server/routers/test_prometheus.py +244 -0
  54. tests/_internal/server/routers/test_runs.py +79 -56
  55. tests/_internal/server/services/test_metrics.py +163 -0
  56. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/LICENSE.md +0 -0
  57. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/WHEEL +0 -0
  58. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/entry_points.txt +0 -0
  59. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/top_level.txt +0 -0

dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py
@@ -0,0 +1,40 @@
+"""Add JobPrometheusMetrics
+
+Revision ID: 60e444118b6d
+Revises: a751ef183f27
+Create Date: 2025-02-21 10:59:26.339353
+
+"""
+
+import sqlalchemy as sa
+import sqlalchemy_utils
+from alembic import op
+
+import dstack._internal.server.models
+
+# revision identifiers, used by Alembic.
+revision = "60e444118b6d"
+down_revision = "a751ef183f27"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "job_prometheus_metrics",
+        sa.Column("job_id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False),
+        sa.Column("collected_at", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+        sa.Column("text", sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["job_id"], ["jobs.id"], name=op.f("fk_job_prometheus_metrics_job_id_jobs")
+        ),
+        sa.PrimaryKeyConstraint("job_id", name=op.f("pk_job_prometheus_metrics")),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("job_prometheus_metrics")
+    # ### end Alembic commands ###

dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py
@@ -0,0 +1,140 @@
+"""Add JobTerminationReason.TERMINATED_DUE_TO_UTILIZATION_POLICY
+
+Revision ID: 98d1b92988bc
+Revises: 60e444118b6d
+Create Date: 2025-02-28 15:12:37.649876
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from alembic_postgresql_enum import TableReference
+
+# revision identifiers, used by Alembic.
+revision = "98d1b92988bc"
+down_revision = "60e444118b6d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # SQLite
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.alter_column(
+            "termination_reason",
+            existing_type=sa.VARCHAR(length=34),
+            type_=sa.Enum(
+                "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+                "INTERRUPTED_BY_NO_CAPACITY",
+                "WAITING_INSTANCE_LIMIT_EXCEEDED",
+                "WAITING_RUNNER_LIMIT_EXCEEDED",
+                "TERMINATED_BY_USER",
+                "VOLUME_ERROR",
+                "GATEWAY_ERROR",
+                "SCALED_DOWN",
+                "DONE_BY_RUNNER",
+                "ABORTED_BY_USER",
+                "TERMINATED_BY_SERVER",
+                "INACTIVITY_DURATION_EXCEEDED",
+                "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+                "CONTAINER_EXITED_WITH_ERROR",
+                "PORTS_BINDING_FAILED",
+                "CREATING_CONTAINER_ERROR",
+                "EXECUTOR_ERROR",
+                "MAX_DURATION_EXCEEDED",
+                name="jobterminationreason",
+            ),
+            existing_nullable=True,
+        )
+    # PostgreSQL
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="jobterminationreason",
+        new_values=[
+            "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+            "INTERRUPTED_BY_NO_CAPACITY",
+            "WAITING_INSTANCE_LIMIT_EXCEEDED",
+            "WAITING_RUNNER_LIMIT_EXCEEDED",
+            "TERMINATED_BY_USER",
+            "VOLUME_ERROR",
+            "GATEWAY_ERROR",
+            "SCALED_DOWN",
+            "DONE_BY_RUNNER",
+            "ABORTED_BY_USER",
+            "TERMINATED_BY_SERVER",
+            "INACTIVITY_DURATION_EXCEEDED",
+            "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+            "CONTAINER_EXITED_WITH_ERROR",
+            "PORTS_BINDING_FAILED",
+            "CREATING_CONTAINER_ERROR",
+            "EXECUTOR_ERROR",
+            "MAX_DURATION_EXCEEDED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public", table_name="jobs", column_name="termination_reason"
+            )
+        ],
+        enum_values_to_rename=[],
+    )
+
+
+def downgrade() -> None:
+    # SQLite
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.alter_column(
+            "termination_reason",
+            existing_type=sa.Enum(
+                "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+                "INTERRUPTED_BY_NO_CAPACITY",
+                "WAITING_INSTANCE_LIMIT_EXCEEDED",
+                "WAITING_RUNNER_LIMIT_EXCEEDED",
+                "TERMINATED_BY_USER",
+                "VOLUME_ERROR",
+                "GATEWAY_ERROR",
+                "SCALED_DOWN",
+                "DONE_BY_RUNNER",
+                "ABORTED_BY_USER",
+                "TERMINATED_BY_SERVER",
+                "INACTIVITY_DURATION_EXCEEDED",
+                "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+                "CONTAINER_EXITED_WITH_ERROR",
+                "PORTS_BINDING_FAILED",
+                "CREATING_CONTAINER_ERROR",
+                "EXECUTOR_ERROR",
+                "MAX_DURATION_EXCEEDED",
+                name="jobterminationreason",
+            ),
+            type_=sa.VARCHAR(length=34),
+            existing_nullable=True,
+        )
+    # PostgreSQL
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="jobterminationreason",
+        new_values=[
+            "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+            "INTERRUPTED_BY_NO_CAPACITY",
+            "WAITING_INSTANCE_LIMIT_EXCEEDED",
+            "WAITING_RUNNER_LIMIT_EXCEEDED",
+            "TERMINATED_BY_USER",
+            "VOLUME_ERROR",
+            "GATEWAY_ERROR",
+            "SCALED_DOWN",
+            "DONE_BY_RUNNER",
+            "ABORTED_BY_USER",
+            "TERMINATED_BY_SERVER",
+            "INACTIVITY_DURATION_EXCEEDED",
+            "CONTAINER_EXITED_WITH_ERROR",
+            "PORTS_BINDING_FAILED",
+            "CREATING_CONTAINER_ERROR",
+            "EXECUTOR_ERROR",
+            "MAX_DURATION_EXCEEDED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public", table_name="jobs", column_name="termination_reason"
            )
+        ],
+        enum_values_to_rename=[],
+    )

dstack/_internal/server/models.py
@@ -648,3 +648,14 @@ class JobMetricsPoint(BaseModel):
     # json-encoded lists of metric values of len(gpus) length
     gpus_memory_usage_bytes: Mapped[str] = mapped_column(Text)
     gpus_util_percent: Mapped[str] = mapped_column(Text)
+
+
+class JobPrometheusMetrics(BaseModel):
+    __tablename__ = "job_prometheus_metrics"
+
+    job_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("jobs.id"), primary_key=True)
+    job: Mapped["JobModel"] = relationship()
+
+    collected_at: Mapped[datetime] = mapped_column(NaiveDateTime)
+    # Raw Prometheus text response
+    text: Mapped[str] = mapped_column(Text)
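
The table added by the migration above backs this model: one row per job (note the `job_id` primary key), overwritten with the latest scrape collected by the new `process_prometheus_metrics` background task. A minimal sketch of reading a job's stored text back, assuming a standard SQLAlchemy async session; this helper is illustrative, not part of the release:

```python
from typing import Optional
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from dstack._internal.server.models import JobPrometheusMetrics


async def get_stored_prometheus_text(session: AsyncSession, job_id: UUID) -> Optional[str]:
    # job_id is the primary key, so there is at most one row per job.
    res = await session.execute(
        select(JobPrometheusMetrics).where(JobPrometheusMetrics.job_id == job_id)
    )
    row = res.scalar_one_or_none()
    return row.text if row is not None else None
```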

dstack/_internal/server/routers/metrics.py
@@ -1,13 +1,16 @@
-from typing import Tuple
+from datetime import datetime
+from typing import Optional, Tuple
 
 from fastapi import APIRouter, Depends
 from sqlalchemy.ext.asyncio import AsyncSession
 
+from dstack._internal.core.errors import ResourceNotExistsError
 from dstack._internal.core.models.metrics import JobMetrics
 from dstack._internal.server.db import get_session
 from dstack._internal.server.models import ProjectModel, UserModel
 from dstack._internal.server.security.permissions import ProjectMember
 from dstack._internal.server.services import metrics
+from dstack._internal.server.services.jobs import get_run_job_model
 from dstack._internal.server.utils.routers import get_base_api_additional_responses
 
 router = APIRouter(
@@ -24,6 +27,9 @@ async def get_job_metrics(
     run_name: str,
     replica_num: int = 0,
     job_num: int = 0,
+    limit: int = 1,
+    after: Optional[datetime] = None,
+    before: Optional[datetime] = None,
     session: AsyncSession = Depends(get_session),
     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
 ) -> JobMetrics:
@@ -31,6 +37,8 @@ async def get_job_metrics(
     Returns job-level metrics such as hardware utilization
     given `run_name`, `replica_num`, and `job_num`.
     If only `run_name` is specified, returns metrics of `(replica_num=0, job_num=0)`.
+    By default, returns one latest sample. To control time window/number of samples, use
+    `limit`, `after`, `before`.
 
     Supported metrics: [
         "cpu_usage_percent",
@@ -42,10 +50,21 @@ async def get_job_metrics(
     ]
     """
     _, project = user_project
-    return await metrics.get_job_metrics(
+
+    job_model = await get_run_job_model(
         session=session,
         project=project,
         run_name=run_name,
         replica_num=replica_num,
         job_num=job_num,
     )
+    if job_model is None:
+        raise ResourceNotExistsError("Found no job with given parameters")
+
+    return await metrics.get_job_metrics(
+        session=session,
+        job_model=job_model,
+        limit=limit,
+        after=after,
+        before=before,
+    )
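
For clients, the practical effect is that job metrics are no longer limited to the latest sample. A hedged usage sketch over HTTP; the base URL, project name, token, and exact route prefix are assumptions (only the query parameters themselves are confirmed by this diff):

```python
import requests

# Assumed server URL, project, and token; `limit`/`after`/`before`
# are the query parameters added in this release.
resp = requests.get(
    "http://localhost:3000/api/project/main/metrics/job/my-run",
    params={
        "replica_num": 0,
        "job_num": 0,
        "limit": 10,  # return up to 10 samples instead of the default 1
        "after": "2025-03-01T00:00:00",  # only samples collected after this point
    },
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```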

dstack/_internal/server/routers/prometheus.py
@@ -0,0 +1,36 @@
+from typing import Annotated
+
+from fastapi import APIRouter, Depends
+from fastapi.responses import PlainTextResponse
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from dstack._internal.server import settings
+from dstack._internal.server.db import get_session
+from dstack._internal.server.deps import Project
+from dstack._internal.server.models import ProjectModel
+from dstack._internal.server.services import prometheus
+from dstack._internal.server.utils.routers import error_not_found
+
+router = APIRouter(
+    tags=["prometheus"],
+    default_response_class=PlainTextResponse,
+)
+
+
+@router.get("/metrics")
+async def get_prometheus_metrics(
+    session: Annotated[AsyncSession, Depends(get_session)],
+) -> str:
+    if not settings.ENABLE_PROMETHEUS_METRICS:
+        raise error_not_found()
+    return await prometheus.get_metrics(session=session)
+
+
+@router.get("/metrics/project/{project_name}")
+async def get_project_prometheus_metrics(
+    session: Annotated[AsyncSession, Depends(get_session)],
+    project: Annotated[ProjectModel, Depends(Project())],
+) -> str:
+    if not settings.ENABLE_PROMETHEUS_METRICS:
+        raise error_not_found()
+    return await prometheus.get_project_metrics(session=session, project=project)
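
Both endpoints serve the Prometheus text exposition format and return 404 while `settings.ENABLE_PROMETHEUS_METRICS` is off. A quick smoke test, assuming a local server with the setting enabled (the environment variable that toggles it is not shown in this diff):

```python
import requests

base = "http://localhost:3000"  # assumed server address

# Server-wide metrics; 404 unless ENABLE_PROMETHEUS_METRICS is on.
print(requests.get(f"{base}/metrics", timeout=30).text)

# Per-project metrics; the Project() dependency implies project-scoped auth.
print(
    requests.get(
        f"{base}/metrics/project/main",
        headers={"Authorization": "Bearer <token>"},
        timeout=30,
    ).text
)
```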

dstack/_internal/server/security/permissions.py
@@ -80,7 +80,7 @@ class ProjectManager:
         project = await get_project_model_by_name(session=session, project_name=project_name)
         if project is None:
             raise error_forbidden()
-        if user.global_role in GlobalRole.ADMIN:
+        if user.global_role == GlobalRole.ADMIN:
             return user, project
         project_role = get_user_project_role(user=user, project=project)
         if project_role in [ProjectRole.ADMIN, ProjectRole.MANAGER]:
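
The one-character fix above is subtle: if `GlobalRole` is a `str`-backed enum (hypothetical values below), `x in GlobalRole.ADMIN` silently degrades to substring matching on the member's value instead of an equality check:

```python
from enum import Enum


class GlobalRole(str, Enum):  # hypothetical values, for illustration only
    ADMIN = "admin"
    USER = "user"


# `in` against a str-enum member is substring containment, not equality:
assert GlobalRole.ADMIN in GlobalRole.ADMIN     # True, but only because "admin" in "admin"
assert GlobalRole.USER not in GlobalRole.ADMIN  # "user" is not a substring of "admin"

# The intended check:
assert GlobalRole.ADMIN == GlobalRole.ADMIN
```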

dstack/_internal/server/services/backends/configurators/runpod.py
@@ -3,11 +3,7 @@ from typing import List
 
 from dstack._internal.core.backends.base import Backend
 from dstack._internal.core.backends.runpod import RunpodBackend, RunpodConfig, api_client
-from dstack._internal.core.models.backends.base import (
-    BackendType,
-    ConfigElementValue,
-    ConfigMultiElement,
-)
+from dstack._internal.core.models.backends.base import BackendType, ConfigMultiElement
 from dstack._internal.core.models.backends.runpod import (
     RunpodConfigInfo,
     RunpodConfigInfoWithCreds,
@@ -22,25 +18,6 @@ from dstack._internal.server.services.backends.configurators.base import (
     raise_invalid_credentials_error,
 )
 
-REGIONS = [
-    "CA-MTL-1",
-    "CA-MTL-2",
-    "CA-MTL-3",
-    "EU-NL-1",
-    "EU-RO-1",
-    "EU-SE-1",
-    "EUR-IS-1",
-    "EUR-IS-2",
-    "US-CA-1",
-    "US-GA-1",
-    "US-GA-2",
-    "US-KS-2",
-    "US-OR-1",
-    "US-TX-3",
-]
-
-DEFAULT_REGION = "CA-MTL-1"
-
 
 class RunpodConfigurator(Configurator):
     TYPE: BackendType = BackendType.RUNPOD
@@ -50,16 +27,12 @@ class RunpodConfigurator(Configurator):
         if config.creds is None:
             return config_values
         self._validate_runpod_api_key(config.creds.api_key)
-        config_values.regions = self._get_regions_element(
-            selected=config.regions or [DEFAULT_REGION]
-        )
+        config_values.regions = self._get_regions_element(selected=config.regions or [])
         return config_values
 
     def create_backend(
         self, project: ProjectModel, config: RunpodConfigInfoWithCreds
     ) -> BackendModel:
-        if config.regions is None:
-            config.regions = REGIONS
         return BackendModel(
             project_id=project.id,
             type=self.TYPE.value,
@@ -80,10 +53,7 @@ class RunpodConfigurator(Configurator):
         return RunpodBackend(config=config)
 
     def _get_regions_element(self, selected: List[str]) -> ConfigMultiElement:
-        element = ConfigMultiElement(selected=selected)
-        for r in REGIONS:
-            element.values.append(ConfigElementValue(value=r, label=r))
-        return element
+        return ConfigMultiElement(selected=selected)
 
     def _get_backend_config(self, model: BackendModel) -> RunpodConfig:
         return RunpodConfig(

dstack/_internal/server/services/config.py
@@ -6,6 +6,7 @@ from pydantic import BaseModel, Field, ValidationError, root_validator
 from sqlalchemy.ext.asyncio import AsyncSession
 from typing_extensions import Annotated
 
+from dstack._internal.core.backends.runpod.config import RUNPOD_COMMUNITY_CLOUD_DEFAULT
 from dstack._internal.core.errors import (
     BackendNotAvailable,
     ResourceNotExistsError,
@@ -45,7 +46,7 @@ logger = get_logger(__name__)
 # By default, PyYAML chooses the style of a collection depending on whether it has nested collections.
 # If a collection has nested collections, it will be assigned the block style. Otherwise it will have the flow style.
 #
-# We want mapping to always be display in block-style but lists without nested objects in flow-style.
+# We want mapping to always be displayed in block-style but lists without nested objects in flow-style.
 # So we define a custom representeter
 
 
@@ -340,7 +341,7 @@ class KubernetesConfig(CoreModel):
     kubeconfig: Annotated[KubeconfigConfig, Field(description="The kubeconfig configuration")]
     networking: Annotated[
         Optional[KubernetesNetworkingConfig], Field(description="The networking configuration")
-    ]
+    ] = None
 
 
 class KubernetesAPIConfig(CoreModel):
@@ -348,7 +349,7 @@ class KubernetesAPIConfig(CoreModel):
     kubeconfig: Annotated[KubeconfigAPIConfig, Field(description="The kubeconfig configuration")]
     networking: Annotated[
         Optional[KubernetesNetworkingConfig], Field(description="The networking configuration")
-    ]
+    ] = None
 
 
 class LambdaConfig(CoreModel):
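
The `= None` additions are behavioral, not cosmetic: under pydantic v2 semantics, an `Optional[...]` field without an explicit default is still required, so a server config that omitted `networking` would fail validation. A minimal stand-in model demonstrating the difference (not dstack's actual class):

```python
from typing import Annotated, Optional

from pydantic import BaseModel, Field, ValidationError


class WithoutDefault(BaseModel):
    networking: Annotated[Optional[str], Field(description="...")]


class WithDefault(BaseModel):
    networking: Annotated[Optional[str], Field(description="...")] = None


try:
    WithoutDefault()  # raises: Optional alone does not make the field omittable
except ValidationError as e:
    print(e)

print(WithDefault())  # networking=None -- the field may now be omitted
```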
@@ -428,6 +429,15 @@ class RunpodConfig(CoreModel):
         Optional[List[str]],
         Field(description="The list of RunPod regions. Omit to use all regions"),
     ] = None
+    community_cloud: Annotated[
+        Optional[bool],
+        Field(
+            description=(
+                "Whether Community Cloud offers can be suggested in addition to Secure Cloud."
+                f" Defaults to `{str(RUNPOD_COMMUNITY_CLOUD_DEFAULT).lower()}`"
+            )
+        ),
+    ] = None
     creds: Annotated[AnyRunpodCreds, Field(description="The credentials")]
 
 
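
The new flag is tri-state: `None` means "not set in the server config", which defers to the backend-level default. A sketch of the usual resolution pattern; the constant's actual value is an assumption (only its name appears in this diff):

```python
from typing import Optional

# Assumed value; the diff shows only the constant's name.
RUNPOD_COMMUNITY_CLOUD_DEFAULT: bool = True


def resolve_community_cloud(configured: Optional[bool]) -> bool:
    # None -> fall back to the default, so an explicit `false` in the
    # server config can still opt out of Community Cloud offers.
    return configured if configured is not None else RUNPOD_COMMUNITY_CLOUD_DEFAULT


assert resolve_community_cloud(None) is RUNPOD_COMMUNITY_CLOUD_DEFAULT
assert resolve_community_cloud(False) is False
```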

dstack/_internal/server/services/fleets.py
@@ -517,6 +517,7 @@ async def delete_fleets(
         .options(selectinload(FleetModel.instances))
         .options(selectinload(FleetModel.runs))
         .execution_options(populate_existing=True)
+        .order_by(FleetModel.id)  # take locks in order
         .with_for_update()
     )
     fleet_models = res.scalars().unique().all()

dstack/_internal/server/services/gateways/__init__.py
@@ -220,6 +220,7 @@ async def delete_gateways(
         )
         .options(selectinload(GatewayModel.gateway_compute))
         .execution_options(populate_existing=True)
+        .order_by(GatewayModel.id)  # take locks in order
         .with_for_update()
     )
     gateway_models = res.scalars().all()
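
Both `delete_fleets` and `delete_gateways` now take their row locks in primary-key order. Without a deterministic `ORDER BY`, two transactions issuing `SELECT ... FOR UPDATE` over overlapping rows can acquire locks in opposite orders and deadlock; sorting first makes every transaction lock rows in the same sequence. An illustrative query shape with a stand-in model:

```python
import uuid

from sqlalchemy import Uuid, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Item(Base):  # stand-in for FleetModel / GatewayModel
    __tablename__ = "items"
    id: Mapped[uuid.UUID] = mapped_column(Uuid, primary_key=True)


ids = [uuid.uuid4(), uuid.uuid4()]

# Deadlock-prone: row-lock acquisition order follows the (unspecified) scan order.
unsafe = select(Item).where(Item.id.in_(ids)).with_for_update()

# Safe: every transaction locks rows in ascending id order.
safe = (
    select(Item)
    .where(Item.id.in_(ids))
    .order_by(Item.id)  # take locks in order
    .with_for_update()
)
```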

dstack/_internal/server/services/jobs/configurators/base.py
@@ -13,7 +13,11 @@ from dstack._internal.core.models.configurations import (
     PythonVersion,
     RunConfigurationType,
 )
-from dstack._internal.core.models.profiles import DEFAULT_STOP_DURATION, SpotPolicy
+from dstack._internal.core.models.profiles import (
+    DEFAULT_STOP_DURATION,
+    SpotPolicy,
+    UtilizationPolicy,
+)
 from dstack._internal.core.models.runs import (
     AppSpec,
     JobSpec,
@@ -113,6 +117,7 @@ class JobConfigurator(ABC):
             single_branch=self._single_branch(),
             max_duration=self._max_duration(),
             stop_duration=self._stop_duration(),
+            utilization_policy=self._utilization_policy(),
             registry_auth=self._registry_auth(),
             requirements=self._requirements(),
             retry=self._retry(),
@@ -201,6 +206,9 @@ class JobConfigurator(ABC):
         # pydantic validator ensures this is int
         return self.run_spec.merged_profile.stop_duration
 
+    def _utilization_policy(self) -> Optional[UtilizationPolicy]:
+        return self.run_spec.merged_profile.utilization_policy
+
     def _registry_auth(self) -> Optional[RegistryAuth]:
         return self.run_spec.configuration.registry_auth
 
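
`utilization_policy` now flows from the merged profile into the `JobSpec`, and this release adds the matching `TERMINATED_DUE_TO_UTILIZATION_POLICY` termination reason (see the migration above) plus enforcement in `process_running_jobs.py`. The policy's fields are not visible in this diff; a hedged sketch of the kind of check it implies, assuming a minimum GPU utilization threshold over a time window:

```python
from dataclasses import dataclass
from typing import List


@dataclass
class UtilizationPolicy:
    # Assumed shape; the real model lives in dstack/_internal/core/models/profiles.py.
    min_gpu_utilization: int  # percent
    time_window: int  # seconds of samples the check must cover


def violates_policy(policy: UtilizationPolicy, samples: List[List[int]]) -> bool:
    """samples: per-collection-point GPU utilization percentages over `time_window`."""
    if not samples:
        return False
    # Flag only if every GPU stayed below the threshold for the whole window.
    return all(
        all(gpu_util < policy.min_gpu_utilization for gpu_util in point)
        for point in samples
    )
```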