dstack 0.18.44__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/gateway.py +15 -3
- dstack/_internal/cli/commands/logs.py +0 -22
- dstack/_internal/cli/commands/stats.py +8 -17
- dstack/_internal/cli/main.py +1 -5
- dstack/_internal/cli/services/configurators/fleet.py +4 -39
- dstack/_internal/cli/services/configurators/run.py +22 -21
- dstack/_internal/cli/services/profile.py +34 -83
- dstack/_internal/cli/utils/gateway.py +1 -1
- dstack/_internal/core/backends/__init__.py +56 -39
- dstack/_internal/core/backends/aws/__init__.py +0 -25
- dstack/_internal/core/backends/aws/auth.py +1 -10
- dstack/_internal/core/backends/aws/backend.py +26 -0
- dstack/_internal/core/backends/aws/compute.py +20 -45
- dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +1 -1
- dstack/_internal/core/backends/azure/__init__.py +0 -20
- dstack/_internal/core/backends/azure/auth.py +2 -11
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +13 -27
- dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
- dstack/_internal/core/backends/azure/models.py +89 -0
- dstack/_internal/core/backends/base/__init__.py +0 -12
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +153 -33
- dstack/_internal/core/backends/base/configurator.py +105 -0
- dstack/_internal/core/backends/base/models.py +14 -0
- dstack/_internal/core/backends/configurators.py +138 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -15
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +8 -26
- dstack/_internal/core/backends/cudo/configurator.py +72 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
- dstack/_internal/core/backends/datacrunch/backend.py +16 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -25
- dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
- dstack/_internal/core/backends/datacrunch/models.py +38 -0
- dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
- dstack/_internal/core/backends/gcp/__init__.py +0 -16
- dstack/_internal/core/backends/gcp/auth.py +2 -11
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +13 -43
- dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
- dstack/_internal/core/backends/gcp/models.py +125 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +16 -5
- dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
- dstack/_internal/core/backends/kubernetes/models.py +72 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
- dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -13
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +16 -2
- dstack/_internal/core/backends/models.py +128 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -15
- dstack/_internal/core/backends/oci/auth.py +1 -5
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +9 -23
- dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
- dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
- dstack/_internal/core/backends/oci/region.py +1 -1
- dstack/_internal/core/backends/runpod/__init__.py +0 -15
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +7 -3
- dstack/_internal/core/backends/runpod/configurator.py +59 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/__init__.py +0 -15
- dstack/_internal/core/backends/tensordock/backend.py +16 -0
- dstack/_internal/core/backends/tensordock/compute.py +8 -27
- dstack/_internal/core/backends/tensordock/configurator.py +68 -0
- dstack/_internal/core/backends/tensordock/models.py +38 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -15
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +2 -2
- dstack/_internal/core/backends/vastai/configurator.py +66 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -15
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +10 -24
- dstack/_internal/core/backends/vultr/configurator.py +64 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/models/backends/__init__.py +0 -184
- dstack/_internal/core/models/backends/base.py +0 -19
- dstack/_internal/core/models/configurations.py +20 -15
- dstack/_internal/core/models/envs.py +4 -3
- dstack/_internal/core/models/fleets.py +17 -22
- dstack/_internal/core/models/gateways.py +3 -3
- dstack/_internal/core/models/instances.py +24 -0
- dstack/_internal/core/models/profiles.py +41 -46
- dstack/_internal/core/models/projects.py +1 -1
- dstack/_internal/core/models/repos/base.py +0 -5
- dstack/_internal/core/models/repos/local.py +3 -3
- dstack/_internal/core/models/repos/remote.py +26 -12
- dstack/_internal/core/models/repos/virtual.py +1 -1
- dstack/_internal/core/models/resources.py +45 -76
- dstack/_internal/core/models/runs.py +17 -19
- dstack/_internal/core/models/volumes.py +1 -3
- dstack/_internal/core/services/profiles.py +7 -16
- dstack/_internal/core/services/repos.py +0 -4
- dstack/_internal/server/app.py +0 -3
- dstack/_internal/server/background/tasks/process_gateways.py +4 -8
- dstack/_internal/server/background/tasks/process_instances.py +14 -9
- dstack/_internal/server/background/tasks/process_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +4 -1
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_running_jobs.py +14 -5
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +16 -37
- dstack/_internal/server/background/tasks/process_volumes.py +5 -2
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/models.py +48 -9
- dstack/_internal/server/routers/backends.py +14 -23
- dstack/_internal/server/routers/instances.py +3 -4
- dstack/_internal/server/routers/metrics.py +10 -8
- dstack/_internal/server/routers/prometheus.py +1 -1
- dstack/_internal/server/routers/repos.py +1 -2
- dstack/_internal/server/routers/runs.py +13 -59
- dstack/_internal/server/schemas/gateways.py +14 -23
- dstack/_internal/server/schemas/projects.py +7 -2
- dstack/_internal/server/schemas/repos.py +2 -38
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/schemas/runs.py +1 -24
- dstack/_internal/server/services/backends/__init__.py +85 -158
- dstack/_internal/server/services/config.py +52 -576
- dstack/_internal/server/services/fleets.py +8 -103
- dstack/_internal/server/services/gateways/__init__.py +12 -4
- dstack/_internal/server/services/{pools.py → instances.py} +22 -329
- dstack/_internal/server/services/jobs/__init__.py +9 -6
- dstack/_internal/server/services/jobs/configurators/base.py +16 -0
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/metrics.py +39 -13
- dstack/_internal/server/services/offers.py +1 -1
- dstack/_internal/server/services/projects.py +23 -14
- dstack/_internal/server/services/prometheus.py +176 -18
- dstack/_internal/server/services/runs.py +24 -16
- dstack/_internal/server/services/volumes.py +8 -4
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js → main-4a0fe83e84574654e397.js} +18 -14
- dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
- dstack/_internal/server/testing/common.py +58 -32
- dstack/_internal/utils/json_schema.py +6 -0
- dstack/_internal/utils/ssh.py +2 -1
- dstack/api/__init__.py +4 -0
- dstack/api/_public/__init__.py +16 -20
- dstack/api/_public/backends.py +1 -1
- dstack/api/_public/repos.py +36 -36
- dstack/api/_public/runs.py +167 -83
- dstack/api/server/__init__.py +11 -13
- dstack/api/server/_backends.py +12 -16
- dstack/api/server/_fleets.py +15 -57
- dstack/api/server/_gateways.py +3 -14
- dstack/api/server/_repos.py +1 -4
- dstack/api/server/_runs.py +21 -100
- dstack/api/server/_volumes.py +10 -5
- dstack/version.py +1 -1
- {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/METADATA +1 -1
- {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/RECORD +218 -204
- tests/_internal/cli/services/configurators/test_profile.py +6 -6
- tests/_internal/core/backends/aws/test_configurator.py +35 -0
- tests/_internal/core/backends/aws/test_resources.py +1 -1
- tests/_internal/core/backends/azure/test_configurator.py +61 -0
- tests/_internal/core/backends/cudo/__init__.py +0 -0
- tests/_internal/core/backends/cudo/test_configurator.py +37 -0
- tests/_internal/core/backends/datacrunch/__init__.py +0 -0
- tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
- tests/_internal/core/backends/gcp/test_configurator.py +42 -0
- tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
- tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
- tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
- tests/_internal/core/backends/oci/test_configurator.py +55 -0
- tests/_internal/core/backends/runpod/__init__.py +0 -0
- tests/_internal/core/backends/runpod/test_configurator.py +33 -0
- tests/_internal/core/backends/tensordock/__init__.py +0 -0
- tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
- tests/_internal/core/backends/vastai/__init__.py +0 -0
- tests/_internal/core/backends/vastai/test_configurator.py +33 -0
- tests/_internal/core/backends/vultr/__init__.py +0 -0
- tests/_internal/core/backends/vultr/test_configurator.py +33 -0
- tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
- tests/_internal/server/background/tasks/test_process_instances.py +49 -48
- tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_running_jobs.py +0 -21
- tests/_internal/server/background/tasks/test_process_runs.py +8 -22
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
- tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
- tests/_internal/server/routers/test_backends.py +6 -764
- tests/_internal/server/routers/test_fleets.py +0 -26
- tests/_internal/server/routers/test_gateways.py +27 -3
- tests/_internal/server/routers/test_instances.py +0 -10
- tests/_internal/server/routers/test_metrics.py +27 -0
- tests/_internal/server/routers/test_projects.py +56 -0
- tests/_internal/server/routers/test_prometheus.py +116 -27
- tests/_internal/server/routers/test_repos.py +0 -15
- tests/_internal/server/routers/test_runs.py +4 -219
- tests/_internal/server/routers/test_volumes.py +2 -3
- tests/_internal/server/services/backends/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
- tests/_internal/server/services/test_config.py +7 -4
- tests/_internal/server/services/test_fleets.py +1 -4
- tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
- tests/_internal/server/services/test_metrics.py +9 -5
- tests/_internal/server/services/test_repos.py +1 -14
- tests/_internal/server/services/test_runs.py +0 -4
- dstack/_internal/cli/commands/pool.py +0 -581
- dstack/_internal/cli/commands/run.py +0 -75
- dstack/_internal/core/backends/aws/config.py +0 -18
- dstack/_internal/core/backends/azure/config.py +0 -12
- dstack/_internal/core/backends/base/config.py +0 -5
- dstack/_internal/core/backends/cudo/config.py +0 -9
- dstack/_internal/core/backends/datacrunch/config.py +0 -9
- dstack/_internal/core/backends/gcp/config.py +0 -22
- dstack/_internal/core/backends/kubernetes/config.py +0 -6
- dstack/_internal/core/backends/lambdalabs/config.py +0 -9
- dstack/_internal/core/backends/nebius/__init__.py +0 -15
- dstack/_internal/core/backends/nebius/api_client.py +0 -319
- dstack/_internal/core/backends/nebius/compute.py +0 -220
- dstack/_internal/core/backends/nebius/config.py +0 -6
- dstack/_internal/core/backends/nebius/types.py +0 -37
- dstack/_internal/core/backends/oci/config.py +0 -6
- dstack/_internal/core/backends/runpod/config.py +0 -17
- dstack/_internal/core/backends/tensordock/config.py +0 -9
- dstack/_internal/core/backends/vastai/config.py +0 -6
- dstack/_internal/core/backends/vultr/config.py +0 -9
- dstack/_internal/core/models/backends/aws.py +0 -86
- dstack/_internal/core/models/backends/azure.py +0 -68
- dstack/_internal/core/models/backends/cudo.py +0 -43
- dstack/_internal/core/models/backends/datacrunch.py +0 -44
- dstack/_internal/core/models/backends/gcp.py +0 -67
- dstack/_internal/core/models/backends/kubernetes.py +0 -40
- dstack/_internal/core/models/backends/lambdalabs.py +0 -43
- dstack/_internal/core/models/backends/nebius.py +0 -54
- dstack/_internal/core/models/backends/runpod.py +0 -42
- dstack/_internal/core/models/backends/tensordock.py +0 -44
- dstack/_internal/core/models/backends/vastai.py +0 -43
- dstack/_internal/core/models/backends/vultr.py +0 -40
- dstack/_internal/core/models/pools.py +0 -43
- dstack/_internal/server/routers/pools.py +0 -142
- dstack/_internal/server/schemas/pools.py +0 -38
- dstack/_internal/server/services/backends/configurators/base.py +0 -72
- dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
- dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
- dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
- dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
- dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
- dstack/_internal/server/services/backends/configurators/runpod.py +0 -67
- dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
- dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
- dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
- dstack/api/_public/pools.py +0 -41
- dstack/api/_public/resources.py +0 -105
- dstack/api/server/_pools.py +0 -63
- tests/_internal/server/routers/test_pools.py +0 -612
- /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
|
@@ -28,9 +28,9 @@ from dstack._internal.server.background.tasks.process_instances import (
|
|
|
28
28
|
process_instances,
|
|
29
29
|
)
|
|
30
30
|
from dstack._internal.server.testing.common import (
|
|
31
|
+
ComputeMockSpec,
|
|
31
32
|
create_instance,
|
|
32
33
|
create_job,
|
|
33
|
-
create_pool,
|
|
34
34
|
create_project,
|
|
35
35
|
create_repo,
|
|
36
36
|
create_run,
|
|
@@ -49,10 +49,10 @@ class TestCheckShim:
|
|
|
49
49
|
self, test_db, session: AsyncSession
|
|
50
50
|
):
|
|
51
51
|
project = await create_project(session=session)
|
|
52
|
-
pool = await create_pool(session, project)
|
|
53
|
-
|
|
54
52
|
instance = await create_instance(
|
|
55
|
-
session,
|
|
53
|
+
session=session,
|
|
54
|
+
project=project,
|
|
55
|
+
status=InstanceStatus.PROVISIONING,
|
|
56
56
|
)
|
|
57
57
|
instance.termination_deadline = get_current_datetime() + dt.timedelta(days=1)
|
|
58
58
|
instance.health_status = "ssh connect problem"
|
|
@@ -78,10 +78,10 @@ class TestCheckShim:
|
|
|
78
78
|
self, test_db, session: AsyncSession
|
|
79
79
|
):
|
|
80
80
|
project = await create_project(session=session)
|
|
81
|
-
pool = await create_pool(session, project)
|
|
82
|
-
|
|
83
81
|
instance = await create_instance(
|
|
84
|
-
session,
|
|
82
|
+
session=session,
|
|
83
|
+
project=project,
|
|
84
|
+
status=InstanceStatus.PROVISIONING,
|
|
85
85
|
)
|
|
86
86
|
instance.started_at = get_current_datetime() + dt.timedelta(minutes=-20)
|
|
87
87
|
instance.health_status = "ssh connect problem"
|
|
@@ -110,7 +110,6 @@ class TestCheckShim:
|
|
|
110
110
|
):
|
|
111
111
|
user = await create_user(session=session)
|
|
112
112
|
project = await create_project(session=session, owner=user)
|
|
113
|
-
pool = await create_pool(session, project)
|
|
114
113
|
repo = await create_repo(
|
|
115
114
|
session=session,
|
|
116
115
|
project_id=project.id,
|
|
@@ -121,9 +120,10 @@ class TestCheckShim:
|
|
|
121
120
|
repo=repo,
|
|
122
121
|
user=user,
|
|
123
122
|
)
|
|
124
|
-
|
|
125
123
|
instance = await create_instance(
|
|
126
|
-
session,
|
|
124
|
+
session=session,
|
|
125
|
+
project=project,
|
|
126
|
+
status=InstanceStatus.PROVISIONING,
|
|
127
127
|
)
|
|
128
128
|
instance.termination_deadline = get_current_datetime().replace(
|
|
129
129
|
tzinfo=dt.timezone.utc
|
|
@@ -158,10 +158,11 @@ class TestCheckShim:
|
|
|
158
158
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
159
159
|
async def test_check_shim_start_termination_deadline(self, test_db, session: AsyncSession):
|
|
160
160
|
project = await create_project(session=session)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
161
|
+
instance = await create_instance(
|
|
162
|
+
session=session,
|
|
163
|
+
project=project,
|
|
164
|
+
status=InstanceStatus.IDLE,
|
|
165
|
+
)
|
|
165
166
|
health_status = "SSH connection fail"
|
|
166
167
|
with patch(
|
|
167
168
|
"dstack._internal.server.background.tasks.process_instances._instance_healthcheck"
|
|
@@ -183,9 +184,11 @@ class TestCheckShim:
|
|
|
183
184
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
184
185
|
async def test_check_shim_stop_termination_deadline(self, test_db, session: AsyncSession):
|
|
185
186
|
project = await create_project(session=session)
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
187
|
+
instance = await create_instance(
|
|
188
|
+
session=session,
|
|
189
|
+
project=project,
|
|
190
|
+
status=InstanceStatus.IDLE,
|
|
191
|
+
)
|
|
189
192
|
instance.termination_deadline = get_current_datetime() + dt.timedelta(minutes=19)
|
|
190
193
|
await session.commit()
|
|
191
194
|
|
|
@@ -206,9 +209,11 @@ class TestCheckShim:
|
|
|
206
209
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
207
210
|
async def test_check_shim_terminate_instance_by_dedaline(self, test_db, session: AsyncSession):
|
|
208
211
|
project = await create_project(session=session)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
+
instance = await create_instance(
|
|
213
|
+
session=session,
|
|
214
|
+
project=project,
|
|
215
|
+
status=InstanceStatus.IDLE,
|
|
216
|
+
)
|
|
212
217
|
termination_deadline_time = get_current_datetime() + dt.timedelta(minutes=-19)
|
|
213
218
|
instance.termination_deadline = termination_deadline_time
|
|
214
219
|
await session.commit()
|
|
@@ -251,7 +256,6 @@ class TestCheckShim:
|
|
|
251
256
|
):
|
|
252
257
|
# see https://github.com/dstackai/dstack/issues/2041
|
|
253
258
|
project = await create_project(session=session)
|
|
254
|
-
pool = await create_pool(session, project)
|
|
255
259
|
if has_job:
|
|
256
260
|
user = await create_user(session=session)
|
|
257
261
|
repo = await create_repo(
|
|
@@ -272,9 +276,8 @@ class TestCheckShim:
|
|
|
272
276
|
else:
|
|
273
277
|
job = None
|
|
274
278
|
instance = await create_instance(
|
|
275
|
-
session,
|
|
276
|
-
project,
|
|
277
|
-
pool,
|
|
279
|
+
session=session,
|
|
280
|
+
project=project,
|
|
278
281
|
created_at=get_current_datetime(),
|
|
279
282
|
termination_policy=termination_policy,
|
|
280
283
|
status=InstanceStatus.IDLE,
|
|
@@ -302,8 +305,9 @@ class TestTerminateIdleTime:
|
|
|
302
305
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
303
306
|
async def test_terminate_by_idle_timeout(self, test_db, session: AsyncSession):
|
|
304
307
|
project = await create_project(session=session)
|
|
305
|
-
|
|
306
|
-
|
|
308
|
+
instance = await create_instance(
|
|
309
|
+
session=session, project=project, status=InstanceStatus.IDLE
|
|
310
|
+
)
|
|
307
311
|
instance.termination_idle_time = 300
|
|
308
312
|
instance.termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
|
|
309
313
|
instance.last_job_processed_at = get_current_datetime() + dt.timedelta(minutes=-19)
|
|
@@ -320,11 +324,9 @@ class TestSSHInstanceTerminateProvisionTimeoutExpired:
|
|
|
320
324
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
321
325
|
async def test_terminate_by_idle_timeout(self, test_db, session: AsyncSession):
|
|
322
326
|
project = await create_project(session=session)
|
|
323
|
-
pool = await create_pool(session, project)
|
|
324
327
|
instance = await create_instance(
|
|
325
|
-
session,
|
|
326
|
-
project,
|
|
327
|
-
pool,
|
|
328
|
+
session=session,
|
|
329
|
+
project=project,
|
|
328
330
|
status=InstanceStatus.PENDING,
|
|
329
331
|
created_at=get_current_datetime() - dt.timedelta(days=100),
|
|
330
332
|
)
|
|
@@ -357,10 +359,9 @@ class TestTerminate:
|
|
|
357
359
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
358
360
|
async def test_terminate(self, test_db, session: AsyncSession):
|
|
359
361
|
project = await create_project(session=session)
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
362
|
+
instance = await create_instance(
|
|
363
|
+
session=session, project=project, status=InstanceStatus.TERMINATING
|
|
364
|
+
)
|
|
364
365
|
reason = "some reason"
|
|
365
366
|
instance.termination_reason = reason
|
|
366
367
|
instance.last_job_processed_at = get_current_datetime() + dt.timedelta(minutes=-19)
|
|
@@ -384,8 +385,9 @@ class TestTerminate:
|
|
|
384
385
|
@pytest.mark.parametrize("error", [BackendError("err"), RuntimeError("err")])
|
|
385
386
|
async def test_terminate_retry(self, test_db, session: AsyncSession, error: Exception):
|
|
386
387
|
project = await create_project(session=session)
|
|
387
|
-
|
|
388
|
-
|
|
388
|
+
instance = await create_instance(
|
|
389
|
+
session=session, project=project, status=InstanceStatus.TERMINATING
|
|
390
|
+
)
|
|
389
391
|
instance.termination_reason = "some reason"
|
|
390
392
|
initial_time = dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc)
|
|
391
393
|
instance.last_job_processed_at = initial_time
|
|
@@ -415,8 +417,9 @@ class TestTerminate:
|
|
|
415
417
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
416
418
|
async def test_terminate_not_retries_if_too_early(self, test_db, session: AsyncSession):
|
|
417
419
|
project = await create_project(session=session)
|
|
418
|
-
|
|
419
|
-
|
|
420
|
+
instance = await create_instance(
|
|
421
|
+
session=session, project=project, status=InstanceStatus.TERMINATING
|
|
422
|
+
)
|
|
420
423
|
instance.termination_reason = "some reason"
|
|
421
424
|
initial_time = dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc)
|
|
422
425
|
instance.last_job_processed_at = initial_time
|
|
@@ -446,8 +449,9 @@ class TestTerminate:
|
|
|
446
449
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
447
450
|
async def test_terminate_on_termination_deadline(self, test_db, session: AsyncSession):
|
|
448
451
|
project = await create_project(session=session)
|
|
449
|
-
|
|
450
|
-
|
|
452
|
+
instance = await create_instance(
|
|
453
|
+
session=session, project=project, status=InstanceStatus.TERMINATING
|
|
454
|
+
)
|
|
451
455
|
instance.termination_reason = "some reason"
|
|
452
456
|
initial_time = dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc)
|
|
453
457
|
instance.last_job_processed_at = initial_time
|
|
@@ -505,11 +509,9 @@ class TestCreateInstance:
|
|
|
505
509
|
expected_blocks: int,
|
|
506
510
|
):
|
|
507
511
|
project = await create_project(session=session)
|
|
508
|
-
pool = await create_pool(session, project)
|
|
509
512
|
instance = await create_instance(
|
|
510
|
-
session,
|
|
511
|
-
project,
|
|
512
|
-
pool,
|
|
513
|
+
session=session,
|
|
514
|
+
project=project,
|
|
513
515
|
status=InstanceStatus.PENDING,
|
|
514
516
|
total_blocks=requested_blocks,
|
|
515
517
|
busy_blocks=0,
|
|
@@ -531,6 +533,7 @@ class TestCreateInstance:
|
|
|
531
533
|
price=1.0,
|
|
532
534
|
availability=InstanceAvailability.AVAILABLE,
|
|
533
535
|
)
|
|
536
|
+
backend_mock.compute.return_value = Mock(spec=ComputeMockSpec)
|
|
534
537
|
backend_mock.compute.return_value.get_offers_cached.return_value = [offer]
|
|
535
538
|
backend_mock.compute.return_value.create_instance.return_value = JobProvisioningData(
|
|
536
539
|
backend=offer.backend,
|
|
@@ -611,11 +614,9 @@ class TestAddSSHInstance:
|
|
|
611
614
|
host_info["cpus"] = cpus
|
|
612
615
|
host_info["gpu_count"] = gpus
|
|
613
616
|
project = await create_project(session=session)
|
|
614
|
-
pool = await create_pool(session, project)
|
|
615
617
|
instance = await create_instance(
|
|
616
|
-
session,
|
|
617
|
-
project,
|
|
618
|
-
pool,
|
|
618
|
+
session=session,
|
|
619
|
+
project=project,
|
|
619
620
|
status=InstanceStatus.PENDING,
|
|
620
621
|
created_at=get_current_datetime(),
|
|
621
622
|
remote_connection_info=get_remote_connection_info(),
|
|
@@ -21,7 +21,6 @@ from dstack._internal.server.testing.common import (
|
|
|
21
21
|
create_instance,
|
|
22
22
|
create_job,
|
|
23
23
|
create_job_metrics_point,
|
|
24
|
-
create_pool,
|
|
25
24
|
create_project,
|
|
26
25
|
create_repo,
|
|
27
26
|
create_run,
|
|
@@ -45,11 +44,9 @@ class TestCollectMetrics:
|
|
|
45
44
|
session=session,
|
|
46
45
|
project_id=project.id,
|
|
47
46
|
)
|
|
48
|
-
pool = await create_pool(session=session, project=project)
|
|
49
47
|
instance = await create_instance(
|
|
50
48
|
session=session,
|
|
51
49
|
project=project,
|
|
52
|
-
pool=pool,
|
|
53
50
|
status=InstanceStatus.BUSY,
|
|
54
51
|
)
|
|
55
52
|
run = await create_run(
|
|
@@ -7,6 +7,7 @@ from dstack._internal.server.background.tasks.process_placement_groups import (
|
|
|
7
7
|
process_placement_groups,
|
|
8
8
|
)
|
|
9
9
|
from dstack._internal.server.testing.common import (
|
|
10
|
+
ComputeMockSpec,
|
|
10
11
|
create_fleet,
|
|
11
12
|
create_placement_group,
|
|
12
13
|
create_project,
|
|
@@ -34,6 +35,7 @@ class TestProcessPlacementGroups:
|
|
|
34
35
|
with patch("dstack._internal.server.services.backends.get_project_backend_by_type") as m:
|
|
35
36
|
aws_mock = Mock()
|
|
36
37
|
m.return_value = aws_mock
|
|
38
|
+
aws_mock.compute.return_value = Mock(spec=ComputeMockSpec)
|
|
37
39
|
await process_placement_groups()
|
|
38
40
|
aws_mock.compute.return_value.delete_placement_group.assert_called_once()
|
|
39
41
|
await session.refresh(placement_group1)
|
|
@@ -21,7 +21,6 @@ from dstack._internal.server.testing.common import (
|
|
|
21
21
|
create_instance,
|
|
22
22
|
create_job,
|
|
23
23
|
create_job_prometheus_metrics,
|
|
24
|
-
create_pool,
|
|
25
24
|
create_project,
|
|
26
25
|
create_repo,
|
|
27
26
|
create_run,
|
|
@@ -45,11 +44,9 @@ class TestCollectPrometheusMetrics:
|
|
|
45
44
|
session=session,
|
|
46
45
|
project_id=project.id,
|
|
47
46
|
)
|
|
48
|
-
pool = await create_pool(session=session, project=project)
|
|
49
47
|
instance = await create_instance(
|
|
50
48
|
session=session,
|
|
51
49
|
project=project,
|
|
52
|
-
pool=pool,
|
|
53
50
|
status=InstanceStatus.BUSY,
|
|
54
51
|
)
|
|
55
52
|
run = await create_run(
|
|
@@ -42,7 +42,6 @@ from dstack._internal.server.testing.common import (
|
|
|
42
42
|
create_instance,
|
|
43
43
|
create_job,
|
|
44
44
|
create_job_metrics_point,
|
|
45
|
-
create_pool,
|
|
46
45
|
create_project,
|
|
47
46
|
create_repo,
|
|
48
47
|
create_run,
|
|
@@ -106,11 +105,9 @@ class TestProcessRunningJobs:
|
|
|
106
105
|
repo=repo,
|
|
107
106
|
user=user,
|
|
108
107
|
)
|
|
109
|
-
pool = await create_pool(session=session, project=project)
|
|
110
108
|
instance = await create_instance(
|
|
111
109
|
session=session,
|
|
112
110
|
project=project,
|
|
113
|
-
pool=pool,
|
|
114
111
|
status=InstanceStatus.BUSY,
|
|
115
112
|
)
|
|
116
113
|
job_provisioning_data = get_job_provisioning_data(dockerized=False)
|
|
@@ -156,11 +153,9 @@ class TestProcessRunningJobs:
|
|
|
156
153
|
repo=repo,
|
|
157
154
|
user=user,
|
|
158
155
|
)
|
|
159
|
-
pool = await create_pool(session=session, project=project)
|
|
160
156
|
instance = await create_instance(
|
|
161
157
|
session=session,
|
|
162
158
|
project=project,
|
|
163
|
-
pool=pool,
|
|
164
159
|
status=InstanceStatus.BUSY,
|
|
165
160
|
)
|
|
166
161
|
job_provisioning_data = get_job_provisioning_data(dockerized=False)
|
|
@@ -207,11 +202,9 @@ class TestProcessRunningJobs:
|
|
|
207
202
|
repo=repo,
|
|
208
203
|
user=user,
|
|
209
204
|
)
|
|
210
|
-
pool = await create_pool(session=session, project=project)
|
|
211
205
|
instance = await create_instance(
|
|
212
206
|
session=session,
|
|
213
207
|
project=project,
|
|
214
|
-
pool=pool,
|
|
215
208
|
status=InstanceStatus.BUSY,
|
|
216
209
|
)
|
|
217
210
|
job_provisioning_data = get_job_provisioning_data(dockerized=False)
|
|
@@ -307,11 +300,9 @@ class TestProcessRunningJobs:
|
|
|
307
300
|
run_name="test-run",
|
|
308
301
|
run_spec=run_spec,
|
|
309
302
|
)
|
|
310
|
-
pool = await create_pool(session=session, project=project)
|
|
311
303
|
instance = await create_instance(
|
|
312
304
|
session=session,
|
|
313
305
|
project=project,
|
|
314
|
-
pool=pool,
|
|
315
306
|
status=InstanceStatus.BUSY,
|
|
316
307
|
)
|
|
317
308
|
job_provisioning_data = get_job_provisioning_data(dockerized=True)
|
|
@@ -377,11 +368,9 @@ class TestProcessRunningJobs:
|
|
|
377
368
|
repo=repo,
|
|
378
369
|
user=user,
|
|
379
370
|
)
|
|
380
|
-
pool = await create_pool(session=session, project=project)
|
|
381
371
|
instance = await create_instance(
|
|
382
372
|
session=session,
|
|
383
373
|
project=project,
|
|
384
|
-
pool=pool,
|
|
385
374
|
status=InstanceStatus.BUSY,
|
|
386
375
|
)
|
|
387
376
|
job = await create_job(
|
|
@@ -434,11 +423,9 @@ class TestProcessRunningJobs:
|
|
|
434
423
|
repo=repo,
|
|
435
424
|
user=user,
|
|
436
425
|
)
|
|
437
|
-
pool = await create_pool(session=session, project=project)
|
|
438
426
|
instance = await create_instance(
|
|
439
427
|
session=session,
|
|
440
428
|
project=project,
|
|
441
|
-
pool=pool,
|
|
442
429
|
status=InstanceStatus.BUSY,
|
|
443
430
|
)
|
|
444
431
|
job_provisioning_data = get_job_provisioning_data(dockerized=True)
|
|
@@ -479,11 +466,9 @@ class TestProcessRunningJobs:
|
|
|
479
466
|
repo=repo,
|
|
480
467
|
user=user,
|
|
481
468
|
)
|
|
482
|
-
pool = await create_pool(session, project)
|
|
483
469
|
instance = await create_instance(
|
|
484
470
|
session=session,
|
|
485
471
|
project=project,
|
|
486
|
-
pool=pool,
|
|
487
472
|
status=InstanceStatus.IDLE,
|
|
488
473
|
)
|
|
489
474
|
job_provisioning_data = get_job_provisioning_data(dockerized=True)
|
|
@@ -528,11 +513,9 @@ class TestProcessRunningJobs:
|
|
|
528
513
|
run_name="test-run",
|
|
529
514
|
run_spec=run_spec,
|
|
530
515
|
)
|
|
531
|
-
pool = await create_pool(session=session, project=project)
|
|
532
516
|
instance = await create_instance(
|
|
533
517
|
session=session,
|
|
534
518
|
project=project,
|
|
535
|
-
pool=pool,
|
|
536
519
|
status=InstanceStatus.BUSY,
|
|
537
520
|
)
|
|
538
521
|
job = await create_job(
|
|
@@ -655,11 +638,9 @@ class TestProcessRunningJobs:
|
|
|
655
638
|
),
|
|
656
639
|
),
|
|
657
640
|
)
|
|
658
|
-
pool = await create_pool(session=session, project=project)
|
|
659
641
|
instance = await create_instance(
|
|
660
642
|
session=session,
|
|
661
643
|
project=project,
|
|
662
|
-
pool=pool,
|
|
663
644
|
status=InstanceStatus.BUSY,
|
|
664
645
|
)
|
|
665
646
|
job = await create_job(
|
|
@@ -762,11 +743,9 @@ class TestProcessRunningJobs:
|
|
|
762
743
|
),
|
|
763
744
|
),
|
|
764
745
|
)
|
|
765
|
-
pool = await create_pool(session=session, project=project)
|
|
766
746
|
instance = await create_instance(
|
|
767
747
|
session=session,
|
|
768
748
|
project=project,
|
|
769
|
-
pool=pool,
|
|
770
749
|
status=InstanceStatus.BUSY,
|
|
771
750
|
)
|
|
772
751
|
job = await create_job(
|
|
@@ -21,7 +21,6 @@ from dstack._internal.server.models import RunModel
|
|
|
21
21
|
from dstack._internal.server.testing.common import (
|
|
22
22
|
create_instance,
|
|
23
23
|
create_job,
|
|
24
|
-
create_pool,
|
|
25
24
|
create_project,
|
|
26
25
|
create_repo,
|
|
27
26
|
create_run,
|
|
@@ -42,9 +41,6 @@ async def make_run(
|
|
|
42
41
|
session=session,
|
|
43
42
|
project_id=project.id,
|
|
44
43
|
)
|
|
45
|
-
project.default_pool = await create_pool(
|
|
46
|
-
session=session, project=project, pool_name="default-pool"
|
|
47
|
-
)
|
|
48
44
|
run_name = "test-run"
|
|
49
45
|
profile = Profile(
|
|
50
46
|
name="test-profile",
|
|
@@ -60,7 +56,7 @@ async def make_run(
|
|
|
60
56
|
replicas=parse_obj_as(Range[int], replicas),
|
|
61
57
|
),
|
|
62
58
|
)
|
|
63
|
-
|
|
59
|
+
run = await create_run(
|
|
64
60
|
session=session,
|
|
65
61
|
project=project,
|
|
66
62
|
repo=repo,
|
|
@@ -69,6 +65,8 @@ async def make_run(
|
|
|
69
65
|
run_spec=run_spec,
|
|
70
66
|
status=status,
|
|
71
67
|
)
|
|
68
|
+
run.project = project
|
|
69
|
+
return run
|
|
72
70
|
|
|
73
71
|
|
|
74
72
|
class TestProcessRuns:
|
|
@@ -117,11 +115,9 @@ class TestProcessRuns:
|
|
|
117
115
|
async def test_terminate_run_jobs(self, test_db, session: AsyncSession):
|
|
118
116
|
run = await make_run(session, status=RunStatus.TERMINATING)
|
|
119
117
|
run.termination_reason = RunTerminationReason.JOB_FAILED
|
|
120
|
-
pool = await create_pool(session=session, project=run.project)
|
|
121
118
|
instance = await create_instance(
|
|
122
119
|
session=session,
|
|
123
120
|
project=run.project,
|
|
124
|
-
pool=pool,
|
|
125
121
|
status=InstanceStatus.BUSY,
|
|
126
122
|
)
|
|
127
123
|
job = await create_job(
|
|
@@ -146,9 +142,7 @@ class TestProcessRuns:
|
|
|
146
142
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
147
143
|
async def test_retry_running_to_pending(self, test_db, session: AsyncSession):
|
|
148
144
|
run = await make_run(session, status=RunStatus.RUNNING)
|
|
149
|
-
instance = await create_instance(
|
|
150
|
-
session, project=run.project, pool=run.project.default_pool, spot=True
|
|
151
|
-
)
|
|
145
|
+
instance = await create_instance(session, project=run.project, spot=True)
|
|
152
146
|
await create_job(
|
|
153
147
|
session=session,
|
|
154
148
|
run=run,
|
|
@@ -169,9 +163,7 @@ class TestProcessRuns:
|
|
|
169
163
|
@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
|
|
170
164
|
async def test_retry_running_to_failed(self, test_db, session: AsyncSession):
|
|
171
165
|
run = await make_run(session, status=RunStatus.RUNNING)
|
|
172
|
-
instance = await create_instance(
|
|
173
|
-
session, project=run.project, pool=run.project.default_pool, spot=True
|
|
174
|
-
)
|
|
166
|
+
instance = await create_instance(session, project=run.project, spot=True)
|
|
175
167
|
# job exited with non-zero code
|
|
176
168
|
await create_job(
|
|
177
169
|
session=session,
|
|
@@ -237,9 +229,7 @@ class TestProcessRunsReplicas:
|
|
|
237
229
|
submitted_at=run.submitted_at,
|
|
238
230
|
last_processed_at=run.submitted_at,
|
|
239
231
|
replica_num=0,
|
|
240
|
-
instance=await create_instance(
|
|
241
|
-
session, project=run.project, pool=run.project.default_pool, spot=True
|
|
242
|
-
),
|
|
232
|
+
instance=await create_instance(session, project=run.project, spot=True),
|
|
243
233
|
job_provisioning_data=get_job_provisioning_data(),
|
|
244
234
|
)
|
|
245
235
|
await create_job(
|
|
@@ -250,9 +240,7 @@ class TestProcessRunsReplicas:
|
|
|
250
240
|
submitted_at=run.submitted_at,
|
|
251
241
|
last_processed_at=run.submitted_at,
|
|
252
242
|
replica_num=1,
|
|
253
|
-
instance=await create_instance(
|
|
254
|
-
session, project=run.project, pool=run.project.default_pool, spot=True
|
|
255
|
-
),
|
|
243
|
+
instance=await create_instance(session, project=run.project, spot=True),
|
|
256
244
|
job_provisioning_data=get_job_provisioning_data(),
|
|
257
245
|
)
|
|
258
246
|
with patch("dstack._internal.utils.common.get_current_datetime") as datetime_mock:
|
|
@@ -273,9 +261,7 @@ class TestProcessRunsReplicas:
|
|
|
273
261
|
submitted_at=run.submitted_at,
|
|
274
262
|
last_processed_at=run.last_processed_at,
|
|
275
263
|
replica_num=0,
|
|
276
|
-
instance=await create_instance(
|
|
277
|
-
session, project=run.project, pool=run.project.default_pool, spot=True
|
|
278
|
-
),
|
|
264
|
+
instance=await create_instance(session, project=run.project, spot=True),
|
|
279
265
|
job_provisioning_data=get_job_provisioning_data(),
|
|
280
266
|
)
|
|
281
267
|
await create_job(
|