dstack 0.18.43__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/gateway.py +15 -3
- dstack/_internal/cli/commands/logs.py +0 -22
- dstack/_internal/cli/commands/stats.py +8 -17
- dstack/_internal/cli/main.py +1 -5
- dstack/_internal/cli/services/configurators/fleet.py +4 -39
- dstack/_internal/cli/services/configurators/run.py +22 -20
- dstack/_internal/cli/services/profile.py +34 -83
- dstack/_internal/cli/utils/gateway.py +1 -1
- dstack/_internal/cli/utils/run.py +11 -0
- dstack/_internal/core/backends/__init__.py +56 -39
- dstack/_internal/core/backends/aws/__init__.py +0 -25
- dstack/_internal/core/backends/aws/auth.py +1 -10
- dstack/_internal/core/backends/aws/backend.py +26 -0
- dstack/_internal/core/backends/aws/compute.py +21 -45
- dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +1 -1
- dstack/_internal/core/backends/azure/__init__.py +0 -20
- dstack/_internal/core/backends/azure/auth.py +2 -11
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +14 -28
- dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
- dstack/_internal/core/backends/azure/models.py +89 -0
- dstack/_internal/core/backends/base/__init__.py +0 -12
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +153 -33
- dstack/_internal/core/backends/base/configurator.py +105 -0
- dstack/_internal/core/backends/base/models.py +14 -0
- dstack/_internal/core/backends/configurators.py +138 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -15
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +8 -26
- dstack/_internal/core/backends/cudo/configurator.py +72 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
- dstack/_internal/core/backends/datacrunch/backend.py +16 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -25
- dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
- dstack/_internal/core/backends/datacrunch/models.py +38 -0
- dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
- dstack/_internal/core/backends/gcp/__init__.py +0 -16
- dstack/_internal/core/backends/gcp/auth.py +2 -11
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +14 -44
- dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
- dstack/_internal/core/backends/gcp/models.py +125 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +16 -5
- dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
- dstack/_internal/core/backends/kubernetes/models.py +72 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
- dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -13
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +16 -2
- dstack/_internal/core/backends/models.py +128 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -15
- dstack/_internal/core/backends/oci/auth.py +1 -5
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +9 -23
- dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
- dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
- dstack/_internal/core/backends/oci/region.py +1 -1
- dstack/_internal/core/backends/runpod/__init__.py +0 -15
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +28 -6
- dstack/_internal/core/backends/runpod/configurator.py +59 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/__init__.py +0 -15
- dstack/_internal/core/backends/tensordock/backend.py +16 -0
- dstack/_internal/core/backends/tensordock/compute.py +8 -27
- dstack/_internal/core/backends/tensordock/configurator.py +68 -0
- dstack/_internal/core/backends/tensordock/models.py +38 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -15
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +2 -2
- dstack/_internal/core/backends/vastai/configurator.py +66 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -15
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +10 -24
- dstack/_internal/core/backends/vultr/configurator.py +64 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/models/backends/__init__.py +0 -184
- dstack/_internal/core/models/backends/base.py +0 -19
- dstack/_internal/core/models/configurations.py +22 -16
- dstack/_internal/core/models/envs.py +4 -3
- dstack/_internal/core/models/fleets.py +17 -22
- dstack/_internal/core/models/gateways.py +3 -3
- dstack/_internal/core/models/instances.py +24 -0
- dstack/_internal/core/models/profiles.py +85 -45
- dstack/_internal/core/models/projects.py +1 -1
- dstack/_internal/core/models/repos/base.py +0 -5
- dstack/_internal/core/models/repos/local.py +3 -3
- dstack/_internal/core/models/repos/remote.py +26 -12
- dstack/_internal/core/models/repos/virtual.py +1 -1
- dstack/_internal/core/models/resources.py +45 -76
- dstack/_internal/core/models/runs.py +21 -19
- dstack/_internal/core/models/volumes.py +1 -3
- dstack/_internal/core/services/profiles.py +7 -16
- dstack/_internal/core/services/repos.py +0 -4
- dstack/_internal/server/app.py +11 -4
- dstack/_internal/server/background/__init__.py +10 -0
- dstack/_internal/server/background/tasks/process_gateways.py +4 -8
- dstack/_internal/server/background/tasks/process_instances.py +14 -9
- dstack/_internal/server/background/tasks/process_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
- dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
- dstack/_internal/server/background/tasks/process_runs.py +1 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
- dstack/_internal/server/background/tasks/process_volumes.py +5 -2
- dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/models.py +59 -9
- dstack/_internal/server/routers/backends.py +14 -23
- dstack/_internal/server/routers/instances.py +3 -4
- dstack/_internal/server/routers/metrics.py +31 -10
- dstack/_internal/server/routers/prometheus.py +36 -0
- dstack/_internal/server/routers/repos.py +1 -2
- dstack/_internal/server/routers/runs.py +13 -59
- dstack/_internal/server/schemas/gateways.py +14 -23
- dstack/_internal/server/schemas/projects.py +7 -2
- dstack/_internal/server/schemas/repos.py +2 -38
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/schemas/runs.py +1 -24
- dstack/_internal/server/security/permissions.py +1 -1
- dstack/_internal/server/services/backends/__init__.py +85 -158
- dstack/_internal/server/services/config.py +53 -567
- dstack/_internal/server/services/fleets.py +9 -103
- dstack/_internal/server/services/gateways/__init__.py +13 -4
- dstack/_internal/server/services/{pools.py → instances.py} +22 -329
- dstack/_internal/server/services/jobs/__init__.py +9 -6
- dstack/_internal/server/services/jobs/configurators/base.py +25 -1
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/metrics.py +131 -72
- dstack/_internal/server/services/offers.py +1 -1
- dstack/_internal/server/services/projects.py +23 -14
- dstack/_internal/server/services/prometheus.py +245 -0
- dstack/_internal/server/services/runner/client.py +14 -3
- dstack/_internal/server/services/runs.py +67 -31
- dstack/_internal/server/services/volumes.py +9 -4
- dstack/_internal/server/settings.py +3 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4a0fe83e84574654e397.js} +76 -19
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
- dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/testing/common.py +75 -32
- dstack/_internal/utils/json_schema.py +6 -0
- dstack/_internal/utils/ssh.py +2 -1
- dstack/api/__init__.py +4 -0
- dstack/api/_public/__init__.py +16 -20
- dstack/api/_public/backends.py +1 -1
- dstack/api/_public/repos.py +36 -36
- dstack/api/_public/runs.py +170 -83
- dstack/api/server/__init__.py +11 -13
- dstack/api/server/_backends.py +12 -16
- dstack/api/server/_fleets.py +15 -55
- dstack/api/server/_gateways.py +3 -14
- dstack/api/server/_repos.py +1 -4
- dstack/api/server/_runs.py +21 -96
- dstack/api/server/_volumes.py +10 -5
- dstack/api/utils.py +3 -0
- dstack/version.py +1 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/METADATA +10 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/RECORD +229 -206
- tests/_internal/cli/services/configurators/test_profile.py +6 -6
- tests/_internal/core/backends/aws/test_configurator.py +35 -0
- tests/_internal/core/backends/aws/test_resources.py +1 -1
- tests/_internal/core/backends/azure/test_configurator.py +61 -0
- tests/_internal/core/backends/cudo/__init__.py +0 -0
- tests/_internal/core/backends/cudo/test_configurator.py +37 -0
- tests/_internal/core/backends/datacrunch/__init__.py +0 -0
- tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
- tests/_internal/core/backends/gcp/test_configurator.py +42 -0
- tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
- tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
- tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
- tests/_internal/core/backends/oci/test_configurator.py +55 -0
- tests/_internal/core/backends/runpod/__init__.py +0 -0
- tests/_internal/core/backends/runpod/test_configurator.py +33 -0
- tests/_internal/core/backends/tensordock/__init__.py +0 -0
- tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
- tests/_internal/core/backends/vastai/__init__.py +0 -0
- tests/_internal/core/backends/vastai/test_configurator.py +33 -0
- tests/_internal/core/backends/vultr/__init__.py +0 -0
- tests/_internal/core/backends/vultr/test_configurator.py +33 -0
- tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
- tests/_internal/server/background/tasks/test_process_instances.py +49 -48
- tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
- tests/_internal/server/background/tasks/test_process_runs.py +8 -22
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
- tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
- tests/_internal/server/routers/test_backends.py +6 -764
- tests/_internal/server/routers/test_fleets.py +2 -26
- tests/_internal/server/routers/test_gateways.py +27 -3
- tests/_internal/server/routers/test_instances.py +0 -10
- tests/_internal/server/routers/test_metrics.py +42 -0
- tests/_internal/server/routers/test_projects.py +56 -0
- tests/_internal/server/routers/test_prometheus.py +333 -0
- tests/_internal/server/routers/test_repos.py +0 -15
- tests/_internal/server/routers/test_runs.py +83 -275
- tests/_internal/server/routers/test_volumes.py +2 -3
- tests/_internal/server/services/backends/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
- tests/_internal/server/services/test_config.py +7 -4
- tests/_internal/server/services/test_fleets.py +1 -4
- tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
- tests/_internal/server/services/test_metrics.py +167 -0
- tests/_internal/server/services/test_repos.py +1 -14
- tests/_internal/server/services/test_runs.py +0 -4
- dstack/_internal/cli/commands/pool.py +0 -581
- dstack/_internal/cli/commands/run.py +0 -75
- dstack/_internal/core/backends/aws/config.py +0 -18
- dstack/_internal/core/backends/azure/config.py +0 -12
- dstack/_internal/core/backends/base/config.py +0 -5
- dstack/_internal/core/backends/cudo/config.py +0 -9
- dstack/_internal/core/backends/datacrunch/config.py +0 -9
- dstack/_internal/core/backends/gcp/config.py +0 -22
- dstack/_internal/core/backends/kubernetes/config.py +0 -6
- dstack/_internal/core/backends/lambdalabs/config.py +0 -9
- dstack/_internal/core/backends/nebius/__init__.py +0 -15
- dstack/_internal/core/backends/nebius/api_client.py +0 -319
- dstack/_internal/core/backends/nebius/compute.py +0 -220
- dstack/_internal/core/backends/nebius/config.py +0 -6
- dstack/_internal/core/backends/nebius/types.py +0 -37
- dstack/_internal/core/backends/oci/config.py +0 -6
- dstack/_internal/core/backends/runpod/config.py +0 -9
- dstack/_internal/core/backends/tensordock/config.py +0 -9
- dstack/_internal/core/backends/vastai/config.py +0 -6
- dstack/_internal/core/backends/vultr/config.py +0 -9
- dstack/_internal/core/models/backends/aws.py +0 -86
- dstack/_internal/core/models/backends/azure.py +0 -68
- dstack/_internal/core/models/backends/cudo.py +0 -43
- dstack/_internal/core/models/backends/datacrunch.py +0 -44
- dstack/_internal/core/models/backends/gcp.py +0 -67
- dstack/_internal/core/models/backends/kubernetes.py +0 -40
- dstack/_internal/core/models/backends/lambdalabs.py +0 -43
- dstack/_internal/core/models/backends/nebius.py +0 -54
- dstack/_internal/core/models/backends/runpod.py +0 -40
- dstack/_internal/core/models/backends/tensordock.py +0 -44
- dstack/_internal/core/models/backends/vastai.py +0 -43
- dstack/_internal/core/models/backends/vultr.py +0 -40
- dstack/_internal/core/models/pools.py +0 -43
- dstack/_internal/server/routers/pools.py +0 -142
- dstack/_internal/server/schemas/pools.py +0 -38
- dstack/_internal/server/services/backends/configurators/base.py +0 -72
- dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
- dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
- dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
- dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
- dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
- dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
- dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
- dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
- dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
- dstack/api/_public/pools.py +0 -41
- dstack/api/_public/resources.py +0 -105
- dstack/api/server/_pools.py +0 -63
- tests/_internal/server/routers/test_pools.py +0 -612
- /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<svg id="uuid-f8d4d392-7c12-4bd9-baff-66fbf7814b91" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 18 18">
|
|
3
|
+
<path d="m3.802,14.032c.388.242,1.033.511,1.715.511.621,0,1.198-.18,1.676-.487,0,0,.001,0,.002-.001l1.805-1.128v4.073c-.286,0-.574-.078-.824-.234l-4.374-2.734Z" fill="#225086"/>
|
|
4
|
+
<path d="m7.853,1.507L.353,9.967c-.579.654-.428,1.642.323,2.111,0,0,2.776,1.735,3.126,1.954.388.242,1.033.511,1.715.511.621,0,1.198-.18,1.676-.487,0,0,.001,0,.002-.001l1.805-1.128-4.364-2.728,4.365-4.924V1s0,0,0,0c-.424,0-.847.169-1.147.507Z" fill="#6df"/>
|
|
5
|
+
<polygon points="4.636 10.199 4.688 10.231 9 12.927 9.001 12.927 9.001 12.927 9.001 5.276 9 5.275 4.636 10.199" fill="#cbf8ff"/>
|
|
6
|
+
<path d="m17.324,12.078c.751-.469.902-1.457.323-2.111l-4.921-5.551c-.397-.185-.842-.291-1.313-.291-.925,0-1.752.399-2.302,1.026l-.109.123h0s4.364,4.924,4.364,4.924h0s0,0,0,0l-4.365,2.728v4.073c.287,0,.573-.078.823-.234l7.5-4.688Z" fill="#074793"/>
|
|
7
|
+
<path d="m9.001,1v4.275s.109-.123.109-.123c.55-.627,1.377-1.026,2.302-1.026.472,0,.916.107,1.313.291l-2.579-2.909c-.299-.338-.723-.507-1.146-.507Z" fill="#0294e4"/>
|
|
8
|
+
<polygon points="13.365 10.199 13.365 10.199 13.365 10.199 9.001 5.276 9.001 12.926 13.365 10.199" fill="#96bcc2"/>
|
|
9
|
+
</svg>
|
|
@@ -8,6 +8,16 @@ from uuid import UUID
|
|
|
8
8
|
import gpuhunt
|
|
9
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
10
|
|
|
11
|
+
from dstack._internal.core.backends.base.compute import (
|
|
12
|
+
Compute,
|
|
13
|
+
ComputeWithCreateInstanceSupport,
|
|
14
|
+
ComputeWithGatewaySupport,
|
|
15
|
+
ComputeWithMultinodeSupport,
|
|
16
|
+
ComputeWithPlacementGroupSupport,
|
|
17
|
+
ComputeWithPrivateGatewaySupport,
|
|
18
|
+
ComputeWithReservationSupport,
|
|
19
|
+
ComputeWithVolumeSupport,
|
|
20
|
+
)
|
|
11
21
|
from dstack._internal.core.models.backends.base import BackendType
|
|
12
22
|
from dstack._internal.core.models.common import NetworkMode
|
|
13
23
|
from dstack._internal.core.models.configurations import (
|
|
@@ -35,8 +45,7 @@ from dstack._internal.core.models.placement import (
|
|
|
35
45
|
PlacementStrategy,
|
|
36
46
|
)
|
|
37
47
|
from dstack._internal.core.models.profiles import (
|
|
38
|
-
|
|
39
|
-
DEFAULT_POOL_TERMINATION_IDLE_TIME,
|
|
48
|
+
DEFAULT_FLEET_TERMINATION_IDLE_TIME,
|
|
40
49
|
Profile,
|
|
41
50
|
TerminationPolicy,
|
|
42
51
|
)
|
|
@@ -69,8 +78,8 @@ from dstack._internal.server.models import (
|
|
|
69
78
|
InstanceModel,
|
|
70
79
|
JobMetricsPoint,
|
|
71
80
|
JobModel,
|
|
81
|
+
JobPrometheusMetrics,
|
|
72
82
|
PlacementGroupModel,
|
|
73
|
-
PoolModel,
|
|
74
83
|
ProjectModel,
|
|
75
84
|
RepoCredsModel,
|
|
76
85
|
RepoModel,
|
|
@@ -180,9 +189,6 @@ async def create_repo(
|
|
|
180
189
|
if info is None:
|
|
181
190
|
info = {
|
|
182
191
|
"repo_type": "remote",
|
|
183
|
-
"repo_host_name": "",
|
|
184
|
-
"repo_port": None,
|
|
185
|
-
"repo_user_name": "",
|
|
186
192
|
"repo_name": "dstack",
|
|
187
193
|
}
|
|
188
194
|
repo = RepoModel(
|
|
@@ -205,7 +211,6 @@ async def create_repo_creds(
|
|
|
205
211
|
) -> RepoCredsModel:
|
|
206
212
|
if creds is None:
|
|
207
213
|
creds = {
|
|
208
|
-
"protocol": "https",
|
|
209
214
|
"clone_url": "https://github.com/dstackai/dstack.git",
|
|
210
215
|
"private_key": None,
|
|
211
216
|
"oauth_token": "test_token",
|
|
@@ -324,13 +329,22 @@ def get_job_provisioning_data(
|
|
|
324
329
|
backend: BackendType = BackendType.AWS,
|
|
325
330
|
region: str = "us-east-1",
|
|
326
331
|
gpu_count: int = 0,
|
|
332
|
+
gpu_memory_gib: float = 16,
|
|
333
|
+
gpu_name: str = "T4",
|
|
327
334
|
cpu_count: int = 1,
|
|
328
335
|
memory_gib: float = 0.5,
|
|
329
336
|
spot: bool = False,
|
|
330
337
|
hostname: str = "127.0.0.4",
|
|
331
338
|
internal_ip: Optional[str] = "127.0.0.4",
|
|
339
|
+
price: float = 10.5,
|
|
332
340
|
) -> JobProvisioningData:
|
|
333
|
-
gpus = [
|
|
341
|
+
gpus = [
|
|
342
|
+
Gpu(
|
|
343
|
+
name=gpu_name,
|
|
344
|
+
memory_mib=int(gpu_memory_gib * 1024),
|
|
345
|
+
vendor=gpuhunt.AcceleratorVendor.NVIDIA,
|
|
346
|
+
)
|
|
347
|
+
] * gpu_count
|
|
334
348
|
return JobProvisioningData(
|
|
335
349
|
backend=backend,
|
|
336
350
|
instance_type=InstanceType(
|
|
@@ -343,7 +357,7 @@ def get_job_provisioning_data(
|
|
|
343
357
|
hostname=hostname,
|
|
344
358
|
internal_ip=internal_ip,
|
|
345
359
|
region=region,
|
|
346
|
-
price=
|
|
360
|
+
price=price,
|
|
347
361
|
username="ubuntu",
|
|
348
362
|
ssh_port=22,
|
|
349
363
|
dockerized=dockerized,
|
|
@@ -438,22 +452,6 @@ def get_gateway_compute_configuration(
|
|
|
438
452
|
)
|
|
439
453
|
|
|
440
454
|
|
|
441
|
-
async def create_pool(
|
|
442
|
-
session: AsyncSession,
|
|
443
|
-
project: ProjectModel,
|
|
444
|
-
pool_name: Optional[str] = None,
|
|
445
|
-
) -> PoolModel:
|
|
446
|
-
pool_name = pool_name if pool_name is not None else DEFAULT_POOL_NAME
|
|
447
|
-
pool = PoolModel(
|
|
448
|
-
name=pool_name,
|
|
449
|
-
project=project,
|
|
450
|
-
project_id=project.id,
|
|
451
|
-
)
|
|
452
|
-
session.add(pool)
|
|
453
|
-
await session.commit()
|
|
454
|
-
return pool
|
|
455
|
-
|
|
456
|
-
|
|
457
455
|
async def create_fleet(
|
|
458
456
|
session: AsyncSession,
|
|
459
457
|
project: ProjectModel,
|
|
@@ -462,11 +460,14 @@ async def create_fleet(
|
|
|
462
460
|
fleet_id: Optional[UUID] = None,
|
|
463
461
|
status: FleetStatus = FleetStatus.ACTIVE,
|
|
464
462
|
deleted: bool = False,
|
|
463
|
+
name: Optional[str] = None,
|
|
465
464
|
) -> FleetModel:
|
|
466
465
|
if fleet_id is None:
|
|
467
466
|
fleet_id = uuid.uuid4()
|
|
468
467
|
if spec is None:
|
|
469
468
|
spec = get_fleet_spec()
|
|
469
|
+
if name is not None:
|
|
470
|
+
spec.configuration.name = name
|
|
470
471
|
fm = FleetModel(
|
|
471
472
|
id=fleet_id,
|
|
472
473
|
project=project,
|
|
@@ -506,7 +507,6 @@ def get_fleet_configuration(
|
|
|
506
507
|
async def create_instance(
|
|
507
508
|
session: AsyncSession,
|
|
508
509
|
project: ProjectModel,
|
|
509
|
-
pool: PoolModel,
|
|
510
510
|
fleet: Optional[FleetModel] = None,
|
|
511
511
|
status: InstanceStatus = InstanceStatus.IDLE,
|
|
512
512
|
unreachable: bool = False,
|
|
@@ -521,7 +521,7 @@ async def create_instance(
|
|
|
521
521
|
instance_num: int = 0,
|
|
522
522
|
backend: BackendType = BackendType.DATACRUNCH,
|
|
523
523
|
termination_policy: Optional[TerminationPolicy] = None,
|
|
524
|
-
termination_idle_time: int =
|
|
524
|
+
termination_idle_time: int = DEFAULT_FLEET_TERMINATION_IDLE_TIME,
|
|
525
525
|
region: str = "eu-west",
|
|
526
526
|
remote_connection_info: Optional[RemoteConnectionInfo] = None,
|
|
527
527
|
offer: Optional[InstanceOfferWithAvailability] = None,
|
|
@@ -530,6 +530,7 @@ async def create_instance(
|
|
|
530
530
|
busy_blocks: int = 0,
|
|
531
531
|
name: str = "test_instance",
|
|
532
532
|
volumes: Optional[List[VolumeModel]] = None,
|
|
533
|
+
price: float = 1.0,
|
|
533
534
|
) -> InstanceModel:
|
|
534
535
|
if instance_id is None:
|
|
535
536
|
instance_id = uuid.uuid4()
|
|
@@ -563,7 +564,6 @@ async def create_instance(
|
|
|
563
564
|
id=instance_id,
|
|
564
565
|
name=name,
|
|
565
566
|
instance_num=instance_num,
|
|
566
|
-
pool=pool,
|
|
567
567
|
fleet=fleet,
|
|
568
568
|
project=project,
|
|
569
569
|
status=status,
|
|
@@ -573,7 +573,7 @@ async def create_instance(
|
|
|
573
573
|
finished_at=finished_at,
|
|
574
574
|
job_provisioning_data=job_provisioning_data.json(),
|
|
575
575
|
offer=offer.json(),
|
|
576
|
-
price=
|
|
576
|
+
price=price,
|
|
577
577
|
region=region,
|
|
578
578
|
backend=backend,
|
|
579
579
|
termination_policy=termination_policy,
|
|
@@ -610,6 +610,8 @@ def get_instance_offer_with_availability(
|
|
|
610
610
|
backend: BackendType = BackendType.AWS,
|
|
611
611
|
region: str = "eu-west",
|
|
612
612
|
gpu_count: int = 0,
|
|
613
|
+
gpu_name: str = "T4",
|
|
614
|
+
gpu_memory_gib: float = 16,
|
|
613
615
|
cpu_count: int = 2,
|
|
614
616
|
memory_gib: float = 12,
|
|
615
617
|
disk_gib: float = 100.0,
|
|
@@ -617,12 +619,20 @@ def get_instance_offer_with_availability(
|
|
|
617
619
|
blocks: int = 1,
|
|
618
620
|
total_blocks: int = 1,
|
|
619
621
|
availability_zones: Optional[List[str]] = None,
|
|
622
|
+
price: float = 1.0,
|
|
623
|
+
instance_type: str = "instance",
|
|
620
624
|
):
|
|
621
|
-
gpus = [
|
|
625
|
+
gpus = [
|
|
626
|
+
Gpu(
|
|
627
|
+
name=gpu_name,
|
|
628
|
+
memory_mib=int(gpu_memory_gib * 1024),
|
|
629
|
+
vendor=gpuhunt.AcceleratorVendor.NVIDIA,
|
|
630
|
+
)
|
|
631
|
+
] * gpu_count
|
|
622
632
|
return InstanceOfferWithAvailability(
|
|
623
633
|
backend=backend,
|
|
624
634
|
instance=InstanceType(
|
|
625
|
-
name=
|
|
635
|
+
name=instance_type,
|
|
626
636
|
resources=Resources(
|
|
627
637
|
cpus=cpu_count,
|
|
628
638
|
memory_mib=int(memory_gib * 1024),
|
|
@@ -633,7 +643,7 @@ def get_instance_offer_with_availability(
|
|
|
633
643
|
),
|
|
634
644
|
),
|
|
635
645
|
region=region,
|
|
636
|
-
price=
|
|
646
|
+
price=price,
|
|
637
647
|
availability=InstanceAvailability.AVAILABLE,
|
|
638
648
|
availability_zones=availability_zones,
|
|
639
649
|
blocks=blocks,
|
|
@@ -858,6 +868,22 @@ async def create_job_metrics_point(
|
|
|
858
868
|
return jmp
|
|
859
869
|
|
|
860
870
|
|
|
871
|
+
async def create_job_prometheus_metrics(
|
|
872
|
+
session: AsyncSession,
|
|
873
|
+
job: JobModel,
|
|
874
|
+
collected_at: datetime = datetime(2023, 1, 2, 3, 4, tzinfo=timezone.utc),
|
|
875
|
+
text: str = "# Prometheus metrics\n",
|
|
876
|
+
):
|
|
877
|
+
metrics = JobPrometheusMetrics(
|
|
878
|
+
job_id=job.id,
|
|
879
|
+
collected_at=collected_at,
|
|
880
|
+
text=text,
|
|
881
|
+
)
|
|
882
|
+
session.add(metrics)
|
|
883
|
+
await session.commit()
|
|
884
|
+
return metrics
|
|
885
|
+
|
|
886
|
+
|
|
861
887
|
def get_private_key_string() -> str:
|
|
862
888
|
return """
|
|
863
889
|
-----BEGIN RSA PRIVATE KEY-----
|
|
@@ -930,3 +956,20 @@ class AsyncContextManager:
|
|
|
930
956
|
|
|
931
957
|
async def __aexit__(self, exc_type, exc, traceback):
|
|
932
958
|
pass
|
|
959
|
+
|
|
960
|
+
|
|
961
|
+
class ComputeMockSpec(
|
|
962
|
+
Compute,
|
|
963
|
+
ComputeWithCreateInstanceSupport,
|
|
964
|
+
ComputeWithMultinodeSupport,
|
|
965
|
+
ComputeWithReservationSupport,
|
|
966
|
+
ComputeWithPlacementGroupSupport,
|
|
967
|
+
ComputeWithGatewaySupport,
|
|
968
|
+
ComputeWithPrivateGatewaySupport,
|
|
969
|
+
ComputeWithVolumeSupport,
|
|
970
|
+
):
|
|
971
|
+
"""
|
|
972
|
+
Can be used to create Compute mocks that pass all isinstance asserts.
|
|
973
|
+
"""
|
|
974
|
+
|
|
975
|
+
pass
|
dstack/_internal/utils/ssh.py
CHANGED
|
@@ -128,7 +128,8 @@ def include_ssh_config(path: PathLike, ssh_config_path: PathLike = default_ssh_c
|
|
|
128
128
|
except PermissionError:
|
|
129
129
|
logger.warning(
|
|
130
130
|
f"Couldn't update `{ssh_config_path}` due to a permissions problem.\n"
|
|
131
|
-
f"The `vscode://vscode-remote/ssh-remote+<run name>/workflow`
|
|
131
|
+
f"The `vscode://vscode-remote/ssh-remote+<run name>/workflow` and "
|
|
132
|
+
f"`cursor://vscode-remote/ssh-remote+<run name>/workflow` links and "
|
|
132
133
|
f"the `ssh <run name>` command won't work.\n"
|
|
133
134
|
f"To fix this, make sure `{ssh_config_path}` is writable, or add "
|
|
134
135
|
f"`Include {path}` to the top of `{ssh_config_path}` manually.",
|
dstack/api/__init__.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
from dstack._internal.core.errors import ClientError
|
|
3
3
|
from dstack._internal.core.models.backends.base import BackendType
|
|
4
4
|
from dstack._internal.core.models.common import RegistryAuth
|
|
5
|
+
from dstack._internal.core.models.configurations import (
|
|
6
|
+
DevEnvironmentConfiguration as _DevEnvironmentConfiguration,
|
|
7
|
+
)
|
|
5
8
|
from dstack._internal.core.models.configurations import ScalingSpec as Scaling
|
|
6
9
|
from dstack._internal.core.models.configurations import (
|
|
7
10
|
ServiceConfiguration as _ServiceConfiguration,
|
|
@@ -22,3 +25,4 @@ from dstack.api._public.runs import Run, RunStatus
|
|
|
22
25
|
|
|
23
26
|
Service = _ServiceConfiguration
|
|
24
27
|
Task = _TaskConfiguration
|
|
28
|
+
DevEnvironment = _DevEnvironmentConfiguration
|
dstack/api/_public/__init__.py
CHANGED
|
@@ -6,7 +6,6 @@ from dstack._internal.core.services.configs import ConfigManager
|
|
|
6
6
|
from dstack._internal.utils.logging import get_logger
|
|
7
7
|
from dstack._internal.utils.path import PathLike
|
|
8
8
|
from dstack.api._public.backends import BackendCollection
|
|
9
|
-
from dstack.api._public.pools import PoolCollection
|
|
10
9
|
from dstack.api._public.repos import RepoCollection, get_ssh_keypair
|
|
11
10
|
from dstack.api._public.runs import RunCollection
|
|
12
11
|
from dstack.api.server import APIClient
|
|
@@ -16,12 +15,14 @@ logger = get_logger(__name__)
|
|
|
16
15
|
|
|
17
16
|
class Client:
|
|
18
17
|
"""
|
|
19
|
-
High-level API client for interacting with dstack server
|
|
18
|
+
High-level API client for interacting with the `dstack` server
|
|
20
19
|
|
|
21
20
|
Attributes:
|
|
21
|
+
project: The project name.
|
|
22
22
|
runs: Operations with runs.
|
|
23
23
|
repos: Operations with repositories.
|
|
24
24
|
backends: Operations with backends.
|
|
25
|
+
client: Low-level API client that supports all API endpoints.
|
|
25
26
|
"""
|
|
26
27
|
|
|
27
28
|
def __init__(
|
|
@@ -41,7 +42,6 @@ class Client:
|
|
|
41
42
|
self._repos = RepoCollection(api_client, project_name)
|
|
42
43
|
self._backends = BackendCollection(api_client, project_name)
|
|
43
44
|
self._runs = RunCollection(api_client, project_name, self)
|
|
44
|
-
self._pool = PoolCollection(api_client, project_name)
|
|
45
45
|
if ssh_identity_file:
|
|
46
46
|
self.ssh_identity_file = str(ssh_identity_file)
|
|
47
47
|
else:
|
|
@@ -58,13 +58,13 @@ class Client:
|
|
|
58
58
|
Creates a Client using the default configuration from `~/.dstack/config.yml` if it exists.
|
|
59
59
|
|
|
60
60
|
Args:
|
|
61
|
-
project_name: The name of the project
|
|
62
|
-
server_url: The dstack server URL (e.g. `http://localhost:3000/` or `https://sky.dstack.ai`)
|
|
63
|
-
user_token: The dstack user token
|
|
64
|
-
ssh_identity_file: The private SSH key path for SSH tunneling
|
|
61
|
+
project_name: The name of the project. required if `server_url` and `user_token` are specified.
|
|
62
|
+
server_url: The dstack server URL (e.g. `http://localhost:3000/` or `https://sky.dstack.ai`).
|
|
63
|
+
user_token: The dstack user token.
|
|
64
|
+
ssh_identity_file: The private SSH key path for SSH tunneling.
|
|
65
65
|
|
|
66
66
|
Returns:
|
|
67
|
-
A client instance
|
|
67
|
+
A client instance.
|
|
68
68
|
"""
|
|
69
69
|
if server_url is not None and user_token is not None:
|
|
70
70
|
if project_name is None:
|
|
@@ -79,25 +79,21 @@ class Client:
|
|
|
79
79
|
)
|
|
80
80
|
|
|
81
81
|
@property
|
|
82
|
-
def
|
|
83
|
-
return self.
|
|
84
|
-
|
|
85
|
-
@property
|
|
86
|
-
def backends(self) -> BackendCollection:
|
|
87
|
-
return self._backends
|
|
82
|
+
def project(self) -> str:
|
|
83
|
+
return self._project
|
|
88
84
|
|
|
89
85
|
@property
|
|
90
86
|
def runs(self) -> RunCollection:
|
|
91
87
|
return self._runs
|
|
92
88
|
|
|
93
89
|
@property
|
|
94
|
-
def
|
|
95
|
-
return self.
|
|
90
|
+
def repos(self) -> RepoCollection:
|
|
91
|
+
return self._repos
|
|
96
92
|
|
|
97
93
|
@property
|
|
98
|
-
def
|
|
99
|
-
return self.
|
|
94
|
+
def backends(self) -> BackendCollection:
|
|
95
|
+
return self._backends
|
|
100
96
|
|
|
101
97
|
@property
|
|
102
|
-
def
|
|
103
|
-
return self.
|
|
98
|
+
def client(self) -> APIClient:
|
|
99
|
+
return self._client
|
dstack/api/_public/backends.py
CHANGED
dstack/api/_public/repos.py
CHANGED
|
@@ -55,7 +55,7 @@ class RepoCollection:
|
|
|
55
55
|
Once the repo is initialized, you can pass the repo object to the run:
|
|
56
56
|
|
|
57
57
|
```python
|
|
58
|
-
run = client.runs.
|
|
58
|
+
run = client.runs.apply_configuration(
|
|
59
59
|
configuration=...,
|
|
60
60
|
repo=repo,
|
|
61
61
|
)
|
|
@@ -78,25 +78,6 @@ class RepoCollection:
|
|
|
78
78
|
raise ConfigurationError(*e.args)
|
|
79
79
|
self._api_client.repos.init(self._project, repo.repo_id, repo.get_repo_info(), creds)
|
|
80
80
|
|
|
81
|
-
def is_initialized(
|
|
82
|
-
self,
|
|
83
|
-
repo: Repo,
|
|
84
|
-
) -> bool:
|
|
85
|
-
# """
|
|
86
|
-
# Checks if the remote repo is initialized in the project
|
|
87
|
-
#
|
|
88
|
-
# Args:
|
|
89
|
-
# repo: repo to check
|
|
90
|
-
#
|
|
91
|
-
# Returns:
|
|
92
|
-
# repo is initialized
|
|
93
|
-
# """
|
|
94
|
-
try:
|
|
95
|
-
self._api_client.repos.get(self._project, repo.repo_id, include_creds=False)
|
|
96
|
-
return True
|
|
97
|
-
except ResourceNotExistsError:
|
|
98
|
-
return False
|
|
99
|
-
|
|
100
81
|
def load(
|
|
101
82
|
self,
|
|
102
83
|
repo_dir: PathLike,
|
|
@@ -105,22 +86,22 @@ class RepoCollection:
|
|
|
105
86
|
git_identity_file: Optional[PathLike] = None,
|
|
106
87
|
oauth_token: Optional[str] = None,
|
|
107
88
|
) -> Union[RemoteRepo, LocalRepo]:
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
89
|
+
"""
|
|
90
|
+
Loads the repo from the local directory using global config.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
repo_dir: Repo root directory.
|
|
94
|
+
local: Do not try to load `RemoteRepo` first.
|
|
95
|
+
init: Initialize the repo if it's not initialized.
|
|
96
|
+
git_identity_file: Path to an SSH private key to access the remote repo.
|
|
97
|
+
oauth_token: GitHub OAuth token to access the remote repo.
|
|
98
|
+
|
|
99
|
+
Raises:
|
|
100
|
+
ConfigurationError: If the repo is not initialized and `init` is `False`.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
repo: Initialized repo.
|
|
104
|
+
"""
|
|
124
105
|
config = ConfigManager()
|
|
125
106
|
if not init:
|
|
126
107
|
logger.debug("Loading repo config")
|
|
@@ -155,6 +136,25 @@ class RepoCollection:
|
|
|
155
136
|
)
|
|
156
137
|
return repo
|
|
157
138
|
|
|
139
|
+
def is_initialized(
|
|
140
|
+
self,
|
|
141
|
+
repo: Repo,
|
|
142
|
+
) -> bool:
|
|
143
|
+
"""
|
|
144
|
+
Checks if the remote repo is initialized in the project.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
repo: The repo to check.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
Whether the repo is initialized or not.
|
|
151
|
+
"""
|
|
152
|
+
try:
|
|
153
|
+
self._api_client.repos.get(self._project, repo.repo_id, include_creds=False)
|
|
154
|
+
return True
|
|
155
|
+
except ResourceNotExistsError:
|
|
156
|
+
return False
|
|
157
|
+
|
|
158
158
|
|
|
159
159
|
def get_ssh_keypair(key_path: Optional[PathLike], dstack_key_path: Path) -> str:
|
|
160
160
|
"""Returns a path to the private key"""
|