dstack 0.19.15rc1__py3-none-any.whl → 0.19.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/secrets.py +92 -0
- dstack/_internal/cli/main.py +2 -0
- dstack/_internal/cli/services/completion.py +5 -0
- dstack/_internal/cli/services/configurators/run.py +59 -17
- dstack/_internal/cli/utils/secrets.py +25 -0
- dstack/_internal/core/backends/__init__.py +10 -4
- dstack/_internal/core/backends/cloudrift/__init__.py +0 -0
- dstack/_internal/core/backends/cloudrift/api_client.py +208 -0
- dstack/_internal/core/backends/cloudrift/backend.py +16 -0
- dstack/_internal/core/backends/cloudrift/compute.py +138 -0
- dstack/_internal/core/backends/cloudrift/configurator.py +66 -0
- dstack/_internal/core/backends/cloudrift/models.py +40 -0
- dstack/_internal/core/backends/configurators.py +9 -0
- dstack/_internal/core/backends/models.py +7 -0
- dstack/_internal/core/compatibility/logs.py +15 -0
- dstack/_internal/core/compatibility/runs.py +31 -2
- dstack/_internal/core/models/backends/base.py +2 -0
- dstack/_internal/core/models/configurations.py +33 -2
- dstack/_internal/core/models/files.py +67 -0
- dstack/_internal/core/models/logs.py +2 -1
- dstack/_internal/core/models/runs.py +24 -1
- dstack/_internal/core/models/secrets.py +9 -2
- dstack/_internal/server/app.py +2 -0
- dstack/_internal/server/background/tasks/process_fleets.py +1 -1
- dstack/_internal/server/background/tasks/process_gateways.py +1 -1
- dstack/_internal/server/background/tasks/process_instances.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +1 -1
- dstack/_internal/server/background/tasks/process_running_jobs.py +110 -13
- dstack/_internal/server/background/tasks/process_runs.py +36 -5
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +10 -4
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
- dstack/_internal/server/background/tasks/process_volumes.py +1 -1
- dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
- dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
- dstack/_internal/server/models.py +33 -0
- dstack/_internal/server/routers/files.py +67 -0
- dstack/_internal/server/routers/gateways.py +6 -3
- dstack/_internal/server/routers/projects.py +63 -0
- dstack/_internal/server/routers/prometheus.py +5 -5
- dstack/_internal/server/routers/secrets.py +57 -15
- dstack/_internal/server/schemas/files.py +5 -0
- dstack/_internal/server/schemas/logs.py +10 -1
- dstack/_internal/server/schemas/projects.py +12 -0
- dstack/_internal/server/schemas/runner.py +2 -0
- dstack/_internal/server/schemas/secrets.py +7 -11
- dstack/_internal/server/security/permissions.py +75 -2
- dstack/_internal/server/services/backends/__init__.py +1 -1
- dstack/_internal/server/services/files.py +91 -0
- dstack/_internal/server/services/fleets.py +1 -1
- dstack/_internal/server/services/gateways/__init__.py +1 -1
- dstack/_internal/server/services/jobs/__init__.py +19 -8
- dstack/_internal/server/services/jobs/configurators/base.py +27 -3
- dstack/_internal/server/services/jobs/configurators/dev.py +3 -3
- dstack/_internal/server/services/logs/aws.py +38 -38
- dstack/_internal/server/services/logs/filelog.py +48 -14
- dstack/_internal/server/services/logs/gcp.py +17 -16
- dstack/_internal/server/services/projects.py +164 -5
- dstack/_internal/server/services/prometheus/__init__.py +0 -0
- dstack/_internal/server/services/prometheus/client_metrics.py +52 -0
- dstack/_internal/server/services/proxy/repo.py +3 -0
- dstack/_internal/server/services/runner/client.py +8 -0
- dstack/_internal/server/services/runs.py +55 -10
- dstack/_internal/server/services/secrets.py +204 -0
- dstack/_internal/server/services/services/__init__.py +2 -1
- dstack/_internal/server/services/storage/base.py +21 -0
- dstack/_internal/server/services/storage/gcs.py +28 -6
- dstack/_internal/server/services/storage/s3.py +27 -9
- dstack/_internal/server/services/users.py +1 -3
- dstack/_internal/server/services/volumes.py +1 -1
- dstack/_internal/server/settings.py +2 -2
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js → main-d151637af20f70b2e796.js} +104 -48
- dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js.map → main-d151637af20f70b2e796.js.map} +1 -1
- dstack/_internal/server/statics/{main-f39c418b05fe14772dd8.css → main-d48635d8fe670d53961c.css} +1 -1
- dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
- dstack/_internal/server/testing/common.py +43 -5
- dstack/_internal/settings.py +5 -0
- dstack/_internal/utils/files.py +69 -0
- dstack/_internal/utils/nested_list.py +47 -0
- dstack/_internal/utils/path.py +12 -4
- dstack/api/_public/runs.py +73 -12
- dstack/api/server/__init__.py +6 -0
- dstack/api/server/_files.py +18 -0
- dstack/api/server/_logs.py +5 -1
- dstack/api/server/_projects.py +24 -0
- dstack/api/server/_secrets.py +15 -15
- dstack/version.py +1 -1
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/METADATA +3 -4
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/RECORD +93 -71
- /dstack/_internal/server/services/{prometheus.py → prometheus/custom_metrics.py} +0 -0
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/WHEEL +0 -0
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from dstack._internal.core.backends.base.configurator import (
|
|
4
|
+
BackendRecord,
|
|
5
|
+
Configurator,
|
|
6
|
+
raise_invalid_credentials_error,
|
|
7
|
+
)
|
|
8
|
+
from dstack._internal.core.backends.cloudrift.api_client import RiftClient
|
|
9
|
+
from dstack._internal.core.backends.cloudrift.backend import CloudRiftBackend
|
|
10
|
+
from dstack._internal.core.backends.cloudrift.models import (
|
|
11
|
+
AnyCloudRiftBackendConfig,
|
|
12
|
+
AnyCloudRiftCreds,
|
|
13
|
+
CloudRiftBackendConfig,
|
|
14
|
+
CloudRiftBackendConfigWithCreds,
|
|
15
|
+
CloudRiftConfig,
|
|
16
|
+
CloudRiftCreds,
|
|
17
|
+
CloudRiftStoredConfig,
|
|
18
|
+
)
|
|
19
|
+
from dstack._internal.core.models.backends.base import (
|
|
20
|
+
BackendType,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CloudRiftConfigurator(Configurator):
|
|
25
|
+
TYPE = BackendType.CLOUDRIFT
|
|
26
|
+
BACKEND_CLASS = CloudRiftBackend
|
|
27
|
+
|
|
28
|
+
def validate_config(
|
|
29
|
+
self, config: CloudRiftBackendConfigWithCreds, default_creds_enabled: bool
|
|
30
|
+
):
|
|
31
|
+
self._validate_creds(config.creds)
|
|
32
|
+
|
|
33
|
+
def create_backend(
|
|
34
|
+
self, project_name: str, config: CloudRiftBackendConfigWithCreds
|
|
35
|
+
) -> BackendRecord:
|
|
36
|
+
return BackendRecord(
|
|
37
|
+
config=CloudRiftStoredConfig(
|
|
38
|
+
**CloudRiftBackendConfig.__response__.parse_obj(config).dict()
|
|
39
|
+
).json(),
|
|
40
|
+
auth=CloudRiftCreds.parse_obj(config.creds).json(),
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
def get_backend_config(
|
|
44
|
+
self, record: BackendRecord, include_creds: bool
|
|
45
|
+
) -> AnyCloudRiftBackendConfig:
|
|
46
|
+
config = self._get_config(record)
|
|
47
|
+
if include_creds:
|
|
48
|
+
return CloudRiftBackendConfigWithCreds.__response__.parse_obj(config)
|
|
49
|
+
return CloudRiftBackendConfig.__response__.parse_obj(config)
|
|
50
|
+
|
|
51
|
+
def get_backend(self, record: BackendRecord) -> CloudRiftBackend:
|
|
52
|
+
config = self._get_config(record)
|
|
53
|
+
return CloudRiftBackend(config=config)
|
|
54
|
+
|
|
55
|
+
def _get_config(self, record: BackendRecord) -> CloudRiftConfig:
|
|
56
|
+
return CloudRiftConfig.__response__(
|
|
57
|
+
**json.loads(record.config),
|
|
58
|
+
creds=CloudRiftCreds.parse_raw(record.auth),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def _validate_creds(self, creds: AnyCloudRiftCreds):
|
|
62
|
+
if not isinstance(creds, CloudRiftCreds):
|
|
63
|
+
raise_invalid_credentials_error(fields=[["creds"]])
|
|
64
|
+
client = RiftClient(creds.api_key)
|
|
65
|
+
if not client.validate_api_key():
|
|
66
|
+
raise_invalid_credentials_error(fields=[["creds", "api_key"]])
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from typing import Annotated, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from dstack._internal.core.models.common import CoreModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CloudRiftAPIKeyCreds(CoreModel):
|
|
9
|
+
type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
|
|
10
|
+
api_key: Annotated[str, Field(description="The API key")]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
AnyCloudRiftCreds = CloudRiftAPIKeyCreds
|
|
14
|
+
CloudRiftCreds = AnyCloudRiftCreds
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CloudRiftBackendConfig(CoreModel):
|
|
18
|
+
type: Annotated[
|
|
19
|
+
Literal["cloudrift"],
|
|
20
|
+
Field(description="The type of backend"),
|
|
21
|
+
] = "cloudrift"
|
|
22
|
+
regions: Annotated[
|
|
23
|
+
Optional[List[str]],
|
|
24
|
+
Field(description="The list of CloudRift regions. Omit to use all regions"),
|
|
25
|
+
] = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CloudRiftBackendConfigWithCreds(CloudRiftBackendConfig):
|
|
29
|
+
creds: Annotated[AnyCloudRiftCreds, Field(description="The credentials")]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
AnyCloudRiftBackendConfig = Union[CloudRiftBackendConfig, CloudRiftBackendConfigWithCreds]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CloudRiftStoredConfig(CloudRiftBackendConfig):
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CloudRiftConfig(CloudRiftStoredConfig):
|
|
40
|
+
creds: AnyCloudRiftCreds
|
|
@@ -20,6 +20,15 @@ try:
|
|
|
20
20
|
except ImportError:
|
|
21
21
|
pass
|
|
22
22
|
|
|
23
|
+
try:
|
|
24
|
+
from dstack._internal.core.backends.cloudrift.configurator import (
|
|
25
|
+
CloudRiftConfigurator,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_CONFIGURATOR_CLASSES.append(CloudRiftConfigurator)
|
|
29
|
+
except ImportError:
|
|
30
|
+
pass
|
|
31
|
+
|
|
23
32
|
try:
|
|
24
33
|
from dstack._internal.core.backends.cudo.configurator import (
|
|
25
34
|
CudoConfigurator,
|
|
@@ -8,6 +8,10 @@ from dstack._internal.core.backends.azure.models import (
|
|
|
8
8
|
AzureBackendConfig,
|
|
9
9
|
AzureBackendConfigWithCreds,
|
|
10
10
|
)
|
|
11
|
+
from dstack._internal.core.backends.cloudrift.models import (
|
|
12
|
+
CloudRiftBackendConfig,
|
|
13
|
+
CloudRiftBackendConfigWithCreds,
|
|
14
|
+
)
|
|
11
15
|
from dstack._internal.core.backends.cudo.models import (
|
|
12
16
|
CudoBackendConfig,
|
|
13
17
|
CudoBackendConfigWithCreds,
|
|
@@ -65,6 +69,7 @@ from dstack._internal.core.models.common import CoreModel
|
|
|
65
69
|
AnyBackendConfigWithoutCreds = Union[
|
|
66
70
|
AWSBackendConfig,
|
|
67
71
|
AzureBackendConfig,
|
|
72
|
+
CloudRiftBackendConfig,
|
|
68
73
|
CudoBackendConfig,
|
|
69
74
|
DataCrunchBackendConfig,
|
|
70
75
|
GCPBackendConfig,
|
|
@@ -86,6 +91,7 @@ AnyBackendConfigWithoutCreds = Union[
|
|
|
86
91
|
AnyBackendConfigWithCreds = Union[
|
|
87
92
|
AWSBackendConfigWithCreds,
|
|
88
93
|
AzureBackendConfigWithCreds,
|
|
94
|
+
CloudRiftBackendConfigWithCreds,
|
|
89
95
|
CudoBackendConfigWithCreds,
|
|
90
96
|
DataCrunchBackendConfigWithCreds,
|
|
91
97
|
GCPBackendConfigWithCreds,
|
|
@@ -106,6 +112,7 @@ AnyBackendConfigWithCreds = Union[
|
|
|
106
112
|
AnyBackendFileConfigWithCreds = Union[
|
|
107
113
|
AWSBackendConfigWithCreds,
|
|
108
114
|
AzureBackendConfigWithCreds,
|
|
115
|
+
CloudRiftBackendConfigWithCreds,
|
|
109
116
|
CudoBackendConfigWithCreds,
|
|
110
117
|
DataCrunchBackendConfigWithCreds,
|
|
111
118
|
GCPBackendFileConfigWithCreds,
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Dict, Optional
|
|
2
|
+
|
|
3
|
+
from dstack._internal.server.schemas.logs import PollLogsRequest
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_poll_logs_excludes(request: PollLogsRequest) -> Optional[Dict]:
|
|
7
|
+
"""
|
|
8
|
+
Returns exclude mapping to exclude certain fields from the request.
|
|
9
|
+
Use this method to exclude new fields when they are not set to keep
|
|
10
|
+
clients backward-compatibility with older servers.
|
|
11
|
+
"""
|
|
12
|
+
excludes = {}
|
|
13
|
+
if request.next_token is None:
|
|
14
|
+
excludes["next_token"] = True
|
|
15
|
+
return excludes if excludes else None
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Any, Dict, Optional
|
|
2
2
|
|
|
3
3
|
from dstack._internal.core.models.configurations import ServiceConfiguration
|
|
4
|
-
from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSubmission, RunSpec
|
|
4
|
+
from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSpec, JobSubmission, RunSpec
|
|
5
5
|
from dstack._internal.server.schemas.runs import GetRunPlanRequest
|
|
6
6
|
|
|
7
7
|
|
|
@@ -25,7 +25,10 @@ def get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[Dict]:
|
|
|
25
25
|
current_resource_excludes["run_spec"] = get_run_spec_excludes(current_resource.run_spec)
|
|
26
26
|
job_submissions_excludes = {}
|
|
27
27
|
current_resource_excludes["jobs"] = {
|
|
28
|
-
"__all__": {
|
|
28
|
+
"__all__": {
|
|
29
|
+
"job_spec": get_job_spec_excludes([job.job_spec for job in current_resource.jobs]),
|
|
30
|
+
"job_submissions": {"__all__": job_submissions_excludes},
|
|
31
|
+
}
|
|
29
32
|
}
|
|
30
33
|
job_submissions = [js for j in current_resource.jobs for js in j.job_submissions]
|
|
31
34
|
if all(map(_should_exclude_job_submission_jpd_cpu_arch, job_submissions)):
|
|
@@ -97,6 +100,8 @@ def get_run_spec_excludes(run_spec: RunSpec) -> Optional[Dict]:
|
|
|
97
100
|
configuration_excludes["rate_limits"] = True
|
|
98
101
|
if configuration.shell is None:
|
|
99
102
|
configuration_excludes["shell"] = True
|
|
103
|
+
if configuration.docker is None:
|
|
104
|
+
configuration_excludes["docker"] = True
|
|
100
105
|
if configuration.priority is None:
|
|
101
106
|
configuration_excludes["priority"] = True
|
|
102
107
|
if configuration.startup_order is None:
|
|
@@ -107,6 +112,10 @@ def get_run_spec_excludes(run_spec: RunSpec) -> Optional[Dict]:
|
|
|
107
112
|
configuration_excludes["stop_criteria"] = True
|
|
108
113
|
if profile is not None and profile.stop_criteria is None:
|
|
109
114
|
profile_excludes.add("stop_criteria")
|
|
115
|
+
if not configuration.files:
|
|
116
|
+
configuration_excludes["files"] = True
|
|
117
|
+
if not run_spec.file_archives:
|
|
118
|
+
spec_excludes["file_archives"] = True
|
|
110
119
|
|
|
111
120
|
if configuration_excludes:
|
|
112
121
|
spec_excludes["configuration"] = configuration_excludes
|
|
@@ -117,6 +126,26 @@ def get_run_spec_excludes(run_spec: RunSpec) -> Optional[Dict]:
|
|
|
117
126
|
return None
|
|
118
127
|
|
|
119
128
|
|
|
129
|
+
def get_job_spec_excludes(job_specs: list[JobSpec]) -> Optional[dict]:
|
|
130
|
+
"""
|
|
131
|
+
Returns `job_spec` exclude mapping to exclude certain fields from the request.
|
|
132
|
+
Use this method to exclude new fields when they are not set to keep
|
|
133
|
+
clients backward-compatibility with older servers.
|
|
134
|
+
"""
|
|
135
|
+
spec_excludes: dict[str, Any] = {}
|
|
136
|
+
|
|
137
|
+
if all(s.repo_code_hash is None for s in job_specs):
|
|
138
|
+
spec_excludes["repo_code_hash"] = True
|
|
139
|
+
if all(s.repo_data is None for s in job_specs):
|
|
140
|
+
spec_excludes["repo_data"] = True
|
|
141
|
+
if all(not s.file_archives for s in job_specs):
|
|
142
|
+
spec_excludes["file_archives"] = True
|
|
143
|
+
|
|
144
|
+
if spec_excludes:
|
|
145
|
+
return spec_excludes
|
|
146
|
+
return None
|
|
147
|
+
|
|
148
|
+
|
|
120
149
|
def _should_exclude_job_submission_jpd_cpu_arch(job_submission: JobSubmission) -> bool:
|
|
121
150
|
try:
|
|
122
151
|
return job_submission.job_provisioning_data.instance_type.resources.cpu_arch is None
|
|
@@ -6,6 +6,7 @@ class BackendType(str, enum.Enum):
|
|
|
6
6
|
Attributes:
|
|
7
7
|
AWS (BackendType): Amazon Web Services
|
|
8
8
|
AZURE (BackendType): Microsoft Azure
|
|
9
|
+
CLOUDRIFT (BackendType): CloudRift
|
|
9
10
|
CUDO (BackendType): Cudo
|
|
10
11
|
DSTACK (BackendType): dstack Sky
|
|
11
12
|
GCP (BackendType): Google Cloud Platform
|
|
@@ -22,6 +23,7 @@ class BackendType(str, enum.Enum):
|
|
|
22
23
|
|
|
23
24
|
AWS = "aws"
|
|
24
25
|
AZURE = "azure"
|
|
26
|
+
CLOUDRIFT = "cloudrift"
|
|
25
27
|
CUDO = "cudo"
|
|
26
28
|
DATACRUNCH = "datacrunch"
|
|
27
29
|
DSTACK = "dstack"
|
|
@@ -10,6 +10,7 @@ from typing_extensions import Annotated, Literal
|
|
|
10
10
|
from dstack._internal.core.errors import ConfigurationError
|
|
11
11
|
from dstack._internal.core.models.common import CoreModel, Duration, RegistryAuth
|
|
12
12
|
from dstack._internal.core.models.envs import Env
|
|
13
|
+
from dstack._internal.core.models.files import FilePathMapping
|
|
13
14
|
from dstack._internal.core.models.fleets import FleetConfiguration
|
|
14
15
|
from dstack._internal.core.models.gateways import GatewayConfiguration
|
|
15
16
|
from dstack._internal.core.models.profiles import ProfileParams, parse_off_duration
|
|
@@ -194,12 +195,14 @@ class BaseRunConfiguration(CoreModel):
|
|
|
194
195
|
] = None
|
|
195
196
|
python: Annotated[
|
|
196
197
|
Optional[PythonVersion],
|
|
197
|
-
Field(
|
|
198
|
+
Field(
|
|
199
|
+
description="The major version of Python. Mutually exclusive with `image` and `docker`"
|
|
200
|
+
),
|
|
198
201
|
] = None
|
|
199
202
|
nvcc: Annotated[
|
|
200
203
|
Optional[bool],
|
|
201
204
|
Field(
|
|
202
|
-
description="Use image with NVIDIA CUDA Compiler (NVCC) included. Mutually exclusive with `image`"
|
|
205
|
+
description="Use image with NVIDIA CUDA Compiler (NVCC) included. Mutually exclusive with `image` and `docker`"
|
|
203
206
|
),
|
|
204
207
|
] = None
|
|
205
208
|
single_branch: Annotated[
|
|
@@ -244,6 +247,16 @@ class BaseRunConfiguration(CoreModel):
|
|
|
244
247
|
volumes: Annotated[
|
|
245
248
|
List[Union[MountPoint, str]], Field(description="The volumes mount points")
|
|
246
249
|
] = []
|
|
250
|
+
docker: Annotated[
|
|
251
|
+
Optional[bool],
|
|
252
|
+
Field(
|
|
253
|
+
description="Use Docker inside the container. Mutually exclusive with `image`, `python`, and `nvcc`. Overrides `privileged`"
|
|
254
|
+
),
|
|
255
|
+
] = None
|
|
256
|
+
files: Annotated[
|
|
257
|
+
list[Union[FilePathMapping, str]],
|
|
258
|
+
Field(description="The local to container file path mappings"),
|
|
259
|
+
] = []
|
|
247
260
|
# deprecated since 0.18.31; task, service -- no effect; dev-environment -- executed right before `init`
|
|
248
261
|
setup: CommandsList = []
|
|
249
262
|
|
|
@@ -259,12 +272,30 @@ class BaseRunConfiguration(CoreModel):
|
|
|
259
272
|
return PythonVersion(v)
|
|
260
273
|
return v
|
|
261
274
|
|
|
275
|
+
@validator("docker", pre=True, always=True)
|
|
276
|
+
def _docker(cls, v, values) -> Optional[bool]:
|
|
277
|
+
if v is True and values.get("image"):
|
|
278
|
+
raise KeyError("`image` and `docker` are mutually exclusive fields")
|
|
279
|
+
if v is True and values.get("python"):
|
|
280
|
+
raise KeyError("`python` and `docker` are mutually exclusive fields")
|
|
281
|
+
if v is True and values.get("nvcc"):
|
|
282
|
+
raise KeyError("`nvcc` and `docker` are mutually exclusive fields")
|
|
283
|
+
# Ideally, we'd like to also prohibit privileged=False when docker=True,
|
|
284
|
+
# but it's not possible to do so without breaking backwards compatibility.
|
|
285
|
+
return v
|
|
286
|
+
|
|
262
287
|
@validator("volumes", each_item=True)
|
|
263
288
|
def convert_volumes(cls, v) -> MountPoint:
|
|
264
289
|
if isinstance(v, str):
|
|
265
290
|
return parse_mount_point(v)
|
|
266
291
|
return v
|
|
267
292
|
|
|
293
|
+
@validator("files", each_item=True)
|
|
294
|
+
def convert_files(cls, v) -> FilePathMapping:
|
|
295
|
+
if isinstance(v, str):
|
|
296
|
+
return FilePathMapping.parse(v)
|
|
297
|
+
return v
|
|
298
|
+
|
|
268
299
|
@validator("user")
|
|
269
300
|
def validate_user(cls, v) -> Optional[str]:
|
|
270
301
|
if v is None:
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
import string
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, validator
|
|
6
|
+
from typing_extensions import Annotated, Self
|
|
7
|
+
|
|
8
|
+
from dstack._internal.core.models.common import CoreModel
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FileArchive(CoreModel):
|
|
12
|
+
id: UUID
|
|
13
|
+
hash: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FilePathMapping(CoreModel):
|
|
17
|
+
local_path: Annotated[
|
|
18
|
+
str,
|
|
19
|
+
Field(
|
|
20
|
+
description=(
|
|
21
|
+
"The path on the user's machine. Relative paths are resolved relative to"
|
|
22
|
+
" the parent directory of the the configuration file"
|
|
23
|
+
)
|
|
24
|
+
),
|
|
25
|
+
]
|
|
26
|
+
path: Annotated[
|
|
27
|
+
str,
|
|
28
|
+
Field(
|
|
29
|
+
description=(
|
|
30
|
+
"The path in the container. Relative paths are resolved relative to"
|
|
31
|
+
" the repo directory (`/workflow`)"
|
|
32
|
+
)
|
|
33
|
+
),
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
@classmethod
|
|
37
|
+
def parse(cls, v: str) -> Self:
|
|
38
|
+
local_path: str
|
|
39
|
+
path: str
|
|
40
|
+
parts = v.split(":")
|
|
41
|
+
# A special case for Windows paths, e.g., `C:\path\to`, 'c:/path/to'
|
|
42
|
+
if (
|
|
43
|
+
len(parts) > 1
|
|
44
|
+
and len(parts[0]) == 1
|
|
45
|
+
and parts[0] in string.ascii_letters
|
|
46
|
+
and parts[1][:1] in ["\\", "/"]
|
|
47
|
+
):
|
|
48
|
+
parts = [f"{parts[0]}:{parts[1]}", *parts[2:]]
|
|
49
|
+
if len(parts) == 1:
|
|
50
|
+
local_path = path = parts[0]
|
|
51
|
+
elif len(parts) == 2:
|
|
52
|
+
local_path, path = parts
|
|
53
|
+
else:
|
|
54
|
+
raise ValueError(f"invalid file path mapping: {v}")
|
|
55
|
+
return cls(local_path=local_path, path=path)
|
|
56
|
+
|
|
57
|
+
@validator("path")
|
|
58
|
+
def validate_path(cls, v) -> str:
|
|
59
|
+
# True for `C:/.*`, False otherwise, including `/abs/unix/path`, `rel\windows\path`, etc.
|
|
60
|
+
if pathlib.PureWindowsPath(v).is_absolute():
|
|
61
|
+
raise ValueError(f"path must be a Unix file path: {v}")
|
|
62
|
+
return v
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class FileArchiveMapping(CoreModel):
|
|
66
|
+
id: Annotated[UUID, Field(description="The File archive ID")]
|
|
67
|
+
path: Annotated[str, Field(description="The path in the container")]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import List
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from dstack._internal.core.models.common import CoreModel
|
|
6
6
|
|
|
@@ -23,3 +23,4 @@ class LogEvent(CoreModel):
|
|
|
23
23
|
|
|
24
24
|
class JobSubmissionLogs(CoreModel):
|
|
25
25
|
logs: List[LogEvent]
|
|
26
|
+
next_token: Optional[str]
|
|
@@ -12,6 +12,7 @@ from dstack._internal.core.models.configurations import (
|
|
|
12
12
|
AnyRunConfiguration,
|
|
13
13
|
RunConfiguration,
|
|
14
14
|
)
|
|
15
|
+
from dstack._internal.core.models.files import FileArchiveMapping
|
|
15
16
|
from dstack._internal.core.models.instances import (
|
|
16
17
|
InstanceOfferWithAvailability,
|
|
17
18
|
InstanceType,
|
|
@@ -217,6 +218,15 @@ class JobSpec(CoreModel):
|
|
|
217
218
|
volumes: Optional[List[MountPoint]] = None
|
|
218
219
|
ssh_key: Optional[JobSSHKey] = None
|
|
219
220
|
working_dir: Optional[str]
|
|
221
|
+
# `repo_data` is optional for client compatibility with pre-0.19.17 servers and for compatibility
|
|
222
|
+
# with jobs submitted before 0.19.17. All new jobs are expected to have non-None `repo_data`.
|
|
223
|
+
# For --no-repo runs, `repo_data` is `VirtualRunRepoData()`.
|
|
224
|
+
repo_data: Annotated[Optional[AnyRunRepoData], Field(discriminator="repo_type")] = None
|
|
225
|
+
# `repo_code_hash` can be None because it is not used for the repo or because the job was
|
|
226
|
+
# submitted before 0.19.17. See `_get_repo_code_hash` on how to get the correct `repo_code_hash`
|
|
227
|
+
# TODO: drop this comment when supporting jobs submitted before 0.19.17 is no longer relevant.
|
|
228
|
+
repo_code_hash: Optional[str] = None
|
|
229
|
+
file_archives: list[FileArchiveMapping] = []
|
|
220
230
|
|
|
221
231
|
|
|
222
232
|
class JobProvisioningData(CoreModel):
|
|
@@ -301,7 +311,7 @@ class JobSubmission(CoreModel):
|
|
|
301
311
|
job_provisioning_data: Optional[JobProvisioningData]
|
|
302
312
|
job_runtime_data: Optional[JobRuntimeData]
|
|
303
313
|
# TODO: make status_message and error a computed field after migrating to pydanticV2
|
|
304
|
-
status_message: Optional[str]
|
|
314
|
+
status_message: Optional[str] = None
|
|
305
315
|
error: Optional[str] = None
|
|
306
316
|
|
|
307
317
|
@property
|
|
@@ -413,6 +423,10 @@ class RunSpec(CoreModel):
|
|
|
413
423
|
Optional[str],
|
|
414
424
|
Field(description="The hash of the repo diff. Can be omitted if there is no repo diff."),
|
|
415
425
|
] = None
|
|
426
|
+
file_archives: Annotated[
|
|
427
|
+
list[FileArchiveMapping],
|
|
428
|
+
Field(description="The list of file archive ID to container path mappings"),
|
|
429
|
+
] = []
|
|
416
430
|
working_dir: Annotated[
|
|
417
431
|
Optional[str],
|
|
418
432
|
Field(
|
|
@@ -548,11 +562,17 @@ class Run(CoreModel):
|
|
|
548
562
|
retry_on_events = (
|
|
549
563
|
jobs[0].job_spec.retry.on_events if jobs and jobs[0].job_spec.retry else []
|
|
550
564
|
)
|
|
565
|
+
job_status = (
|
|
566
|
+
jobs[0].job_submissions[-1].status
|
|
567
|
+
if len(jobs) == 1 and jobs[0].job_submissions
|
|
568
|
+
else None
|
|
569
|
+
)
|
|
551
570
|
termination_reason = Run.get_last_termination_reason(jobs[0]) if jobs else None
|
|
552
571
|
except KeyError:
|
|
553
572
|
return values
|
|
554
573
|
values["status_message"] = Run._get_status_message(
|
|
555
574
|
status=status,
|
|
575
|
+
job_status=job_status,
|
|
556
576
|
retry_on_events=retry_on_events,
|
|
557
577
|
termination_reason=termination_reason,
|
|
558
578
|
)
|
|
@@ -568,9 +588,12 @@ class Run(CoreModel):
|
|
|
568
588
|
@staticmethod
|
|
569
589
|
def _get_status_message(
|
|
570
590
|
status: RunStatus,
|
|
591
|
+
job_status: Optional[JobStatus],
|
|
571
592
|
retry_on_events: List[RetryEvent],
|
|
572
593
|
termination_reason: Optional[JobTerminationReason],
|
|
573
594
|
) -> str:
|
|
595
|
+
if job_status == JobStatus.PULLING:
|
|
596
|
+
return "pulling"
|
|
574
597
|
# Currently, `retrying` is shown only for `no-capacity` events
|
|
575
598
|
if (
|
|
576
599
|
status in [RunStatus.SUBMITTED, RunStatus.PENDING]
|
|
@@ -1,9 +1,16 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
1
4
|
from dstack._internal.core.models.common import CoreModel
|
|
2
5
|
|
|
3
6
|
|
|
4
7
|
class Secret(CoreModel):
|
|
8
|
+
id: UUID
|
|
5
9
|
name: str
|
|
6
|
-
value: str
|
|
10
|
+
value: Optional[str] = None
|
|
7
11
|
|
|
8
12
|
def __str__(self) -> str:
|
|
9
|
-
|
|
13
|
+
displayed_value = "*"
|
|
14
|
+
if self.value is not None:
|
|
15
|
+
displayed_value = "*" * len(self.value)
|
|
16
|
+
return f'Secret(name="{self.name}", value={displayed_value})'
|
dstack/_internal/server/app.py
CHANGED
|
@@ -23,6 +23,7 @@ from dstack._internal.server.background import start_background_tasks
|
|
|
23
23
|
from dstack._internal.server.db import get_db, get_session_ctx, migrate
|
|
24
24
|
from dstack._internal.server.routers import (
|
|
25
25
|
backends,
|
|
26
|
+
files,
|
|
26
27
|
fleets,
|
|
27
28
|
gateways,
|
|
28
29
|
instances,
|
|
@@ -197,6 +198,7 @@ def register_routes(app: FastAPI, ui: bool = True):
|
|
|
197
198
|
app.include_router(service_proxy.router, prefix="/proxy/services", tags=["service-proxy"])
|
|
198
199
|
app.include_router(model_proxy.router, prefix="/proxy/models", tags=["model-proxy"])
|
|
199
200
|
app.include_router(prometheus.router)
|
|
201
|
+
app.include_router(files.router)
|
|
200
202
|
|
|
201
203
|
@app.exception_handler(ForbiddenError)
|
|
202
204
|
async def forbidden_error_handler(request: Request, exc: ForbiddenError):
|
|
@@ -40,7 +40,7 @@ async def process_submitted_gateways():
|
|
|
40
40
|
.options(lazyload(GatewayModel.gateway_compute))
|
|
41
41
|
.order_by(GatewayModel.last_processed_at.asc())
|
|
42
42
|
.limit(1)
|
|
43
|
-
.with_for_update(skip_locked=True)
|
|
43
|
+
.with_for_update(skip_locked=True, key_share=True)
|
|
44
44
|
)
|
|
45
45
|
gateway_model = res.scalar()
|
|
46
46
|
if gateway_model is None:
|
|
@@ -149,7 +149,7 @@ async def _process_next_instance():
|
|
|
149
149
|
.options(lazyload(InstanceModel.jobs))
|
|
150
150
|
.order_by(InstanceModel.last_processed_at.asc())
|
|
151
151
|
.limit(1)
|
|
152
|
-
.with_for_update(skip_locked=True)
|
|
152
|
+
.with_for_update(skip_locked=True, key_share=True)
|
|
153
153
|
)
|
|
154
154
|
instance = res.scalar()
|
|
155
155
|
if instance is None:
|
|
@@ -30,7 +30,7 @@ async def process_placement_groups():
|
|
|
30
30
|
PlacementGroupModel.id.not_in(lockset),
|
|
31
31
|
)
|
|
32
32
|
.order_by(PlacementGroupModel.id) # take locks in order
|
|
33
|
-
.with_for_update(skip_locked=True)
|
|
33
|
+
.with_for_update(skip_locked=True, key_share=True)
|
|
34
34
|
)
|
|
35
35
|
placement_group_models = res.scalars().all()
|
|
36
36
|
if len(placement_group_models) == 0:
|