dstack 0.19.15rc1__py3-none-any.whl → 0.19.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/secrets.py +92 -0
- dstack/_internal/cli/main.py +2 -0
- dstack/_internal/cli/services/completion.py +5 -0
- dstack/_internal/cli/services/configurators/run.py +59 -17
- dstack/_internal/cli/utils/secrets.py +25 -0
- dstack/_internal/core/backends/__init__.py +10 -4
- dstack/_internal/core/backends/cloudrift/__init__.py +0 -0
- dstack/_internal/core/backends/cloudrift/api_client.py +208 -0
- dstack/_internal/core/backends/cloudrift/backend.py +16 -0
- dstack/_internal/core/backends/cloudrift/compute.py +138 -0
- dstack/_internal/core/backends/cloudrift/configurator.py +66 -0
- dstack/_internal/core/backends/cloudrift/models.py +40 -0
- dstack/_internal/core/backends/configurators.py +9 -0
- dstack/_internal/core/backends/models.py +7 -0
- dstack/_internal/core/compatibility/logs.py +15 -0
- dstack/_internal/core/compatibility/runs.py +31 -2
- dstack/_internal/core/models/backends/base.py +2 -0
- dstack/_internal/core/models/configurations.py +33 -2
- dstack/_internal/core/models/files.py +67 -0
- dstack/_internal/core/models/logs.py +2 -1
- dstack/_internal/core/models/runs.py +24 -1
- dstack/_internal/core/models/secrets.py +9 -2
- dstack/_internal/server/app.py +2 -0
- dstack/_internal/server/background/tasks/process_fleets.py +1 -1
- dstack/_internal/server/background/tasks/process_gateways.py +1 -1
- dstack/_internal/server/background/tasks/process_instances.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +1 -1
- dstack/_internal/server/background/tasks/process_running_jobs.py +110 -13
- dstack/_internal/server/background/tasks/process_runs.py +36 -5
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +10 -4
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
- dstack/_internal/server/background/tasks/process_volumes.py +1 -1
- dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
- dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
- dstack/_internal/server/models.py +33 -0
- dstack/_internal/server/routers/files.py +67 -0
- dstack/_internal/server/routers/gateways.py +6 -3
- dstack/_internal/server/routers/projects.py +63 -0
- dstack/_internal/server/routers/prometheus.py +5 -5
- dstack/_internal/server/routers/secrets.py +57 -15
- dstack/_internal/server/schemas/files.py +5 -0
- dstack/_internal/server/schemas/logs.py +10 -1
- dstack/_internal/server/schemas/projects.py +12 -0
- dstack/_internal/server/schemas/runner.py +2 -0
- dstack/_internal/server/schemas/secrets.py +7 -11
- dstack/_internal/server/security/permissions.py +75 -2
- dstack/_internal/server/services/backends/__init__.py +1 -1
- dstack/_internal/server/services/files.py +91 -0
- dstack/_internal/server/services/fleets.py +1 -1
- dstack/_internal/server/services/gateways/__init__.py +1 -1
- dstack/_internal/server/services/jobs/__init__.py +19 -8
- dstack/_internal/server/services/jobs/configurators/base.py +27 -3
- dstack/_internal/server/services/jobs/configurators/dev.py +3 -3
- dstack/_internal/server/services/logs/aws.py +38 -38
- dstack/_internal/server/services/logs/filelog.py +48 -14
- dstack/_internal/server/services/logs/gcp.py +17 -16
- dstack/_internal/server/services/projects.py +164 -5
- dstack/_internal/server/services/prometheus/__init__.py +0 -0
- dstack/_internal/server/services/prometheus/client_metrics.py +52 -0
- dstack/_internal/server/services/proxy/repo.py +3 -0
- dstack/_internal/server/services/runner/client.py +8 -0
- dstack/_internal/server/services/runs.py +55 -10
- dstack/_internal/server/services/secrets.py +204 -0
- dstack/_internal/server/services/services/__init__.py +2 -1
- dstack/_internal/server/services/storage/base.py +21 -0
- dstack/_internal/server/services/storage/gcs.py +28 -6
- dstack/_internal/server/services/storage/s3.py +27 -9
- dstack/_internal/server/services/users.py +1 -3
- dstack/_internal/server/services/volumes.py +1 -1
- dstack/_internal/server/settings.py +2 -2
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js → main-d151637af20f70b2e796.js} +104 -48
- dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js.map → main-d151637af20f70b2e796.js.map} +1 -1
- dstack/_internal/server/statics/{main-f39c418b05fe14772dd8.css → main-d48635d8fe670d53961c.css} +1 -1
- dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
- dstack/_internal/server/testing/common.py +43 -5
- dstack/_internal/settings.py +5 -0
- dstack/_internal/utils/files.py +69 -0
- dstack/_internal/utils/nested_list.py +47 -0
- dstack/_internal/utils/path.py +12 -4
- dstack/api/_public/runs.py +73 -12
- dstack/api/server/__init__.py +6 -0
- dstack/api/server/_files.py +18 -0
- dstack/api/server/_logs.py +5 -1
- dstack/api/server/_projects.py +24 -0
- dstack/api/server/_secrets.py +15 -15
- dstack/version.py +1 -1
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/METADATA +3 -4
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/RECORD +93 -71
- /dstack/_internal/server/services/{prometheus.py → prometheus/custom_metrics.py} +0 -0
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/WHEEL +0 -0
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -7,13 +7,19 @@ from dstack._internal.core.models.projects import Project
|
|
|
7
7
|
from dstack._internal.server.db import get_session
|
|
8
8
|
from dstack._internal.server.models import ProjectModel, UserModel
|
|
9
9
|
from dstack._internal.server.schemas.projects import (
|
|
10
|
+
AddProjectMemberRequest,
|
|
10
11
|
CreateProjectRequest,
|
|
11
12
|
DeleteProjectsRequest,
|
|
13
|
+
RemoveProjectMemberRequest,
|
|
12
14
|
SetProjectMembersRequest,
|
|
15
|
+
UpdateProjectRequest,
|
|
13
16
|
)
|
|
14
17
|
from dstack._internal.server.security.permissions import (
|
|
15
18
|
Authenticated,
|
|
19
|
+
ProjectAdmin,
|
|
16
20
|
ProjectManager,
|
|
21
|
+
ProjectManagerOrPublicProject,
|
|
22
|
+
ProjectManagerOrSelfLeave,
|
|
17
23
|
ProjectMemberOrPublicAccess,
|
|
18
24
|
)
|
|
19
25
|
from dstack._internal.server.services import projects
|
|
@@ -92,3 +98,60 @@ async def set_project_members(
|
|
|
92
98
|
)
|
|
93
99
|
await session.refresh(project)
|
|
94
100
|
return projects.project_model_to_project(project)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@router.post(
|
|
104
|
+
"/{project_name}/add_members",
|
|
105
|
+
)
|
|
106
|
+
async def add_project_members(
|
|
107
|
+
body: AddProjectMemberRequest,
|
|
108
|
+
session: AsyncSession = Depends(get_session),
|
|
109
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectManagerOrPublicProject()),
|
|
110
|
+
) -> Project:
|
|
111
|
+
user, project = user_project
|
|
112
|
+
await projects.add_project_members(
|
|
113
|
+
session=session,
|
|
114
|
+
user=user,
|
|
115
|
+
project=project,
|
|
116
|
+
members=body.members,
|
|
117
|
+
)
|
|
118
|
+
await session.refresh(project)
|
|
119
|
+
return projects.project_model_to_project(project)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@router.post(
|
|
123
|
+
"/{project_name}/remove_members",
|
|
124
|
+
)
|
|
125
|
+
async def remove_project_members(
|
|
126
|
+
body: RemoveProjectMemberRequest,
|
|
127
|
+
session: AsyncSession = Depends(get_session),
|
|
128
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectManagerOrSelfLeave()),
|
|
129
|
+
) -> Project:
|
|
130
|
+
user, project = user_project
|
|
131
|
+
await projects.remove_project_members(
|
|
132
|
+
session=session,
|
|
133
|
+
user=user,
|
|
134
|
+
project=project,
|
|
135
|
+
usernames=body.usernames,
|
|
136
|
+
)
|
|
137
|
+
await session.refresh(project)
|
|
138
|
+
return projects.project_model_to_project(project)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@router.post(
|
|
142
|
+
"/{project_name}/update",
|
|
143
|
+
)
|
|
144
|
+
async def update_project(
|
|
145
|
+
body: UpdateProjectRequest,
|
|
146
|
+
session: AsyncSession = Depends(get_session),
|
|
147
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
|
|
148
|
+
) -> Project:
|
|
149
|
+
user, project = user_project
|
|
150
|
+
await projects.update_project(
|
|
151
|
+
session=session,
|
|
152
|
+
user=user,
|
|
153
|
+
project=project,
|
|
154
|
+
is_public=body.is_public,
|
|
155
|
+
)
|
|
156
|
+
await session.refresh(project)
|
|
157
|
+
return projects.project_model_to_project(project)
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import Annotated
|
|
3
3
|
|
|
4
|
+
import prometheus_client
|
|
4
5
|
from fastapi import APIRouter, Depends
|
|
5
6
|
from fastapi.responses import PlainTextResponse
|
|
6
|
-
from prometheus_client import generate_latest
|
|
7
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
8
|
|
|
9
9
|
from dstack._internal.server import settings
|
|
10
10
|
from dstack._internal.server.db import get_session
|
|
11
11
|
from dstack._internal.server.security.permissions import OptionalServiceAccount
|
|
12
|
-
from dstack._internal.server.services import
|
|
12
|
+
from dstack._internal.server.services.prometheus import custom_metrics
|
|
13
13
|
from dstack._internal.server.utils.routers import error_not_found
|
|
14
14
|
|
|
15
15
|
_auth = OptionalServiceAccount(os.getenv("DSTACK_PROMETHEUS_AUTH_TOKEN"))
|
|
@@ -27,6 +27,6 @@ async def get_prometheus_metrics(
|
|
|
27
27
|
) -> str:
|
|
28
28
|
if not settings.ENABLE_PROMETHEUS_METRICS:
|
|
29
29
|
raise error_not_found()
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
return
|
|
30
|
+
custom_metrics_ = await custom_metrics.get_metrics(session=session)
|
|
31
|
+
client_metrics = prometheus_client.generate_latest().decode()
|
|
32
|
+
return custom_metrics_ + client_metrics
|
|
@@ -1,15 +1,19 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List, Tuple
|
|
2
2
|
|
|
3
|
-
from fastapi import APIRouter
|
|
3
|
+
from fastapi import APIRouter, Depends
|
|
4
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
5
|
|
|
5
|
-
from dstack._internal.core.
|
|
6
|
+
from dstack._internal.core.errors import ResourceNotExistsError
|
|
6
7
|
from dstack._internal.core.models.secrets import Secret
|
|
8
|
+
from dstack._internal.server.db import get_session
|
|
9
|
+
from dstack._internal.server.models import ProjectModel, UserModel
|
|
7
10
|
from dstack._internal.server.schemas.secrets import (
|
|
8
|
-
|
|
11
|
+
CreateOrUpdateSecretRequest,
|
|
9
12
|
DeleteSecretsRequest,
|
|
10
|
-
|
|
11
|
-
ListSecretsRequest,
|
|
13
|
+
GetSecretRequest,
|
|
12
14
|
)
|
|
15
|
+
from dstack._internal.server.security.permissions import ProjectAdmin
|
|
16
|
+
from dstack._internal.server.services import secrets as secrets_services
|
|
13
17
|
|
|
14
18
|
router = APIRouter(
|
|
15
19
|
prefix="/api/project/{project_name}/secrets",
|
|
@@ -18,20 +22,58 @@ router = APIRouter(
|
|
|
18
22
|
|
|
19
23
|
|
|
20
24
|
@router.post("/list")
|
|
21
|
-
async def list_secrets(
|
|
22
|
-
|
|
25
|
+
async def list_secrets(
|
|
26
|
+
session: AsyncSession = Depends(get_session),
|
|
27
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
|
|
28
|
+
) -> List[Secret]:
|
|
29
|
+
_, project = user_project
|
|
30
|
+
return await secrets_services.list_secrets(
|
|
31
|
+
session=session,
|
|
32
|
+
project=project,
|
|
33
|
+
)
|
|
23
34
|
|
|
24
35
|
|
|
25
36
|
@router.post("/get")
|
|
26
|
-
async def get_secret(
|
|
27
|
-
|
|
37
|
+
async def get_secret(
|
|
38
|
+
body: GetSecretRequest,
|
|
39
|
+
session: AsyncSession = Depends(get_session),
|
|
40
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
|
|
41
|
+
) -> Secret:
|
|
42
|
+
_, project = user_project
|
|
43
|
+
secret = await secrets_services.get_secret(
|
|
44
|
+
session=session,
|
|
45
|
+
project=project,
|
|
46
|
+
name=body.name,
|
|
47
|
+
)
|
|
48
|
+
if secret is None:
|
|
49
|
+
raise ResourceNotExistsError()
|
|
50
|
+
return secret
|
|
28
51
|
|
|
29
52
|
|
|
30
|
-
@router.post("/
|
|
31
|
-
async def
|
|
32
|
-
|
|
53
|
+
@router.post("/create_or_update")
|
|
54
|
+
async def create_or_update_secret(
|
|
55
|
+
body: CreateOrUpdateSecretRequest,
|
|
56
|
+
session: AsyncSession = Depends(get_session),
|
|
57
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
|
|
58
|
+
) -> Secret:
|
|
59
|
+
_, project = user_project
|
|
60
|
+
return await secrets_services.create_or_update_secret(
|
|
61
|
+
session=session,
|
|
62
|
+
project=project,
|
|
63
|
+
name=body.name,
|
|
64
|
+
value=body.value,
|
|
65
|
+
)
|
|
33
66
|
|
|
34
67
|
|
|
35
68
|
@router.post("/delete")
|
|
36
|
-
async def delete_secrets(
|
|
37
|
-
|
|
69
|
+
async def delete_secrets(
|
|
70
|
+
body: DeleteSecretsRequest,
|
|
71
|
+
session: AsyncSession = Depends(get_session),
|
|
72
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
|
|
73
|
+
):
|
|
74
|
+
_, project = user_project
|
|
75
|
+
await secrets_services.delete_secrets(
|
|
76
|
+
session=session,
|
|
77
|
+
project=project,
|
|
78
|
+
names=body.secrets_names,
|
|
79
|
+
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from typing import Optional
|
|
3
3
|
|
|
4
|
-
from pydantic import UUID4, Field
|
|
4
|
+
from pydantic import UUID4, Field, validator
|
|
5
5
|
|
|
6
6
|
from dstack._internal.core.models.common import CoreModel
|
|
7
7
|
|
|
@@ -12,5 +12,14 @@ class PollLogsRequest(CoreModel):
|
|
|
12
12
|
start_time: Optional[datetime]
|
|
13
13
|
end_time: Optional[datetime]
|
|
14
14
|
descending: bool = False
|
|
15
|
+
next_token: Optional[str] = None
|
|
15
16
|
limit: int = Field(100, ge=0, le=1000)
|
|
16
17
|
diagnose: bool = False
|
|
18
|
+
|
|
19
|
+
@validator("descending")
|
|
20
|
+
@classmethod
|
|
21
|
+
def validate_descending(cls, v):
|
|
22
|
+
# Descending is not supported until we migrate from base64-encoded logs to plain text logs.
|
|
23
|
+
if v is True:
|
|
24
|
+
raise ValueError("descending: true is not supported")
|
|
25
|
+
return v
|
|
@@ -11,6 +11,10 @@ class CreateProjectRequest(CoreModel):
|
|
|
11
11
|
is_public: bool = False
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
class UpdateProjectRequest(CoreModel):
|
|
15
|
+
is_public: bool
|
|
16
|
+
|
|
17
|
+
|
|
14
18
|
class DeleteProjectsRequest(CoreModel):
|
|
15
19
|
projects_names: List[str]
|
|
16
20
|
|
|
@@ -25,3 +29,11 @@ class MemberSetting(CoreModel):
|
|
|
25
29
|
|
|
26
30
|
class SetProjectMembersRequest(CoreModel):
|
|
27
31
|
members: List[MemberSetting]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AddProjectMemberRequest(CoreModel):
|
|
35
|
+
members: List[MemberSetting]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RemoveProjectMemberRequest(CoreModel):
|
|
39
|
+
usernames: List[str]
|
|
@@ -1,20 +1,16 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
|
-
from dstack._internal.core.models.
|
|
4
|
-
from dstack._internal.server.schemas.common import RepoRequest
|
|
3
|
+
from dstack._internal.core.models.common import CoreModel
|
|
5
4
|
|
|
6
5
|
|
|
7
|
-
class
|
|
8
|
-
|
|
6
|
+
class GetSecretRequest(CoreModel):
|
|
7
|
+
name: str
|
|
9
8
|
|
|
10
9
|
|
|
11
|
-
class
|
|
12
|
-
|
|
10
|
+
class CreateOrUpdateSecretRequest(CoreModel):
|
|
11
|
+
name: str
|
|
12
|
+
value: str
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
16
|
-
secret: Secret
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class DeleteSecretsRequest(RepoRequest):
|
|
15
|
+
class DeleteSecretsRequest(CoreModel):
|
|
20
16
|
secrets_names: List[str]
|
|
@@ -58,7 +58,7 @@ class ProjectAdmin:
|
|
|
58
58
|
raise error_invalid_token()
|
|
59
59
|
project = await get_project_model_by_name(session=session, project_name=project_name)
|
|
60
60
|
if project is None:
|
|
61
|
-
raise
|
|
61
|
+
raise error_not_found()
|
|
62
62
|
if user.global_role == GlobalRole.ADMIN:
|
|
63
63
|
return user, project
|
|
64
64
|
project_role = get_user_project_role(user=user, project=project)
|
|
@@ -68,6 +68,10 @@ class ProjectAdmin:
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
class ProjectManager:
|
|
71
|
+
"""
|
|
72
|
+
Allows project admins and managers to manage projects.
|
|
73
|
+
"""
|
|
74
|
+
|
|
71
75
|
async def __call__(
|
|
72
76
|
self,
|
|
73
77
|
project_name: str,
|
|
@@ -79,12 +83,15 @@ class ProjectManager:
|
|
|
79
83
|
raise error_invalid_token()
|
|
80
84
|
project = await get_project_model_by_name(session=session, project_name=project_name)
|
|
81
85
|
if project is None:
|
|
82
|
-
raise
|
|
86
|
+
raise error_not_found()
|
|
87
|
+
|
|
83
88
|
if user.global_role == GlobalRole.ADMIN:
|
|
84
89
|
return user, project
|
|
90
|
+
|
|
85
91
|
project_role = get_user_project_role(user=user, project=project)
|
|
86
92
|
if project_role in [ProjectRole.ADMIN, ProjectRole.MANAGER]:
|
|
87
93
|
return user, project
|
|
94
|
+
|
|
88
95
|
raise error_forbidden()
|
|
89
96
|
|
|
90
97
|
|
|
@@ -135,6 +142,72 @@ class ProjectMemberOrPublicAccess:
|
|
|
135
142
|
raise error_forbidden()
|
|
136
143
|
|
|
137
144
|
|
|
145
|
+
class ProjectManagerOrPublicProject:
|
|
146
|
+
"""
|
|
147
|
+
Allows:
|
|
148
|
+
1. Project managers to perform member management operations
|
|
149
|
+
2. Access to public projects for any authenticated user
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
def __init__(self):
|
|
153
|
+
self.project_manager = ProjectManager()
|
|
154
|
+
|
|
155
|
+
async def __call__(
|
|
156
|
+
self,
|
|
157
|
+
project_name: str,
|
|
158
|
+
session: AsyncSession = Depends(get_session),
|
|
159
|
+
token: HTTPAuthorizationCredentials = Security(HTTPBearer()),
|
|
160
|
+
) -> Tuple[UserModel, ProjectModel]:
|
|
161
|
+
user = await log_in_with_token(session=session, token=token.credentials)
|
|
162
|
+
if user is None:
|
|
163
|
+
raise error_invalid_token()
|
|
164
|
+
project = await get_project_model_by_name(session=session, project_name=project_name)
|
|
165
|
+
if project is None:
|
|
166
|
+
raise error_not_found()
|
|
167
|
+
|
|
168
|
+
if user.global_role == GlobalRole.ADMIN:
|
|
169
|
+
return user, project
|
|
170
|
+
|
|
171
|
+
project_role = get_user_project_role(user=user, project=project)
|
|
172
|
+
if project_role in [ProjectRole.ADMIN, ProjectRole.MANAGER]:
|
|
173
|
+
return user, project
|
|
174
|
+
|
|
175
|
+
if project.is_public:
|
|
176
|
+
return user, project
|
|
177
|
+
|
|
178
|
+
raise error_forbidden()
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ProjectManagerOrSelfLeave:
|
|
182
|
+
"""
|
|
183
|
+
Allows:
|
|
184
|
+
1. Project managers to remove any members
|
|
185
|
+
2. Any project member to leave (remove themselves)
|
|
186
|
+
"""
|
|
187
|
+
|
|
188
|
+
async def __call__(
|
|
189
|
+
self,
|
|
190
|
+
project_name: str,
|
|
191
|
+
session: AsyncSession = Depends(get_session),
|
|
192
|
+
token: HTTPAuthorizationCredentials = Security(HTTPBearer()),
|
|
193
|
+
) -> Tuple[UserModel, ProjectModel]:
|
|
194
|
+
user = await log_in_with_token(session=session, token=token.credentials)
|
|
195
|
+
if user is None:
|
|
196
|
+
raise error_invalid_token()
|
|
197
|
+
project = await get_project_model_by_name(session=session, project_name=project_name)
|
|
198
|
+
if project is None:
|
|
199
|
+
raise error_not_found()
|
|
200
|
+
|
|
201
|
+
if user.global_role == GlobalRole.ADMIN:
|
|
202
|
+
return user, project
|
|
203
|
+
|
|
204
|
+
project_role = get_user_project_role(user=user, project=project)
|
|
205
|
+
if project_role is not None:
|
|
206
|
+
return user, project
|
|
207
|
+
|
|
208
|
+
raise error_forbidden()
|
|
209
|
+
|
|
210
|
+
|
|
138
211
|
class OptionalServiceAccount:
|
|
139
212
|
def __init__(self, token: Optional[str]) -> None:
|
|
140
213
|
self._token = token
|
|
@@ -35,7 +35,7 @@ from dstack._internal.core.models.instances import (
|
|
|
35
35
|
from dstack._internal.core.models.runs import Requirements
|
|
36
36
|
from dstack._internal.server import settings
|
|
37
37
|
from dstack._internal.server.models import BackendModel, DecryptedString, ProjectModel
|
|
38
|
-
from dstack._internal.
|
|
38
|
+
from dstack._internal.settings import LOCAL_BACKEND_ENABLED
|
|
39
39
|
from dstack._internal.utils.common import run_async
|
|
40
40
|
from dstack._internal.utils.logging import get_logger
|
|
41
41
|
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from fastapi import UploadFile
|
|
5
|
+
from sqlalchemy import select
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from dstack._internal.core.errors import ServerClientError
|
|
9
|
+
from dstack._internal.core.models.files import FileArchive
|
|
10
|
+
from dstack._internal.server.models import FileArchiveModel, UserModel
|
|
11
|
+
from dstack._internal.server.services.storage import get_default_storage
|
|
12
|
+
from dstack._internal.utils.common import run_async
|
|
13
|
+
from dstack._internal.utils.logging import get_logger
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def get_archive_model(
|
|
19
|
+
session: AsyncSession,
|
|
20
|
+
id: uuid.UUID,
|
|
21
|
+
user: Optional[UserModel] = None,
|
|
22
|
+
) -> Optional[FileArchiveModel]:
|
|
23
|
+
stmt = select(FileArchiveModel).where(FileArchiveModel.id == id)
|
|
24
|
+
if user is not None:
|
|
25
|
+
stmt = stmt.where(FileArchiveModel.user_id == user.id)
|
|
26
|
+
res = await session.execute(stmt)
|
|
27
|
+
return res.scalar()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
async def get_archive_model_by_hash(
|
|
31
|
+
session: AsyncSession,
|
|
32
|
+
user: UserModel,
|
|
33
|
+
hash: str,
|
|
34
|
+
) -> Optional[FileArchiveModel]:
|
|
35
|
+
res = await session.execute(
|
|
36
|
+
select(FileArchiveModel).where(
|
|
37
|
+
FileArchiveModel.user_id == user.id,
|
|
38
|
+
FileArchiveModel.blob_hash == hash,
|
|
39
|
+
)
|
|
40
|
+
)
|
|
41
|
+
return res.scalar()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def get_archive_by_hash(
|
|
45
|
+
session: AsyncSession,
|
|
46
|
+
user: UserModel,
|
|
47
|
+
hash: str,
|
|
48
|
+
) -> Optional[FileArchive]:
|
|
49
|
+
archive_model = await get_archive_model_by_hash(
|
|
50
|
+
session=session,
|
|
51
|
+
user=user,
|
|
52
|
+
hash=hash,
|
|
53
|
+
)
|
|
54
|
+
if archive_model is None:
|
|
55
|
+
return None
|
|
56
|
+
return archive_model_to_archive(archive_model)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
async def upload_archive(
|
|
60
|
+
session: AsyncSession,
|
|
61
|
+
user: UserModel,
|
|
62
|
+
file: UploadFile,
|
|
63
|
+
) -> FileArchive:
|
|
64
|
+
if file.filename is None:
|
|
65
|
+
raise ServerClientError("filename not specified")
|
|
66
|
+
archive_hash = file.filename
|
|
67
|
+
archive_model = await get_archive_model_by_hash(
|
|
68
|
+
session=session,
|
|
69
|
+
user=user,
|
|
70
|
+
hash=archive_hash,
|
|
71
|
+
)
|
|
72
|
+
if archive_model is not None:
|
|
73
|
+
logger.debug("File archive (user_id=%s, hash=%s) already uploaded", user.id, archive_hash)
|
|
74
|
+
return archive_model_to_archive(archive_model)
|
|
75
|
+
blob = await file.read()
|
|
76
|
+
storage = get_default_storage()
|
|
77
|
+
if storage is not None:
|
|
78
|
+
await run_async(storage.upload_archive, str(user.id), archive_hash, blob)
|
|
79
|
+
archive_model = FileArchiveModel(
|
|
80
|
+
user_id=user.id,
|
|
81
|
+
blob_hash=archive_hash,
|
|
82
|
+
blob=blob if storage is None else None,
|
|
83
|
+
)
|
|
84
|
+
session.add(archive_model)
|
|
85
|
+
await session.commit()
|
|
86
|
+
logger.debug("File archive (user_id=%s, hash=%s) has been uploaded", user.id, archive_hash)
|
|
87
|
+
return archive_model_to_archive(archive_model)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def archive_model_to_archive(archive_model: FileArchiveModel) -> FileArchive:
|
|
91
|
+
return FileArchive(id=archive_model.id, hash=archive_model.blob_hash)
|
|
@@ -532,7 +532,7 @@ async def delete_fleets(
|
|
|
532
532
|
.options(selectinload(FleetModel.runs))
|
|
533
533
|
.execution_options(populate_existing=True)
|
|
534
534
|
.order_by(FleetModel.id) # take locks in order
|
|
535
|
-
.with_for_update()
|
|
535
|
+
.with_for_update(key_share=True)
|
|
536
536
|
)
|
|
537
537
|
fleet_models = res.scalars().unique().all()
|
|
538
538
|
fleets = [fleet_model_to_fleet(m) for m in fleet_models]
|
|
@@ -240,7 +240,7 @@ async def delete_gateways(
|
|
|
240
240
|
.options(selectinload(GatewayModel.gateway_compute))
|
|
241
241
|
.execution_options(populate_existing=True)
|
|
242
242
|
.order_by(GatewayModel.id) # take locks in order
|
|
243
|
-
.with_for_update()
|
|
243
|
+
.with_for_update(key_share=True)
|
|
244
244
|
)
|
|
245
245
|
gateway_models = res.scalars().all()
|
|
246
246
|
for gateway_model in gateway_models:
|
|
@@ -33,6 +33,7 @@ from dstack._internal.core.models.runs import (
|
|
|
33
33
|
RunSpec,
|
|
34
34
|
)
|
|
35
35
|
from dstack._internal.core.models.volumes import Volume, VolumeMountPoint, VolumeStatus
|
|
36
|
+
from dstack._internal.server import settings
|
|
36
37
|
from dstack._internal.server.models import (
|
|
37
38
|
InstanceModel,
|
|
38
39
|
JobModel,
|
|
@@ -64,15 +65,23 @@ from dstack._internal.utils.logging import get_logger
|
|
|
64
65
|
logger = get_logger(__name__)
|
|
65
66
|
|
|
66
67
|
|
|
67
|
-
async def get_jobs_from_run_spec(
|
|
68
|
+
async def get_jobs_from_run_spec(
|
|
69
|
+
run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
|
|
70
|
+
) -> List[Job]:
|
|
68
71
|
return [
|
|
69
72
|
Job(job_spec=s, job_submissions=[])
|
|
70
|
-
for s in await get_job_specs_from_run_spec(
|
|
73
|
+
for s in await get_job_specs_from_run_spec(
|
|
74
|
+
run_spec=run_spec,
|
|
75
|
+
secrets=secrets,
|
|
76
|
+
replica_num=replica_num,
|
|
77
|
+
)
|
|
71
78
|
]
|
|
72
79
|
|
|
73
80
|
|
|
74
|
-
async def get_job_specs_from_run_spec(
|
|
75
|
-
|
|
81
|
+
async def get_job_specs_from_run_spec(
|
|
82
|
+
run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
|
|
83
|
+
) -> List[JobSpec]:
|
|
84
|
+
job_configurator = _get_job_configurator(run_spec=run_spec, secrets=secrets)
|
|
76
85
|
job_specs = await job_configurator.get_job_specs(replica_num=replica_num)
|
|
77
86
|
return job_specs
|
|
78
87
|
|
|
@@ -158,10 +167,10 @@ def delay_job_instance_termination(job_model: JobModel):
|
|
|
158
167
|
job_model.remove_at = common.get_current_datetime() + timedelta(seconds=15)
|
|
159
168
|
|
|
160
169
|
|
|
161
|
-
def _get_job_configurator(run_spec: RunSpec) -> JobConfigurator:
|
|
170
|
+
def _get_job_configurator(run_spec: RunSpec, secrets: Dict[str, str]) -> JobConfigurator:
|
|
162
171
|
configuration_type = RunConfigurationType(run_spec.configuration.type)
|
|
163
172
|
configurator_class = _configuration_type_to_configurator_class_map[configuration_type]
|
|
164
|
-
return configurator_class(run_spec)
|
|
173
|
+
return configurator_class(run_spec=run_spec, secrets=secrets)
|
|
165
174
|
|
|
166
175
|
|
|
167
176
|
_job_configurator_classes = [
|
|
@@ -380,8 +389,10 @@ def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel):
|
|
|
380
389
|
message=job_model.termination_reason_message,
|
|
381
390
|
timeout=0,
|
|
382
391
|
)
|
|
383
|
-
# maybe somehow postpone removing old tasks to allow inspecting failed jobs
|
|
384
|
-
|
|
392
|
+
# maybe somehow postpone removing old tasks to allow inspecting failed jobs without
|
|
393
|
+
# the following setting?
|
|
394
|
+
if not settings.SERVER_KEEP_SHIM_TASKS:
|
|
395
|
+
shim_client.remove_task(task_id=job_model.id)
|
|
385
396
|
else:
|
|
386
397
|
shim_client.stop(force=True)
|
|
387
398
|
|
|
@@ -68,8 +68,13 @@ class JobConfigurator(ABC):
|
|
|
68
68
|
# JobSSHKey should be shared for all jobs in a replica for inter-node communication.
|
|
69
69
|
_job_ssh_key: Optional[JobSSHKey] = None
|
|
70
70
|
|
|
71
|
-
def __init__(
|
|
71
|
+
def __init__(
|
|
72
|
+
self,
|
|
73
|
+
run_spec: RunSpec,
|
|
74
|
+
secrets: Optional[Dict[str, str]] = None,
|
|
75
|
+
):
|
|
72
76
|
self.run_spec = run_spec
|
|
77
|
+
self.secrets = secrets or {}
|
|
73
78
|
|
|
74
79
|
async def get_job_specs(self, replica_num: int) -> List[JobSpec]:
|
|
75
80
|
job_spec = await self._get_job_spec(replica_num=replica_num, job_num=0, jobs_per_replica=1)
|
|
@@ -98,10 +103,20 @@ class JobConfigurator(ABC):
|
|
|
98
103
|
async def _get_image_config(self) -> ImageConfig:
|
|
99
104
|
if self._image_config is not None:
|
|
100
105
|
return self._image_config
|
|
106
|
+
interpolate = VariablesInterpolator({"secrets": self.secrets}).interpolate_or_error
|
|
107
|
+
registry_auth = self.run_spec.configuration.registry_auth
|
|
108
|
+
if registry_auth is not None:
|
|
109
|
+
try:
|
|
110
|
+
registry_auth = RegistryAuth(
|
|
111
|
+
username=interpolate(registry_auth.username),
|
|
112
|
+
password=interpolate(registry_auth.password),
|
|
113
|
+
)
|
|
114
|
+
except InterpolatorError as e:
|
|
115
|
+
raise ServerClientError(e.args[0])
|
|
101
116
|
image_config = await run_async(
|
|
102
117
|
_get_image_config,
|
|
103
118
|
self._image_name(),
|
|
104
|
-
|
|
119
|
+
registry_auth,
|
|
105
120
|
)
|
|
106
121
|
self._image_config = image_config
|
|
107
122
|
return image_config
|
|
@@ -134,6 +149,9 @@ class JobConfigurator(ABC):
|
|
|
134
149
|
working_dir=self._working_dir(),
|
|
135
150
|
volumes=self._volumes(job_num),
|
|
136
151
|
ssh_key=self._ssh_key(jobs_per_replica),
|
|
152
|
+
repo_data=self.run_spec.repo_data,
|
|
153
|
+
repo_code_hash=self.run_spec.repo_code_hash,
|
|
154
|
+
file_archives=self.run_spec.file_archives,
|
|
137
155
|
)
|
|
138
156
|
return job_spec
|
|
139
157
|
|
|
@@ -171,6 +189,8 @@ class JobConfigurator(ABC):
|
|
|
171
189
|
return result
|
|
172
190
|
|
|
173
191
|
def _dstack_image_commands(self) -> List[str]:
|
|
192
|
+
if self.run_spec.configuration.docker is True:
|
|
193
|
+
return ["start-dockerd"]
|
|
174
194
|
if (
|
|
175
195
|
self.run_spec.configuration.image is not None
|
|
176
196
|
or self.run_spec.configuration.entrypoint is not None
|
|
@@ -201,7 +221,9 @@ class JobConfigurator(ABC):
|
|
|
201
221
|
return self.run_spec.configuration.home_dir
|
|
202
222
|
|
|
203
223
|
def _image_name(self) -> str:
|
|
204
|
-
if self.run_spec.configuration.
|
|
224
|
+
if self.run_spec.configuration.docker is True:
|
|
225
|
+
return settings.DSTACK_DIND_IMAGE
|
|
226
|
+
elif self.run_spec.configuration.image is not None:
|
|
205
227
|
return self.run_spec.configuration.image
|
|
206
228
|
return get_default_image(nvcc=bool(self.run_spec.configuration.nvcc))
|
|
207
229
|
|
|
@@ -215,6 +237,8 @@ class JobConfigurator(ABC):
|
|
|
215
237
|
return UnixUser.parse(user)
|
|
216
238
|
|
|
217
239
|
def _privileged(self) -> bool:
|
|
240
|
+
if self.run_spec.configuration.docker is True:
|
|
241
|
+
return True
|
|
218
242
|
return self.run_spec.configuration.privileged
|
|
219
243
|
|
|
220
244
|
def _single_branch(self) -> bool:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
2
|
|
|
3
3
|
from dstack._internal.core.errors import ServerClientError
|
|
4
4
|
from dstack._internal.core.models.configurations import PortMapping, RunConfigurationType
|
|
@@ -17,7 +17,7 @@ INSTALL_IPYKERNEL = (
|
|
|
17
17
|
class DevEnvironmentJobConfigurator(JobConfigurator):
|
|
18
18
|
TYPE: RunConfigurationType = RunConfigurationType.DEV_ENVIRONMENT
|
|
19
19
|
|
|
20
|
-
def __init__(self, run_spec: RunSpec):
|
|
20
|
+
def __init__(self, run_spec: RunSpec, secrets: Dict[str, str]):
|
|
21
21
|
if run_spec.configuration.ide == "vscode":
|
|
22
22
|
__class = VSCodeDesktop
|
|
23
23
|
elif run_spec.configuration.ide == "cursor":
|
|
@@ -29,7 +29,7 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
|
|
|
29
29
|
version=run_spec.configuration.version,
|
|
30
30
|
extensions=["ms-python.python", "ms-toolsai.jupyter"],
|
|
31
31
|
)
|
|
32
|
-
super().__init__(run_spec)
|
|
32
|
+
super().__init__(run_spec=run_spec, secrets=secrets)
|
|
33
33
|
|
|
34
34
|
def _shell_commands(self) -> List[str]:
|
|
35
35
|
commands = self.ide.get_install_commands()
|