dstack 0.19.15rc1__py3-none-any.whl → 0.19.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (93) hide show
  1. dstack/_internal/cli/commands/secrets.py +92 -0
  2. dstack/_internal/cli/main.py +2 -0
  3. dstack/_internal/cli/services/completion.py +5 -0
  4. dstack/_internal/cli/services/configurators/run.py +59 -17
  5. dstack/_internal/cli/utils/secrets.py +25 -0
  6. dstack/_internal/core/backends/__init__.py +10 -4
  7. dstack/_internal/core/backends/cloudrift/__init__.py +0 -0
  8. dstack/_internal/core/backends/cloudrift/api_client.py +208 -0
  9. dstack/_internal/core/backends/cloudrift/backend.py +16 -0
  10. dstack/_internal/core/backends/cloudrift/compute.py +138 -0
  11. dstack/_internal/core/backends/cloudrift/configurator.py +66 -0
  12. dstack/_internal/core/backends/cloudrift/models.py +40 -0
  13. dstack/_internal/core/backends/configurators.py +9 -0
  14. dstack/_internal/core/backends/models.py +7 -0
  15. dstack/_internal/core/compatibility/logs.py +15 -0
  16. dstack/_internal/core/compatibility/runs.py +31 -2
  17. dstack/_internal/core/models/backends/base.py +2 -0
  18. dstack/_internal/core/models/configurations.py +33 -2
  19. dstack/_internal/core/models/files.py +67 -0
  20. dstack/_internal/core/models/logs.py +2 -1
  21. dstack/_internal/core/models/runs.py +24 -1
  22. dstack/_internal/core/models/secrets.py +9 -2
  23. dstack/_internal/server/app.py +2 -0
  24. dstack/_internal/server/background/tasks/process_fleets.py +1 -1
  25. dstack/_internal/server/background/tasks/process_gateways.py +1 -1
  26. dstack/_internal/server/background/tasks/process_instances.py +1 -1
  27. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -1
  28. dstack/_internal/server/background/tasks/process_running_jobs.py +110 -13
  29. dstack/_internal/server/background/tasks/process_runs.py +36 -5
  30. dstack/_internal/server/background/tasks/process_submitted_jobs.py +10 -4
  31. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  32. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  33. dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
  34. dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
  35. dstack/_internal/server/models.py +33 -0
  36. dstack/_internal/server/routers/files.py +67 -0
  37. dstack/_internal/server/routers/gateways.py +6 -3
  38. dstack/_internal/server/routers/projects.py +63 -0
  39. dstack/_internal/server/routers/prometheus.py +5 -5
  40. dstack/_internal/server/routers/secrets.py +57 -15
  41. dstack/_internal/server/schemas/files.py +5 -0
  42. dstack/_internal/server/schemas/logs.py +10 -1
  43. dstack/_internal/server/schemas/projects.py +12 -0
  44. dstack/_internal/server/schemas/runner.py +2 -0
  45. dstack/_internal/server/schemas/secrets.py +7 -11
  46. dstack/_internal/server/security/permissions.py +75 -2
  47. dstack/_internal/server/services/backends/__init__.py +1 -1
  48. dstack/_internal/server/services/files.py +91 -0
  49. dstack/_internal/server/services/fleets.py +1 -1
  50. dstack/_internal/server/services/gateways/__init__.py +1 -1
  51. dstack/_internal/server/services/jobs/__init__.py +19 -8
  52. dstack/_internal/server/services/jobs/configurators/base.py +27 -3
  53. dstack/_internal/server/services/jobs/configurators/dev.py +3 -3
  54. dstack/_internal/server/services/logs/aws.py +38 -38
  55. dstack/_internal/server/services/logs/filelog.py +48 -14
  56. dstack/_internal/server/services/logs/gcp.py +17 -16
  57. dstack/_internal/server/services/projects.py +164 -5
  58. dstack/_internal/server/services/prometheus/__init__.py +0 -0
  59. dstack/_internal/server/services/prometheus/client_metrics.py +52 -0
  60. dstack/_internal/server/services/proxy/repo.py +3 -0
  61. dstack/_internal/server/services/runner/client.py +8 -0
  62. dstack/_internal/server/services/runs.py +55 -10
  63. dstack/_internal/server/services/secrets.py +204 -0
  64. dstack/_internal/server/services/services/__init__.py +2 -1
  65. dstack/_internal/server/services/storage/base.py +21 -0
  66. dstack/_internal/server/services/storage/gcs.py +28 -6
  67. dstack/_internal/server/services/storage/s3.py +27 -9
  68. dstack/_internal/server/services/users.py +1 -3
  69. dstack/_internal/server/services/volumes.py +1 -1
  70. dstack/_internal/server/settings.py +2 -2
  71. dstack/_internal/server/statics/index.html +1 -1
  72. dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js → main-d151637af20f70b2e796.js} +104 -48
  73. dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js.map → main-d151637af20f70b2e796.js.map} +1 -1
  74. dstack/_internal/server/statics/{main-f39c418b05fe14772dd8.css → main-d48635d8fe670d53961c.css} +1 -1
  75. dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
  76. dstack/_internal/server/testing/common.py +43 -5
  77. dstack/_internal/settings.py +5 -0
  78. dstack/_internal/utils/files.py +69 -0
  79. dstack/_internal/utils/nested_list.py +47 -0
  80. dstack/_internal/utils/path.py +12 -4
  81. dstack/api/_public/runs.py +73 -12
  82. dstack/api/server/__init__.py +6 -0
  83. dstack/api/server/_files.py +18 -0
  84. dstack/api/server/_logs.py +5 -1
  85. dstack/api/server/_projects.py +24 -0
  86. dstack/api/server/_secrets.py +15 -15
  87. dstack/version.py +1 -1
  88. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/METADATA +3 -4
  89. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/RECORD +93 -71
  90. /dstack/_internal/server/services/{prometheus.py → prometheus/custom_metrics.py} +0 -0
  91. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/WHEEL +0 -0
  92. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/entry_points.txt +0 -0
  93. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/licenses/LICENSE.md +0 -0
@@ -7,13 +7,19 @@ from dstack._internal.core.models.projects import Project
7
7
  from dstack._internal.server.db import get_session
8
8
  from dstack._internal.server.models import ProjectModel, UserModel
9
9
  from dstack._internal.server.schemas.projects import (
10
+ AddProjectMemberRequest,
10
11
  CreateProjectRequest,
11
12
  DeleteProjectsRequest,
13
+ RemoveProjectMemberRequest,
12
14
  SetProjectMembersRequest,
15
+ UpdateProjectRequest,
13
16
  )
14
17
  from dstack._internal.server.security.permissions import (
15
18
  Authenticated,
19
+ ProjectAdmin,
16
20
  ProjectManager,
21
+ ProjectManagerOrPublicProject,
22
+ ProjectManagerOrSelfLeave,
17
23
  ProjectMemberOrPublicAccess,
18
24
  )
19
25
  from dstack._internal.server.services import projects
@@ -92,3 +98,60 @@ async def set_project_members(
92
98
  )
93
99
  await session.refresh(project)
94
100
  return projects.project_model_to_project(project)
101
+
102
+
103
+ @router.post(
104
+ "/{project_name}/add_members",
105
+ )
106
+ async def add_project_members(
107
+ body: AddProjectMemberRequest,
108
+ session: AsyncSession = Depends(get_session),
109
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectManagerOrPublicProject()),
110
+ ) -> Project:
111
+ user, project = user_project
112
+ await projects.add_project_members(
113
+ session=session,
114
+ user=user,
115
+ project=project,
116
+ members=body.members,
117
+ )
118
+ await session.refresh(project)
119
+ return projects.project_model_to_project(project)
120
+
121
+
122
+ @router.post(
123
+ "/{project_name}/remove_members",
124
+ )
125
+ async def remove_project_members(
126
+ body: RemoveProjectMemberRequest,
127
+ session: AsyncSession = Depends(get_session),
128
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectManagerOrSelfLeave()),
129
+ ) -> Project:
130
+ user, project = user_project
131
+ await projects.remove_project_members(
132
+ session=session,
133
+ user=user,
134
+ project=project,
135
+ usernames=body.usernames,
136
+ )
137
+ await session.refresh(project)
138
+ return projects.project_model_to_project(project)
139
+
140
+
141
+ @router.post(
142
+ "/{project_name}/update",
143
+ )
144
+ async def update_project(
145
+ body: UpdateProjectRequest,
146
+ session: AsyncSession = Depends(get_session),
147
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
148
+ ) -> Project:
149
+ user, project = user_project
150
+ await projects.update_project(
151
+ session=session,
152
+ user=user,
153
+ project=project,
154
+ is_public=body.is_public,
155
+ )
156
+ await session.refresh(project)
157
+ return projects.project_model_to_project(project)
@@ -1,15 +1,15 @@
1
1
  import os
2
2
  from typing import Annotated
3
3
 
4
+ import prometheus_client
4
5
  from fastapi import APIRouter, Depends
5
6
  from fastapi.responses import PlainTextResponse
6
- from prometheus_client import generate_latest
7
7
  from sqlalchemy.ext.asyncio import AsyncSession
8
8
 
9
9
  from dstack._internal.server import settings
10
10
  from dstack._internal.server.db import get_session
11
11
  from dstack._internal.server.security.permissions import OptionalServiceAccount
12
- from dstack._internal.server.services import prometheus
12
+ from dstack._internal.server.services.prometheus import custom_metrics
13
13
  from dstack._internal.server.utils.routers import error_not_found
14
14
 
15
15
  _auth = OptionalServiceAccount(os.getenv("DSTACK_PROMETHEUS_AUTH_TOKEN"))
@@ -27,6 +27,6 @@ async def get_prometheus_metrics(
27
27
  ) -> str:
28
28
  if not settings.ENABLE_PROMETHEUS_METRICS:
29
29
  raise error_not_found()
30
- custom_metrics = await prometheus.get_metrics(session=session)
31
- prometheus_metrics = generate_latest()
32
- return custom_metrics + prometheus_metrics.decode()
30
+ custom_metrics_ = await custom_metrics.get_metrics(session=session)
31
+ client_metrics = prometheus_client.generate_latest().decode()
32
+ return custom_metrics_ + client_metrics
@@ -1,15 +1,19 @@
1
- from typing import List
1
+ from typing import List, Tuple
2
2
 
3
- from fastapi import APIRouter
3
+ from fastapi import APIRouter, Depends
4
+ from sqlalchemy.ext.asyncio import AsyncSession
4
5
 
5
- from dstack._internal.core.models.runs import Run
6
+ from dstack._internal.core.errors import ResourceNotExistsError
6
7
  from dstack._internal.core.models.secrets import Secret
8
+ from dstack._internal.server.db import get_session
9
+ from dstack._internal.server.models import ProjectModel, UserModel
7
10
  from dstack._internal.server.schemas.secrets import (
8
- AddSecretRequest,
11
+ CreateOrUpdateSecretRequest,
9
12
  DeleteSecretsRequest,
10
- GetSecretsRequest,
11
- ListSecretsRequest,
13
+ GetSecretRequest,
12
14
  )
15
+ from dstack._internal.server.security.permissions import ProjectAdmin
16
+ from dstack._internal.server.services import secrets as secrets_services
13
17
 
14
18
  router = APIRouter(
15
19
  prefix="/api/project/{project_name}/secrets",
@@ -18,20 +22,58 @@ router = APIRouter(
18
22
 
19
23
 
20
24
  @router.post("/list")
21
- async def list_secrets(project_name: str, body: ListSecretsRequest) -> List[Run]:
22
- pass
25
+ async def list_secrets(
26
+ session: AsyncSession = Depends(get_session),
27
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
28
+ ) -> List[Secret]:
29
+ _, project = user_project
30
+ return await secrets_services.list_secrets(
31
+ session=session,
32
+ project=project,
33
+ )
23
34
 
24
35
 
25
36
  @router.post("/get")
26
- async def get_secret(project_name: str, body: GetSecretsRequest) -> Secret:
27
- pass
37
+ async def get_secret(
38
+ body: GetSecretRequest,
39
+ session: AsyncSession = Depends(get_session),
40
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
41
+ ) -> Secret:
42
+ _, project = user_project
43
+ secret = await secrets_services.get_secret(
44
+ session=session,
45
+ project=project,
46
+ name=body.name,
47
+ )
48
+ if secret is None:
49
+ raise ResourceNotExistsError()
50
+ return secret
28
51
 
29
52
 
30
- @router.post("/add")
31
- async def add_or_update_secret(project_name: str, body: AddSecretRequest) -> Secret:
32
- pass
53
+ @router.post("/create_or_update")
54
+ async def create_or_update_secret(
55
+ body: CreateOrUpdateSecretRequest,
56
+ session: AsyncSession = Depends(get_session),
57
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
58
+ ) -> Secret:
59
+ _, project = user_project
60
+ return await secrets_services.create_or_update_secret(
61
+ session=session,
62
+ project=project,
63
+ name=body.name,
64
+ value=body.value,
65
+ )
33
66
 
34
67
 
35
68
  @router.post("/delete")
36
- async def delete_secrets(project_name: str, body: DeleteSecretsRequest):
37
- pass
69
+ async def delete_secrets(
70
+ body: DeleteSecretsRequest,
71
+ session: AsyncSession = Depends(get_session),
72
+ user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
73
+ ):
74
+ _, project = user_project
75
+ await secrets_services.delete_secrets(
76
+ session=session,
77
+ project=project,
78
+ names=body.secrets_names,
79
+ )
@@ -0,0 +1,5 @@
1
+ from dstack._internal.core.models.common import CoreModel
2
+
3
+
4
+ class GetFileArchiveByHashRequest(CoreModel):
5
+ hash: str
@@ -1,7 +1,7 @@
1
1
  from datetime import datetime
2
2
  from typing import Optional
3
3
 
4
- from pydantic import UUID4, Field
4
+ from pydantic import UUID4, Field, validator
5
5
 
6
6
  from dstack._internal.core.models.common import CoreModel
7
7
 
@@ -12,5 +12,14 @@ class PollLogsRequest(CoreModel):
12
12
  start_time: Optional[datetime]
13
13
  end_time: Optional[datetime]
14
14
  descending: bool = False
15
+ next_token: Optional[str] = None
15
16
  limit: int = Field(100, ge=0, le=1000)
16
17
  diagnose: bool = False
18
+
19
+ @validator("descending")
20
+ @classmethod
21
+ def validate_descending(cls, v):
22
+ # Descending is not supported until we migrate from base64-encoded logs to plain text logs.
23
+ if v is True:
24
+ raise ValueError("descending: true is not supported")
25
+ return v
@@ -11,6 +11,10 @@ class CreateProjectRequest(CoreModel):
11
11
  is_public: bool = False
12
12
 
13
13
 
14
+ class UpdateProjectRequest(CoreModel):
15
+ is_public: bool
16
+
17
+
14
18
  class DeleteProjectsRequest(CoreModel):
15
19
  projects_names: List[str]
16
20
 
@@ -25,3 +29,11 @@ class MemberSetting(CoreModel):
25
29
 
26
30
  class SetProjectMembersRequest(CoreModel):
27
31
  members: List[MemberSetting]
32
+
33
+
34
+ class AddProjectMemberRequest(CoreModel):
35
+ members: List[MemberSetting]
36
+
37
+
38
+ class RemoveProjectMemberRequest(CoreModel):
39
+ usernames: List[str]
@@ -77,6 +77,8 @@ class SubmitBody(CoreModel):
77
77
  "max_duration",
78
78
  "ssh_key",
79
79
  "working_dir",
80
+ "repo_data",
81
+ "file_archives",
80
82
  }
81
83
  ),
82
84
  ]
@@ -1,20 +1,16 @@
1
1
  from typing import List
2
2
 
3
- from dstack._internal.core.models.secrets import Secret
4
- from dstack._internal.server.schemas.common import RepoRequest
3
+ from dstack._internal.core.models.common import CoreModel
5
4
 
6
5
 
7
- class ListSecretsRequest(RepoRequest):
8
- pass
6
+ class GetSecretRequest(CoreModel):
7
+ name: str
9
8
 
10
9
 
11
- class GetSecretsRequest(RepoRequest):
12
- pass
10
+ class CreateOrUpdateSecretRequest(CoreModel):
11
+ name: str
12
+ value: str
13
13
 
14
14
 
15
- class AddSecretRequest(RepoRequest):
16
- secret: Secret
17
-
18
-
19
- class DeleteSecretsRequest(RepoRequest):
15
+ class DeleteSecretsRequest(CoreModel):
20
16
  secrets_names: List[str]
@@ -58,7 +58,7 @@ class ProjectAdmin:
58
58
  raise error_invalid_token()
59
59
  project = await get_project_model_by_name(session=session, project_name=project_name)
60
60
  if project is None:
61
- raise error_forbidden()
61
+ raise error_not_found()
62
62
  if user.global_role == GlobalRole.ADMIN:
63
63
  return user, project
64
64
  project_role = get_user_project_role(user=user, project=project)
@@ -68,6 +68,10 @@ class ProjectAdmin:
68
68
 
69
69
 
70
70
  class ProjectManager:
71
+ """
72
+ Allows project admins and managers to manage projects.
73
+ """
74
+
71
75
  async def __call__(
72
76
  self,
73
77
  project_name: str,
@@ -79,12 +83,15 @@ class ProjectManager:
79
83
  raise error_invalid_token()
80
84
  project = await get_project_model_by_name(session=session, project_name=project_name)
81
85
  if project is None:
82
- raise error_forbidden()
86
+ raise error_not_found()
87
+
83
88
  if user.global_role == GlobalRole.ADMIN:
84
89
  return user, project
90
+
85
91
  project_role = get_user_project_role(user=user, project=project)
86
92
  if project_role in [ProjectRole.ADMIN, ProjectRole.MANAGER]:
87
93
  return user, project
94
+
88
95
  raise error_forbidden()
89
96
 
90
97
 
@@ -135,6 +142,72 @@ class ProjectMemberOrPublicAccess:
135
142
  raise error_forbidden()
136
143
 
137
144
 
145
+ class ProjectManagerOrPublicProject:
146
+ """
147
+ Allows:
148
+ 1. Project managers to perform member management operations
149
+ 2. Access to public projects for any authenticated user
150
+ """
151
+
152
+ def __init__(self):
153
+ self.project_manager = ProjectManager()
154
+
155
+ async def __call__(
156
+ self,
157
+ project_name: str,
158
+ session: AsyncSession = Depends(get_session),
159
+ token: HTTPAuthorizationCredentials = Security(HTTPBearer()),
160
+ ) -> Tuple[UserModel, ProjectModel]:
161
+ user = await log_in_with_token(session=session, token=token.credentials)
162
+ if user is None:
163
+ raise error_invalid_token()
164
+ project = await get_project_model_by_name(session=session, project_name=project_name)
165
+ if project is None:
166
+ raise error_not_found()
167
+
168
+ if user.global_role == GlobalRole.ADMIN:
169
+ return user, project
170
+
171
+ project_role = get_user_project_role(user=user, project=project)
172
+ if project_role in [ProjectRole.ADMIN, ProjectRole.MANAGER]:
173
+ return user, project
174
+
175
+ if project.is_public:
176
+ return user, project
177
+
178
+ raise error_forbidden()
179
+
180
+
181
+ class ProjectManagerOrSelfLeave:
182
+ """
183
+ Allows:
184
+ 1. Project managers to remove any members
185
+ 2. Any project member to leave (remove themselves)
186
+ """
187
+
188
+ async def __call__(
189
+ self,
190
+ project_name: str,
191
+ session: AsyncSession = Depends(get_session),
192
+ token: HTTPAuthorizationCredentials = Security(HTTPBearer()),
193
+ ) -> Tuple[UserModel, ProjectModel]:
194
+ user = await log_in_with_token(session=session, token=token.credentials)
195
+ if user is None:
196
+ raise error_invalid_token()
197
+ project = await get_project_model_by_name(session=session, project_name=project_name)
198
+ if project is None:
199
+ raise error_not_found()
200
+
201
+ if user.global_role == GlobalRole.ADMIN:
202
+ return user, project
203
+
204
+ project_role = get_user_project_role(user=user, project=project)
205
+ if project_role is not None:
206
+ return user, project
207
+
208
+ raise error_forbidden()
209
+
210
+
138
211
  class OptionalServiceAccount:
139
212
  def __init__(self, token: Optional[str]) -> None:
140
213
  self._token = token
@@ -35,7 +35,7 @@ from dstack._internal.core.models.instances import (
35
35
  from dstack._internal.core.models.runs import Requirements
36
36
  from dstack._internal.server import settings
37
37
  from dstack._internal.server.models import BackendModel, DecryptedString, ProjectModel
38
- from dstack._internal.server.settings import LOCAL_BACKEND_ENABLED
38
+ from dstack._internal.settings import LOCAL_BACKEND_ENABLED
39
39
  from dstack._internal.utils.common import run_async
40
40
  from dstack._internal.utils.logging import get_logger
41
41
 
@@ -0,0 +1,91 @@
1
+ import uuid
2
+ from typing import Optional
3
+
4
+ from fastapi import UploadFile
5
+ from sqlalchemy import select
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+ from dstack._internal.core.errors import ServerClientError
9
+ from dstack._internal.core.models.files import FileArchive
10
+ from dstack._internal.server.models import FileArchiveModel, UserModel
11
+ from dstack._internal.server.services.storage import get_default_storage
12
+ from dstack._internal.utils.common import run_async
13
+ from dstack._internal.utils.logging import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
+ async def get_archive_model(
19
+ session: AsyncSession,
20
+ id: uuid.UUID,
21
+ user: Optional[UserModel] = None,
22
+ ) -> Optional[FileArchiveModel]:
23
+ stmt = select(FileArchiveModel).where(FileArchiveModel.id == id)
24
+ if user is not None:
25
+ stmt = stmt.where(FileArchiveModel.user_id == user.id)
26
+ res = await session.execute(stmt)
27
+ return res.scalar()
28
+
29
+
30
+ async def get_archive_model_by_hash(
31
+ session: AsyncSession,
32
+ user: UserModel,
33
+ hash: str,
34
+ ) -> Optional[FileArchiveModel]:
35
+ res = await session.execute(
36
+ select(FileArchiveModel).where(
37
+ FileArchiveModel.user_id == user.id,
38
+ FileArchiveModel.blob_hash == hash,
39
+ )
40
+ )
41
+ return res.scalar()
42
+
43
+
44
+ async def get_archive_by_hash(
45
+ session: AsyncSession,
46
+ user: UserModel,
47
+ hash: str,
48
+ ) -> Optional[FileArchive]:
49
+ archive_model = await get_archive_model_by_hash(
50
+ session=session,
51
+ user=user,
52
+ hash=hash,
53
+ )
54
+ if archive_model is None:
55
+ return None
56
+ return archive_model_to_archive(archive_model)
57
+
58
+
59
+ async def upload_archive(
60
+ session: AsyncSession,
61
+ user: UserModel,
62
+ file: UploadFile,
63
+ ) -> FileArchive:
64
+ if file.filename is None:
65
+ raise ServerClientError("filename not specified")
66
+ archive_hash = file.filename
67
+ archive_model = await get_archive_model_by_hash(
68
+ session=session,
69
+ user=user,
70
+ hash=archive_hash,
71
+ )
72
+ if archive_model is not None:
73
+ logger.debug("File archive (user_id=%s, hash=%s) already uploaded", user.id, archive_hash)
74
+ return archive_model_to_archive(archive_model)
75
+ blob = await file.read()
76
+ storage = get_default_storage()
77
+ if storage is not None:
78
+ await run_async(storage.upload_archive, str(user.id), archive_hash, blob)
79
+ archive_model = FileArchiveModel(
80
+ user_id=user.id,
81
+ blob_hash=archive_hash,
82
+ blob=blob if storage is None else None,
83
+ )
84
+ session.add(archive_model)
85
+ await session.commit()
86
+ logger.debug("File archive (user_id=%s, hash=%s) has been uploaded", user.id, archive_hash)
87
+ return archive_model_to_archive(archive_model)
88
+
89
+
90
+ def archive_model_to_archive(archive_model: FileArchiveModel) -> FileArchive:
91
+ return FileArchive(id=archive_model.id, hash=archive_model.blob_hash)
@@ -532,7 +532,7 @@ async def delete_fleets(
532
532
  .options(selectinload(FleetModel.runs))
533
533
  .execution_options(populate_existing=True)
534
534
  .order_by(FleetModel.id) # take locks in order
535
- .with_for_update()
535
+ .with_for_update(key_share=True)
536
536
  )
537
537
  fleet_models = res.scalars().unique().all()
538
538
  fleets = [fleet_model_to_fleet(m) for m in fleet_models]
@@ -240,7 +240,7 @@ async def delete_gateways(
240
240
  .options(selectinload(GatewayModel.gateway_compute))
241
241
  .execution_options(populate_existing=True)
242
242
  .order_by(GatewayModel.id) # take locks in order
243
- .with_for_update()
243
+ .with_for_update(key_share=True)
244
244
  )
245
245
  gateway_models = res.scalars().all()
246
246
  for gateway_model in gateway_models:
@@ -33,6 +33,7 @@ from dstack._internal.core.models.runs import (
33
33
  RunSpec,
34
34
  )
35
35
  from dstack._internal.core.models.volumes import Volume, VolumeMountPoint, VolumeStatus
36
+ from dstack._internal.server import settings
36
37
  from dstack._internal.server.models import (
37
38
  InstanceModel,
38
39
  JobModel,
@@ -64,15 +65,23 @@ from dstack._internal.utils.logging import get_logger
64
65
  logger = get_logger(__name__)
65
66
 
66
67
 
67
- async def get_jobs_from_run_spec(run_spec: RunSpec, replica_num: int) -> List[Job]:
68
+ async def get_jobs_from_run_spec(
69
+ run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
70
+ ) -> List[Job]:
68
71
  return [
69
72
  Job(job_spec=s, job_submissions=[])
70
- for s in await get_job_specs_from_run_spec(run_spec, replica_num)
73
+ for s in await get_job_specs_from_run_spec(
74
+ run_spec=run_spec,
75
+ secrets=secrets,
76
+ replica_num=replica_num,
77
+ )
71
78
  ]
72
79
 
73
80
 
74
- async def get_job_specs_from_run_spec(run_spec: RunSpec, replica_num: int) -> List[JobSpec]:
75
- job_configurator = _get_job_configurator(run_spec)
81
+ async def get_job_specs_from_run_spec(
82
+ run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
83
+ ) -> List[JobSpec]:
84
+ job_configurator = _get_job_configurator(run_spec=run_spec, secrets=secrets)
76
85
  job_specs = await job_configurator.get_job_specs(replica_num=replica_num)
77
86
  return job_specs
78
87
 
@@ -158,10 +167,10 @@ def delay_job_instance_termination(job_model: JobModel):
158
167
  job_model.remove_at = common.get_current_datetime() + timedelta(seconds=15)
159
168
 
160
169
 
161
- def _get_job_configurator(run_spec: RunSpec) -> JobConfigurator:
170
+ def _get_job_configurator(run_spec: RunSpec, secrets: Dict[str, str]) -> JobConfigurator:
162
171
  configuration_type = RunConfigurationType(run_spec.configuration.type)
163
172
  configurator_class = _configuration_type_to_configurator_class_map[configuration_type]
164
- return configurator_class(run_spec)
173
+ return configurator_class(run_spec=run_spec, secrets=secrets)
165
174
 
166
175
 
167
176
  _job_configurator_classes = [
@@ -380,8 +389,10 @@ def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel):
380
389
  message=job_model.termination_reason_message,
381
390
  timeout=0,
382
391
  )
383
- # maybe somehow postpone removing old tasks to allow inspecting failed jobs?
384
- shim_client.remove_task(task_id=job_model.id)
392
+ # maybe somehow postpone removing old tasks to allow inspecting failed jobs without
393
+ # the following setting?
394
+ if not settings.SERVER_KEEP_SHIM_TASKS:
395
+ shim_client.remove_task(task_id=job_model.id)
385
396
  else:
386
397
  shim_client.stop(force=True)
387
398
 
@@ -68,8 +68,13 @@ class JobConfigurator(ABC):
68
68
  # JobSSHKey should be shared for all jobs in a replica for inter-node communication.
69
69
  _job_ssh_key: Optional[JobSSHKey] = None
70
70
 
71
- def __init__(self, run_spec: RunSpec):
71
+ def __init__(
72
+ self,
73
+ run_spec: RunSpec,
74
+ secrets: Optional[Dict[str, str]] = None,
75
+ ):
72
76
  self.run_spec = run_spec
77
+ self.secrets = secrets or {}
73
78
 
74
79
  async def get_job_specs(self, replica_num: int) -> List[JobSpec]:
75
80
  job_spec = await self._get_job_spec(replica_num=replica_num, job_num=0, jobs_per_replica=1)
@@ -98,10 +103,20 @@ class JobConfigurator(ABC):
98
103
  async def _get_image_config(self) -> ImageConfig:
99
104
  if self._image_config is not None:
100
105
  return self._image_config
106
+ interpolate = VariablesInterpolator({"secrets": self.secrets}).interpolate_or_error
107
+ registry_auth = self.run_spec.configuration.registry_auth
108
+ if registry_auth is not None:
109
+ try:
110
+ registry_auth = RegistryAuth(
111
+ username=interpolate(registry_auth.username),
112
+ password=interpolate(registry_auth.password),
113
+ )
114
+ except InterpolatorError as e:
115
+ raise ServerClientError(e.args[0])
101
116
  image_config = await run_async(
102
117
  _get_image_config,
103
118
  self._image_name(),
104
- self.run_spec.configuration.registry_auth,
119
+ registry_auth,
105
120
  )
106
121
  self._image_config = image_config
107
122
  return image_config
@@ -134,6 +149,9 @@ class JobConfigurator(ABC):
134
149
  working_dir=self._working_dir(),
135
150
  volumes=self._volumes(job_num),
136
151
  ssh_key=self._ssh_key(jobs_per_replica),
152
+ repo_data=self.run_spec.repo_data,
153
+ repo_code_hash=self.run_spec.repo_code_hash,
154
+ file_archives=self.run_spec.file_archives,
137
155
  )
138
156
  return job_spec
139
157
 
@@ -171,6 +189,8 @@ class JobConfigurator(ABC):
171
189
  return result
172
190
 
173
191
  def _dstack_image_commands(self) -> List[str]:
192
+ if self.run_spec.configuration.docker is True:
193
+ return ["start-dockerd"]
174
194
  if (
175
195
  self.run_spec.configuration.image is not None
176
196
  or self.run_spec.configuration.entrypoint is not None
@@ -201,7 +221,9 @@ class JobConfigurator(ABC):
201
221
  return self.run_spec.configuration.home_dir
202
222
 
203
223
  def _image_name(self) -> str:
204
- if self.run_spec.configuration.image is not None:
224
+ if self.run_spec.configuration.docker is True:
225
+ return settings.DSTACK_DIND_IMAGE
226
+ elif self.run_spec.configuration.image is not None:
205
227
  return self.run_spec.configuration.image
206
228
  return get_default_image(nvcc=bool(self.run_spec.configuration.nvcc))
207
229
 
@@ -215,6 +237,8 @@ class JobConfigurator(ABC):
215
237
  return UnixUser.parse(user)
216
238
 
217
239
  def _privileged(self) -> bool:
240
+ if self.run_spec.configuration.docker is True:
241
+ return True
218
242
  return self.run_spec.configuration.privileged
219
243
 
220
244
  def _single_branch(self) -> bool:
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import Dict, List, Optional
2
2
 
3
3
  from dstack._internal.core.errors import ServerClientError
4
4
  from dstack._internal.core.models.configurations import PortMapping, RunConfigurationType
@@ -17,7 +17,7 @@ INSTALL_IPYKERNEL = (
17
17
  class DevEnvironmentJobConfigurator(JobConfigurator):
18
18
  TYPE: RunConfigurationType = RunConfigurationType.DEV_ENVIRONMENT
19
19
 
20
- def __init__(self, run_spec: RunSpec):
20
+ def __init__(self, run_spec: RunSpec, secrets: Dict[str, str]):
21
21
  if run_spec.configuration.ide == "vscode":
22
22
  __class = VSCodeDesktop
23
23
  elif run_spec.configuration.ide == "cursor":
@@ -29,7 +29,7 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
29
29
  version=run_spec.configuration.version,
30
30
  extensions=["ms-python.python", "ms-toolsai.jupyter"],
31
31
  )
32
- super().__init__(run_spec)
32
+ super().__init__(run_spec=run_spec, secrets=secrets)
33
33
 
34
34
  def _shell_commands(self) -> List[str]:
35
35
  commands = self.ide.get_install_commands()