dstack 0.19.16-py3-none-any.whl → 0.19.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dstack might be problematic.
- dstack/_internal/cli/commands/secrets.py +92 -0
- dstack/_internal/cli/main.py +2 -0
- dstack/_internal/cli/services/completion.py +5 -0
- dstack/_internal/cli/services/configurators/fleet.py +13 -1
- dstack/_internal/cli/services/configurators/run.py +59 -17
- dstack/_internal/cli/utils/secrets.py +25 -0
- dstack/_internal/core/backends/__init__.py +10 -4
- dstack/_internal/core/backends/aws/compute.py +237 -18
- dstack/_internal/core/backends/base/compute.py +20 -2
- dstack/_internal/core/backends/cudo/compute.py +23 -9
- dstack/_internal/core/backends/gcp/compute.py +13 -7
- dstack/_internal/core/backends/lambdalabs/compute.py +2 -1
- dstack/_internal/core/compatibility/fleets.py +12 -11
- dstack/_internal/core/compatibility/gateways.py +9 -8
- dstack/_internal/core/compatibility/logs.py +4 -3
- dstack/_internal/core/compatibility/runs.py +41 -17
- dstack/_internal/core/compatibility/volumes.py +9 -8
- dstack/_internal/core/errors.py +4 -0
- dstack/_internal/core/models/common.py +7 -0
- dstack/_internal/core/models/configurations.py +11 -0
- dstack/_internal/core/models/files.py +67 -0
- dstack/_internal/core/models/runs.py +14 -0
- dstack/_internal/core/models/secrets.py +9 -2
- dstack/_internal/core/services/diff.py +36 -3
- dstack/_internal/server/app.py +22 -0
- dstack/_internal/server/background/__init__.py +61 -37
- dstack/_internal/server/background/tasks/process_fleets.py +19 -3
- dstack/_internal/server/background/tasks/process_gateways.py +1 -1
- dstack/_internal/server/background/tasks/process_instances.py +13 -2
- dstack/_internal/server/background/tasks/process_placement_groups.py +4 -2
- dstack/_internal/server/background/tasks/process_running_jobs.py +123 -15
- dstack/_internal/server/background/tasks/process_runs.py +23 -7
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +36 -7
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +5 -3
- dstack/_internal/server/background/tasks/process_volumes.py +2 -2
- dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
- dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
- dstack/_internal/server/models.py +33 -0
- dstack/_internal/server/routers/files.py +67 -0
- dstack/_internal/server/routers/secrets.py +57 -15
- dstack/_internal/server/schemas/files.py +5 -0
- dstack/_internal/server/schemas/runner.py +2 -0
- dstack/_internal/server/schemas/secrets.py +7 -11
- dstack/_internal/server/services/backends/__init__.py +1 -1
- dstack/_internal/server/services/files.py +91 -0
- dstack/_internal/server/services/fleets.py +5 -4
- dstack/_internal/server/services/gateways/__init__.py +4 -2
- dstack/_internal/server/services/jobs/__init__.py +19 -8
- dstack/_internal/server/services/jobs/configurators/base.py +25 -3
- dstack/_internal/server/services/jobs/configurators/dev.py +3 -3
- dstack/_internal/server/services/locking.py +101 -12
- dstack/_internal/server/services/proxy/repo.py +3 -0
- dstack/_internal/server/services/runner/client.py +8 -0
- dstack/_internal/server/services/runs.py +76 -47
- dstack/_internal/server/services/secrets.py +204 -0
- dstack/_internal/server/services/storage/base.py +21 -0
- dstack/_internal/server/services/storage/gcs.py +28 -6
- dstack/_internal/server/services/storage/s3.py +27 -9
- dstack/_internal/server/services/volumes.py +2 -2
- dstack/_internal/server/settings.py +19 -5
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-a4eafa74304e587d037c.js → main-d1ac2e8c38ed5f08a114.js} +86 -34
- dstack/_internal/server/statics/{main-a4eafa74304e587d037c.js.map → main-d1ac2e8c38ed5f08a114.js.map} +1 -1
- dstack/_internal/server/statics/{main-f53d6d0d42f8d61df1de.css → main-d58fc0460cb0eae7cb5c.css} +1 -1
- dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
- dstack/_internal/server/testing/common.py +50 -8
- dstack/_internal/settings.py +4 -0
- dstack/_internal/utils/files.py +69 -0
- dstack/_internal/utils/nested_list.py +47 -0
- dstack/_internal/utils/path.py +12 -4
- dstack/api/_public/runs.py +67 -7
- dstack/api/server/__init__.py +6 -0
- dstack/api/server/_files.py +18 -0
- dstack/api/server/_secrets.py +15 -15
- dstack/version.py +1 -1
- {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/METADATA +13 -13
- {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/RECORD +80 -67
- {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/WHEEL +0 -0
- {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py (new file)

@@ -0,0 +1,49 @@
+"""Add SecretModel
+
+Revision ID: 644b8a114187
+Revises: 5f1707c525d2
+Create Date: 2025-06-30 11:00:04.326290
+
+"""
+
+import sqlalchemy as sa
+import sqlalchemy_utils
+from alembic import op
+
+import dstack._internal.server.models
+
+# revision identifiers, used by Alembic.
+revision = "644b8a114187"
+down_revision = "5f1707c525d2"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "secrets",
+        sa.Column("id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False),
+        sa.Column(
+            "project_id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False
+        ),
+        sa.Column("created_at", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+        sa.Column("updated_at", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+        sa.Column("name", sa.String(length=200), nullable=False),
+        sa.Column("value", dstack._internal.server.models.EncryptedString(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["project_id"],
+            ["projects.id"],
+            name=op.f("fk_secrets_project_id_projects"),
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_secrets")),
+        sa.UniqueConstraint("project_id", "name", name="uq_secrets_project_id_name"),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("secrets")
+    # ### end Alembic commands ###
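Note: the dstack server applies pending Alembic migrations itself on startup, so this revision normally needs no manual step. For completeness, a hedged sketch of driving it by hand through Alembic's Python API; the "alembic.ini" path is a placeholder, not dstack's packaged config:

    from alembic import command
    from alembic.config import Config

    cfg = Config("alembic.ini")  # placeholder config path
    command.upgrade(cfg, "644b8a114187")    # create the secrets table
    command.downgrade(cfg, "5f1707c525d2")  # revert to the previous revision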
dstack/_internal/server/models.py

@@ -315,6 +315,21 @@ class CodeModel(BaseModel):
     blob: Mapped[Optional[bytes]] = mapped_column(LargeBinary)  # None means blob is stored on s3
 
 
+class FileArchiveModel(BaseModel):
+    __tablename__ = "file_archives"
+    __table_args__ = (
+        UniqueConstraint("user_id", "blob_hash", name="uq_file_archives_user_id_blob_hash"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUIDType(binary=False), primary_key=True, default=uuid.uuid4
+    )
+    user_id: Mapped["UserModel"] = mapped_column(ForeignKey("users.id", ondelete="CASCADE"))
+    user: Mapped["UserModel"] = relationship()
+    blob_hash: Mapped[str] = mapped_column(Text)
+    blob: Mapped[Optional[bytes]] = mapped_column(LargeBinary)  # None means blob is stored on s3
+
+
 class RunModel(BaseModel):
     __tablename__ = "runs"
 

@@ -711,3 +726,21 @@ class JobPrometheusMetrics(BaseModel):
     collected_at: Mapped[datetime] = mapped_column(NaiveDateTime)
     # Raw Prometheus text response
     text: Mapped[str] = mapped_column(Text)
+
+
+class SecretModel(BaseModel):
+    __tablename__ = "secrets"
+    __table_args__ = (UniqueConstraint("project_id", "name", name="uq_secrets_project_id_name"),)
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUIDType(binary=False), primary_key=True, default=uuid.uuid4
+    )
+
+    project_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("projects.id", ondelete="CASCADE"))
+    project: Mapped["ProjectModel"] = relationship()
+
+    created_at: Mapped[datetime] = mapped_column(NaiveDateTime, default=get_current_datetime)
+    updated_at: Mapped[datetime] = mapped_column(NaiveDateTime, default=get_current_datetime)
+
+    name: Mapped[str] = mapped_column(String(200))
+    value: Mapped[DecryptedString] = mapped_column(EncryptedString())
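Note: the value column reuses the existing EncryptedString type, so secret values are encrypted at rest, and uq_secrets_project_id_name makes names unique per project. A rough sketch of what the constraint implies, assuming an open AsyncSession, a ProjectModel, and DecryptedString(plaintext=...) construction as used elsewhere with EncryptedString columns:

    from sqlalchemy.exc import IntegrityError

    from dstack._internal.server.models import DecryptedString, SecretModel

    session.add(SecretModel(project_id=project.id, name="HF_TOKEN",
                            value=DecryptedString(plaintext="v1")))
    await session.commit()
    # A second secret with the same name in the same project violates the constraint:
    session.add(SecretModel(project_id=project.id, name="HF_TOKEN",
                            value=DecryptedString(plaintext="v2")))
    try:
        await session.commit()
    except IntegrityError:
        await session.rollback()  # which is why the API exposes create_or_update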
dstack/_internal/server/routers/files.py (new file)

@@ -0,0 +1,67 @@
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, Request, UploadFile
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from dstack._internal.core.errors import ResourceNotExistsError, ServerClientError
+from dstack._internal.core.models.files import FileArchive
+from dstack._internal.server.db import get_session
+from dstack._internal.server.models import UserModel
+from dstack._internal.server.schemas.files import GetFileArchiveByHashRequest
+from dstack._internal.server.security.permissions import Authenticated
+from dstack._internal.server.services import files
+from dstack._internal.server.settings import SERVER_CODE_UPLOAD_LIMIT
+from dstack._internal.server.utils.routers import (
+    get_base_api_additional_responses,
+    get_request_size,
+)
+from dstack._internal.utils.common import sizeof_fmt
+
+router = APIRouter(
+    prefix="/api/files",
+    tags=["files"],
+    responses=get_base_api_additional_responses(),
+)
+
+
+@router.post("/get_archive_by_hash")
+async def get_archive_by_hash(
+    body: GetFileArchiveByHashRequest,
+    session: Annotated[AsyncSession, Depends(get_session)],
+    user: Annotated[UserModel, Depends(Authenticated())],
+) -> FileArchive:
+    archive = await files.get_archive_by_hash(
+        session=session,
+        user=user,
+        hash=body.hash,
+    )
+    if archive is None:
+        raise ResourceNotExistsError()
+    return archive
+
+
+@router.post("/upload_archive")
+async def upload_archive(
+    request: Request,
+    file: UploadFile,
+    session: Annotated[AsyncSession, Depends(get_session)],
+    user: Annotated[UserModel, Depends(Authenticated())],
+) -> FileArchive:
+    request_size = get_request_size(request)
+    if SERVER_CODE_UPLOAD_LIMIT > 0 and request_size > SERVER_CODE_UPLOAD_LIMIT:
+        diff_size_fmt = sizeof_fmt(request_size)
+        limit_fmt = sizeof_fmt(SERVER_CODE_UPLOAD_LIMIT)
+        if diff_size_fmt == limit_fmt:
+            diff_size_fmt = f"{request_size}B"
+            limit_fmt = f"{SERVER_CODE_UPLOAD_LIMIT}B"
+        raise ServerClientError(
+            f"Archive size is {diff_size_fmt}, which exceeds the limit of {limit_fmt}."
+            " Use .gitignore/.dstackignore to exclude large files."
+            " This limit can be modified by setting the DSTACK_SERVER_CODE_UPLOAD_LIMIT environment variable."
+        )
+    archive = await files.upload_archive(
+        session=session,
+        user=user,
+        file=file,
+    )
+    return archive
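Note: a hedged client-side sketch of the two endpoints above; the server URL and token are placeholders, and dstack's own client wraps these calls (see api/server/_files.py in the file list). The upload carries the archive's content hash as the multipart filename, which the files service then stores as blob_hash:

    import requests

    server = "http://localhost:3000"               # placeholder
    headers = {"Authorization": "Bearer <token>"}  # placeholder token

    # Upload: the filename field is the blob hash, not a real file name.
    with open("archive.tar", "rb") as f:
        archive = requests.post(
            f"{server}/api/files/upload_archive",
            headers=headers,
            files={"file": ("<blob-hash>", f)},
        ).json()

    # A later run can probe by hash and skip re-uploading.
    resp = requests.post(
        f"{server}/api/files/get_archive_by_hash",
        headers=headers,
        json={"hash": "<blob-hash>"},
    )
    # 200 -> {"id": ..., "hash": ...}; an error response if no such archive exists.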
dstack/_internal/server/routers/secrets.py

@@ -1,15 +1,19 @@
-from typing import List
+from typing import List, Tuple
 
-from fastapi import APIRouter
+from fastapi import APIRouter, Depends
+from sqlalchemy.ext.asyncio import AsyncSession
 
-from dstack._internal.core.
+from dstack._internal.core.errors import ResourceNotExistsError
 from dstack._internal.core.models.secrets import Secret
+from dstack._internal.server.db import get_session
+from dstack._internal.server.models import ProjectModel, UserModel
 from dstack._internal.server.schemas.secrets import (
-
+    CreateOrUpdateSecretRequest,
     DeleteSecretsRequest,
-
-    ListSecretsRequest,
+    GetSecretRequest,
 )
+from dstack._internal.server.security.permissions import ProjectAdmin
+from dstack._internal.server.services import secrets as secrets_services
 
 router = APIRouter(
     prefix="/api/project/{project_name}/secrets",

@@ -18,20 +22,58 @@ router = APIRouter(
 
 
 @router.post("/list")
-async def list_secrets(
-
+async def list_secrets(
+    session: AsyncSession = Depends(get_session),
+    user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+) -> List[Secret]:
+    _, project = user_project
+    return await secrets_services.list_secrets(
+        session=session,
+        project=project,
+    )
 
 
 @router.post("/get")
-async def get_secret(
-
+async def get_secret(
+    body: GetSecretRequest,
+    session: AsyncSession = Depends(get_session),
+    user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+) -> Secret:
+    _, project = user_project
+    secret = await secrets_services.get_secret(
+        session=session,
+        project=project,
+        name=body.name,
+    )
+    if secret is None:
+        raise ResourceNotExistsError()
+    return secret
 
 
-@router.post("/
-async def
-
+@router.post("/create_or_update")
+async def create_or_update_secret(
+    body: CreateOrUpdateSecretRequest,
+    session: AsyncSession = Depends(get_session),
+    user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+) -> Secret:
+    _, project = user_project
+    return await secrets_services.create_or_update_secret(
+        session=session,
+        project=project,
+        name=body.name,
+        value=body.value,
+    )
 
 
 @router.post("/delete")
-async def delete_secrets(
-
+async def delete_secrets(
+    body: DeleteSecretsRequest,
+    session: AsyncSession = Depends(get_session),
+    user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+):
+    _, project = user_project
+    await secrets_services.delete_secrets(
+        session=session,
+        project=project,
+        names=body.secrets_names,
+    )
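Note: a hedged sketch of the reworked per-project secrets API; every endpoint now requires project-admin credentials (ProjectAdmin), and the server URL, project name, and token below are placeholders:

    import requests

    base = "http://localhost:3000/api/project/main/secrets"  # placeholders
    headers = {"Authorization": "Bearer <admin-token>"}

    requests.post(f"{base}/create_or_update", headers=headers,
                  json={"name": "HF_TOKEN", "value": "hf_..."})
    requests.post(f"{base}/get", headers=headers, json={"name": "HF_TOKEN"})
    requests.post(f"{base}/list", headers=headers)  # -> a JSON list of secrets
    requests.post(f"{base}/delete", headers=headers,
                  json={"secrets_names": ["HF_TOKEN"]})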
dstack/_internal/server/schemas/secrets.py

@@ -1,20 +1,16 @@
 from typing import List
 
-from dstack._internal.core.models.
-from dstack._internal.server.schemas.common import RepoRequest
+from dstack._internal.core.models.common import CoreModel
 
 
-class
-
+class GetSecretRequest(CoreModel):
+    name: str
 
 
-class
-
+class CreateOrUpdateSecretRequest(CoreModel):
+    name: str
+    value: str
 
 
-class
-    secret: Secret
-
-
-class DeleteSecretsRequest(RepoRequest):
+class DeleteSecretsRequest(CoreModel):
     secrets_names: List[str]
dstack/_internal/server/services/backends/__init__.py

@@ -35,7 +35,7 @@ from dstack._internal.core.models.instances import (
 from dstack._internal.core.models.runs import Requirements
 from dstack._internal.server import settings
 from dstack._internal.server.models import BackendModel, DecryptedString, ProjectModel
-from dstack._internal.
+from dstack._internal.settings import LOCAL_BACKEND_ENABLED
 from dstack._internal.utils.common import run_async
 from dstack._internal.utils.logging import get_logger
 
dstack/_internal/server/services/files.py (new file)

@@ -0,0 +1,91 @@
+import uuid
+from typing import Optional
+
+from fastapi import UploadFile
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from dstack._internal.core.errors import ServerClientError
+from dstack._internal.core.models.files import FileArchive
+from dstack._internal.server.models import FileArchiveModel, UserModel
+from dstack._internal.server.services.storage import get_default_storage
+from dstack._internal.utils.common import run_async
+from dstack._internal.utils.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+async def get_archive_model(
+    session: AsyncSession,
+    id: uuid.UUID,
+    user: Optional[UserModel] = None,
+) -> Optional[FileArchiveModel]:
+    stmt = select(FileArchiveModel).where(FileArchiveModel.id == id)
+    if user is not None:
+        stmt = stmt.where(FileArchiveModel.user_id == user.id)
+    res = await session.execute(stmt)
+    return res.scalar()
+
+
+async def get_archive_model_by_hash(
+    session: AsyncSession,
+    user: UserModel,
+    hash: str,
+) -> Optional[FileArchiveModel]:
+    res = await session.execute(
+        select(FileArchiveModel).where(
+            FileArchiveModel.user_id == user.id,
+            FileArchiveModel.blob_hash == hash,
+        )
+    )
+    return res.scalar()
+
+
+async def get_archive_by_hash(
+    session: AsyncSession,
+    user: UserModel,
+    hash: str,
+) -> Optional[FileArchive]:
+    archive_model = await get_archive_model_by_hash(
+        session=session,
+        user=user,
+        hash=hash,
+    )
+    if archive_model is None:
+        return None
+    return archive_model_to_archive(archive_model)
+
+
+async def upload_archive(
+    session: AsyncSession,
+    user: UserModel,
+    file: UploadFile,
+) -> FileArchive:
+    if file.filename is None:
+        raise ServerClientError("filename not specified")
+    archive_hash = file.filename
+    archive_model = await get_archive_model_by_hash(
+        session=session,
+        user=user,
+        hash=archive_hash,
+    )
+    if archive_model is not None:
+        logger.debug("File archive (user_id=%s, hash=%s) already uploaded", user.id, archive_hash)
+        return archive_model_to_archive(archive_model)
+    blob = await file.read()
+    storage = get_default_storage()
+    if storage is not None:
+        await run_async(storage.upload_archive, str(user.id), archive_hash, blob)
+    archive_model = FileArchiveModel(
+        user_id=user.id,
+        blob_hash=archive_hash,
+        blob=blob if storage is None else None,
+    )
+    session.add(archive_model)
+    await session.commit()
+    logger.debug("File archive (user_id=%s, hash=%s) has been uploaded", user.id, archive_hash)
+    return archive_model_to_archive(archive_model)
+
+
+def archive_model_to_archive(archive_model: FileArchiveModel) -> FileArchive:
+    return FileArchive(id=archive_model.id, hash=archive_model.blob_hash)
dstack/_internal/server/services/fleets.py

@@ -362,7 +362,7 @@ async def create_fleet(
         select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
     )
 
-    lock, _ = get_locker().get_lockset(lock_namespace)
+    lock, _ = get_locker(get_db().dialect_name).get_lockset(lock_namespace)
     async with lock:
         if spec.configuration.name is not None:
             fleet_model = await get_project_fleet_model_by_name(

@@ -516,11 +516,12 @@ async def delete_fleets(
     await session.commit()
     logger.info("Deleting fleets: %s", [v.name for v in fleet_models])
     async with (
-        get_locker().lock_ctx(FleetModel.__tablename__, fleets_ids),
-        get_locker().lock_ctx(InstanceModel.__tablename__, instances_ids),
+        get_locker(get_db().dialect_name).lock_ctx(FleetModel.__tablename__, fleets_ids),
+        get_locker(get_db().dialect_name).lock_ctx(InstanceModel.__tablename__, instances_ids),
     ):
         # Refetch after lock
-        # TODO
+        # TODO: Lock instances with FOR UPDATE?
+        # TODO: Do not lock fleet when deleting only instances
         res = await session.execute(
             select(FleetModel)
             .where(
dstack/_internal/server/services/gateways/__init__.py

@@ -162,7 +162,7 @@ async def create_gateway(
         select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
     )
 
-    lock, _ = get_locker().get_lockset(lock_namespace)
+    lock, _ = get_locker(get_db().dialect_name).get_lockset(lock_namespace)
     async with lock:
         if configuration.name is None:
             configuration.name = await generate_gateway_name(session=session, project=project)

@@ -229,7 +229,9 @@ async def delete_gateways(
     gateways_ids = sorted([g.id for g in gateway_models])
     await session.commit()
     logger.info("Deleting gateways: %s", [g.name for g in gateway_models])
-    async with get_locker().lock_ctx(
+    async with get_locker(get_db().dialect_name).lock_ctx(
+        GatewayModel.__tablename__, gateways_ids
+    ):
         # Refetch after lock
         res = await session.execute(
             select(GatewayModel)
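Note: create_fleet and create_gateway still take a PostgreSQL advisory lock (the pg_advisory_xact_lock calls in context above), while the in-process locker now receives the database dialect, presumably so the reworked locking.py (see the file list) can choose a dialect-appropriate strategy. The shared pattern at these call sites, sketched with the same imports they use:

    from dstack._internal.server.db import get_db
    from dstack._internal.server.services.locking import get_locker

    locker = get_locker(get_db().dialect_name)
    # IDs are sorted before locking (see gateways_ids above), which keeps
    # acquisition order deterministic and avoids lock-order deadlocks.
    async with locker.lock_ctx(GatewayModel.__tablename__, sorted(gateways_ids)):
        ...  # refetch the rows after acquiring the lock, then mutate them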
dstack/_internal/server/services/jobs/__init__.py

@@ -33,6 +33,7 @@ from dstack._internal.core.models.runs import (
     RunSpec,
 )
 from dstack._internal.core.models.volumes import Volume, VolumeMountPoint, VolumeStatus
+from dstack._internal.server import settings
 from dstack._internal.server.models import (
     InstanceModel,
     JobModel,

@@ -64,15 +65,23 @@ from dstack._internal.utils.logging import get_logger
 logger = get_logger(__name__)
 
 
-async def get_jobs_from_run_spec(
+async def get_jobs_from_run_spec(
+    run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
+) -> List[Job]:
     return [
         Job(job_spec=s, job_submissions=[])
-        for s in await get_job_specs_from_run_spec(
+        for s in await get_job_specs_from_run_spec(
+            run_spec=run_spec,
+            secrets=secrets,
+            replica_num=replica_num,
+        )
     ]
 
 
-async def get_job_specs_from_run_spec(
-
+async def get_job_specs_from_run_spec(
+    run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
+) -> List[JobSpec]:
+    job_configurator = _get_job_configurator(run_spec=run_spec, secrets=secrets)
     job_specs = await job_configurator.get_job_specs(replica_num=replica_num)
     return job_specs
 

@@ -158,10 +167,10 @@ def delay_job_instance_termination(job_model: JobModel):
     job_model.remove_at = common.get_current_datetime() + timedelta(seconds=15)
 
 
-def _get_job_configurator(run_spec: RunSpec) -> JobConfigurator:
+def _get_job_configurator(run_spec: RunSpec, secrets: Dict[str, str]) -> JobConfigurator:
     configuration_type = RunConfigurationType(run_spec.configuration.type)
     configurator_class = _configuration_type_to_configurator_class_map[configuration_type]
-    return configurator_class(run_spec)
+    return configurator_class(run_spec=run_spec, secrets=secrets)
 
 
 _job_configurator_classes = [

@@ -380,8 +389,10 @@ def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel):
             message=job_model.termination_reason_message,
             timeout=0,
         )
-        # maybe somehow postpone removing old tasks to allow inspecting failed jobs
-
+        # maybe somehow postpone removing old tasks to allow inspecting failed jobs without
+        # the following setting?
+        if not settings.SERVER_KEEP_SHIM_TASKS:
+            shim_client.remove_task(task_id=job_model.id)
     else:
         shim_client.stop(force=True)
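Note: job-spec construction now threads a plain name-to-value secrets mapping down to the configurators, where base.py (below) interpolates it into registry_auth. The shim change is separate: removal of finished tasks can now be suppressed via the new SERVER_KEEP_SHIM_TASKS setting so failed jobs stay inspectable. A hedged caller-side sketch, assuming a RunSpec and already-resolved secrets are at hand:

    secrets = {"HF_TOKEN": "hf_..."}  # hypothetical resolved values
    job_specs = await get_job_specs_from_run_spec(
        run_spec=run_spec,
        secrets=secrets,
        replica_num=0,
    )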
dstack/_internal/server/services/jobs/configurators/base.py

@@ -1,5 +1,6 @@
 import shlex
 import sys
+import threading
 from abc import ABC, abstractmethod
 from pathlib import PurePosixPath
 from typing import Dict, List, Optional, Union

@@ -68,8 +69,13 @@ class JobConfigurator(ABC):
     # JobSSHKey should be shared for all jobs in a replica for inter-node communication.
     _job_ssh_key: Optional[JobSSHKey] = None
 
-    def __init__(
+    def __init__(
+        self,
+        run_spec: RunSpec,
+        secrets: Optional[Dict[str, str]] = None,
+    ):
         self.run_spec = run_spec
+        self.secrets = secrets or {}
 
     async def get_job_specs(self, replica_num: int) -> List[JobSpec]:
         job_spec = await self._get_job_spec(replica_num=replica_num, job_num=0, jobs_per_replica=1)

@@ -98,10 +104,20 @@ class JobConfigurator(ABC):
     async def _get_image_config(self) -> ImageConfig:
         if self._image_config is not None:
             return self._image_config
+        interpolate = VariablesInterpolator({"secrets": self.secrets}).interpolate_or_error
+        registry_auth = self.run_spec.configuration.registry_auth
+        if registry_auth is not None:
+            try:
+                registry_auth = RegistryAuth(
+                    username=interpolate(registry_auth.username),
+                    password=interpolate(registry_auth.password),
+                )
+            except InterpolatorError as e:
+                raise ServerClientError(e.args[0])
         image_config = await run_async(
             _get_image_config,
             self._image_name(),
-
+            registry_auth,
         )
         self._image_config = image_config
         return image_config

@@ -134,6 +150,9 @@ class JobConfigurator(ABC):
             working_dir=self._working_dir(),
             volumes=self._volumes(job_num),
             ssh_key=self._ssh_key(jobs_per_replica),
+            repo_data=self.run_spec.repo_data,
+            repo_code_hash=self.run_spec.repo_code_hash,
+            file_archives=self.run_spec.file_archives,
         )
         return job_spec

@@ -336,7 +355,10 @@ def _join_shell_commands(commands: List[str]) -> str:
     return " && ".join(commands)
 
 
-@cached(
+@cached(
+    cache=TTLCache(maxsize=2048, ttl=80),
+    lock=threading.Lock(),
+)
 def _get_image_config(image: str, registry_auth: Optional[RegistryAuth]) -> ImageConfig:
     try:
         return get_image_config(image, registry_auth).config
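Note: with cachetools, a cache shared by a decorated function is only thread-safe when a lock is supplied, so the decorator on _get_image_config now pairs its TTLCache with the newly imported threading.Lock(). A minimal standalone illustration of the same pattern (fetch_remote is a hypothetical stand-in):

    import threading
    from cachetools import TTLCache, cached

    @cached(cache=TTLCache(maxsize=2048, ttl=80), lock=threading.Lock())
    def fetch_remote(key: str) -> str:
        # stand-in for an expensive lookup; results are cached for 80 seconds
        return key.upper()

    fetch_remote("a")  # computed once
    fetch_remote("a")  # served from the cache within the TTL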
dstack/_internal/server/services/jobs/configurators/dev.py

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 from dstack._internal.core.errors import ServerClientError
 from dstack._internal.core.models.configurations import PortMapping, RunConfigurationType

@@ -17,7 +17,7 @@ INSTALL_IPYKERNEL = (
 class DevEnvironmentJobConfigurator(JobConfigurator):
     TYPE: RunConfigurationType = RunConfigurationType.DEV_ENVIRONMENT
 
-    def __init__(self, run_spec: RunSpec):
+    def __init__(self, run_spec: RunSpec, secrets: Dict[str, str]):
         if run_spec.configuration.ide == "vscode":
             __class = VSCodeDesktop
         elif run_spec.configuration.ide == "cursor":

@@ -29,7 +29,7 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
             version=run_spec.configuration.version,
             extensions=["ms-python.python", "ms-toolsai.jupyter"],
         )
-        super().__init__(run_spec)
+        super().__init__(run_spec=run_spec, secrets=secrets)
 
     def _shell_commands(self) -> List[str]:
         commands = self.ide.get_install_commands()