dstack 0.19.16__py3-none-any.whl → 0.19.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dstack has been flagged as possibly problematic.

Files changed (80)
  1. dstack/_internal/cli/commands/secrets.py +92 -0
  2. dstack/_internal/cli/main.py +2 -0
  3. dstack/_internal/cli/services/completion.py +5 -0
  4. dstack/_internal/cli/services/configurators/fleet.py +13 -1
  5. dstack/_internal/cli/services/configurators/run.py +59 -17
  6. dstack/_internal/cli/utils/secrets.py +25 -0
  7. dstack/_internal/core/backends/__init__.py +10 -4
  8. dstack/_internal/core/backends/aws/compute.py +237 -18
  9. dstack/_internal/core/backends/base/compute.py +20 -2
  10. dstack/_internal/core/backends/cudo/compute.py +23 -9
  11. dstack/_internal/core/backends/gcp/compute.py +13 -7
  12. dstack/_internal/core/backends/lambdalabs/compute.py +2 -1
  13. dstack/_internal/core/compatibility/fleets.py +12 -11
  14. dstack/_internal/core/compatibility/gateways.py +9 -8
  15. dstack/_internal/core/compatibility/logs.py +4 -3
  16. dstack/_internal/core/compatibility/runs.py +41 -17
  17. dstack/_internal/core/compatibility/volumes.py +9 -8
  18. dstack/_internal/core/errors.py +4 -0
  19. dstack/_internal/core/models/common.py +7 -0
  20. dstack/_internal/core/models/configurations.py +11 -0
  21. dstack/_internal/core/models/files.py +67 -0
  22. dstack/_internal/core/models/runs.py +14 -0
  23. dstack/_internal/core/models/secrets.py +9 -2
  24. dstack/_internal/core/services/diff.py +36 -3
  25. dstack/_internal/server/app.py +22 -0
  26. dstack/_internal/server/background/__init__.py +61 -37
  27. dstack/_internal/server/background/tasks/process_fleets.py +19 -3
  28. dstack/_internal/server/background/tasks/process_gateways.py +1 -1
  29. dstack/_internal/server/background/tasks/process_instances.py +13 -2
  30. dstack/_internal/server/background/tasks/process_placement_groups.py +4 -2
  31. dstack/_internal/server/background/tasks/process_running_jobs.py +123 -15
  32. dstack/_internal/server/background/tasks/process_runs.py +23 -7
  33. dstack/_internal/server/background/tasks/process_submitted_jobs.py +36 -7
  34. dstack/_internal/server/background/tasks/process_terminating_jobs.py +5 -3
  35. dstack/_internal/server/background/tasks/process_volumes.py +2 -2
  36. dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
  37. dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
  38. dstack/_internal/server/models.py +33 -0
  39. dstack/_internal/server/routers/files.py +67 -0
  40. dstack/_internal/server/routers/secrets.py +57 -15
  41. dstack/_internal/server/schemas/files.py +5 -0
  42. dstack/_internal/server/schemas/runner.py +2 -0
  43. dstack/_internal/server/schemas/secrets.py +7 -11
  44. dstack/_internal/server/services/backends/__init__.py +1 -1
  45. dstack/_internal/server/services/files.py +91 -0
  46. dstack/_internal/server/services/fleets.py +5 -4
  47. dstack/_internal/server/services/gateways/__init__.py +4 -2
  48. dstack/_internal/server/services/jobs/__init__.py +19 -8
  49. dstack/_internal/server/services/jobs/configurators/base.py +25 -3
  50. dstack/_internal/server/services/jobs/configurators/dev.py +3 -3
  51. dstack/_internal/server/services/locking.py +101 -12
  52. dstack/_internal/server/services/proxy/repo.py +3 -0
  53. dstack/_internal/server/services/runner/client.py +8 -0
  54. dstack/_internal/server/services/runs.py +76 -47
  55. dstack/_internal/server/services/secrets.py +204 -0
  56. dstack/_internal/server/services/storage/base.py +21 -0
  57. dstack/_internal/server/services/storage/gcs.py +28 -6
  58. dstack/_internal/server/services/storage/s3.py +27 -9
  59. dstack/_internal/server/services/volumes.py +2 -2
  60. dstack/_internal/server/settings.py +19 -5
  61. dstack/_internal/server/statics/index.html +1 -1
  62. dstack/_internal/server/statics/{main-a4eafa74304e587d037c.js → main-d1ac2e8c38ed5f08a114.js} +86 -34
  63. dstack/_internal/server/statics/{main-a4eafa74304e587d037c.js.map → main-d1ac2e8c38ed5f08a114.js.map} +1 -1
  64. dstack/_internal/server/statics/{main-f53d6d0d42f8d61df1de.css → main-d58fc0460cb0eae7cb5c.css} +1 -1
  65. dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
  66. dstack/_internal/server/testing/common.py +50 -8
  67. dstack/_internal/settings.py +4 -0
  68. dstack/_internal/utils/files.py +69 -0
  69. dstack/_internal/utils/nested_list.py +47 -0
  70. dstack/_internal/utils/path.py +12 -4
  71. dstack/api/_public/runs.py +67 -7
  72. dstack/api/server/__init__.py +6 -0
  73. dstack/api/server/_files.py +18 -0
  74. dstack/api/server/_secrets.py +15 -15
  75. dstack/version.py +1 -1
  76. {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/METADATA +13 -13
  77. {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/RECORD +80 -67
  78. {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/WHEEL +0 -0
  79. {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/entry_points.txt +0 -0
  80. {dstack-0.19.16.dist-info → dstack-0.19.18.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py
@@ -0,0 +1,49 @@
+ """Add SecretModel
+
+ Revision ID: 644b8a114187
+ Revises: 5f1707c525d2
+ Create Date: 2025-06-30 11:00:04.326290
+
+ """
+
+ import sqlalchemy as sa
+ import sqlalchemy_utils
+ from alembic import op
+
+ import dstack._internal.server.models
+
+ # revision identifiers, used by Alembic.
+ revision = "644b8a114187"
+ down_revision = "5f1707c525d2"
+ branch_labels = None
+ depends_on = None
+
+
+ def upgrade() -> None:
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.create_table(
+         "secrets",
+         sa.Column("id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False),
+         sa.Column(
+             "project_id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False
+         ),
+         sa.Column("created_at", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+         sa.Column("updated_at", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+         sa.Column("name", sa.String(length=200), nullable=False),
+         sa.Column("value", dstack._internal.server.models.EncryptedString(), nullable=False),
+         sa.ForeignKeyConstraint(
+             ["project_id"],
+             ["projects.id"],
+             name=op.f("fk_secrets_project_id_projects"),
+             ondelete="CASCADE",
+         ),
+         sa.PrimaryKeyConstraint("id", name=op.f("pk_secrets")),
+         sa.UniqueConstraint("project_id", "name", name="uq_secrets_project_id_name"),
+     )
+     # ### end Alembic commands ###
+
+
+ def downgrade() -> None:
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.drop_table("secrets")
+     # ### end Alembic commands ###
dstack/_internal/server/models.py
@@ -315,6 +315,21 @@ class CodeModel(BaseModel):
      blob: Mapped[Optional[bytes]] = mapped_column(LargeBinary)  # None means blob is stored on s3


+ class FileArchiveModel(BaseModel):
+     __tablename__ = "file_archives"
+     __table_args__ = (
+         UniqueConstraint("user_id", "blob_hash", name="uq_file_archives_user_id_blob_hash"),
+     )
+
+     id: Mapped[uuid.UUID] = mapped_column(
+         UUIDType(binary=False), primary_key=True, default=uuid.uuid4
+     )
+     user_id: Mapped["UserModel"] = mapped_column(ForeignKey("users.id", ondelete="CASCADE"))
+     user: Mapped["UserModel"] = relationship()
+     blob_hash: Mapped[str] = mapped_column(Text)
+     blob: Mapped[Optional[bytes]] = mapped_column(LargeBinary)  # None means blob is stored on s3
+
+
  class RunModel(BaseModel):
      __tablename__ = "runs"

@@ -711,3 +726,21 @@ class JobPrometheusMetrics(BaseModel):
      collected_at: Mapped[datetime] = mapped_column(NaiveDateTime)
      # Raw Prometheus text response
      text: Mapped[str] = mapped_column(Text)
+
+
+ class SecretModel(BaseModel):
+     __tablename__ = "secrets"
+     __table_args__ = (UniqueConstraint("project_id", "name", name="uq_secrets_project_id_name"),)
+
+     id: Mapped[uuid.UUID] = mapped_column(
+         UUIDType(binary=False), primary_key=True, default=uuid.uuid4
+     )
+
+     project_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("projects.id", ondelete="CASCADE"))
+     project: Mapped["ProjectModel"] = relationship()
+
+     created_at: Mapped[datetime] = mapped_column(NaiveDateTime, default=get_current_datetime)
+     updated_at: Mapped[datetime] = mapped_column(NaiveDateTime, default=get_current_datetime)
+
+     name: Mapped[str] = mapped_column(String(200))
+     value: Mapped[DecryptedString] = mapped_column(EncryptedString())
dstack/_internal/server/routers/files.py
@@ -0,0 +1,67 @@
+ from typing import Annotated
+
+ from fastapi import APIRouter, Depends, Request, UploadFile
+ from sqlalchemy.ext.asyncio import AsyncSession
+
+ from dstack._internal.core.errors import ResourceNotExistsError, ServerClientError
+ from dstack._internal.core.models.files import FileArchive
+ from dstack._internal.server.db import get_session
+ from dstack._internal.server.models import UserModel
+ from dstack._internal.server.schemas.files import GetFileArchiveByHashRequest
+ from dstack._internal.server.security.permissions import Authenticated
+ from dstack._internal.server.services import files
+ from dstack._internal.server.settings import SERVER_CODE_UPLOAD_LIMIT
+ from dstack._internal.server.utils.routers import (
+     get_base_api_additional_responses,
+     get_request_size,
+ )
+ from dstack._internal.utils.common import sizeof_fmt
+
+ router = APIRouter(
+     prefix="/api/files",
+     tags=["files"],
+     responses=get_base_api_additional_responses(),
+ )
+
+
+ @router.post("/get_archive_by_hash")
+ async def get_archive_by_hash(
+     body: GetFileArchiveByHashRequest,
+     session: Annotated[AsyncSession, Depends(get_session)],
+     user: Annotated[UserModel, Depends(Authenticated())],
+ ) -> FileArchive:
+     archive = await files.get_archive_by_hash(
+         session=session,
+         user=user,
+         hash=body.hash,
+     )
+     if archive is None:
+         raise ResourceNotExistsError()
+     return archive
+
+
+ @router.post("/upload_archive")
+ async def upload_archive(
+     request: Request,
+     file: UploadFile,
+     session: Annotated[AsyncSession, Depends(get_session)],
+     user: Annotated[UserModel, Depends(Authenticated())],
+ ) -> FileArchive:
+     request_size = get_request_size(request)
+     if SERVER_CODE_UPLOAD_LIMIT > 0 and request_size > SERVER_CODE_UPLOAD_LIMIT:
+         diff_size_fmt = sizeof_fmt(request_size)
+         limit_fmt = sizeof_fmt(SERVER_CODE_UPLOAD_LIMIT)
+         if diff_size_fmt == limit_fmt:
+             diff_size_fmt = f"{request_size}B"
+             limit_fmt = f"{SERVER_CODE_UPLOAD_LIMIT}B"
+         raise ServerClientError(
+             f"Archive size is {diff_size_fmt}, which exceeds the limit of {limit_fmt}."
+             " Use .gitignore/.dstackignore to exclude large files."
+             " This limit can be modified by setting the DSTACK_SERVER_CODE_UPLOAD_LIMIT environment variable."
+         )
+     archive = await files.upload_archive(
+         session=session,
+         user=user,
+         file=file,
+     )
+     return archive
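
Note: a minimal sketch of a client for these two endpoints, in Python with requests. The server URL and token are placeholders, and sha256 is an assumed hash algorithm; what the handlers above do confirm is that get_archive_by_hash takes {"hash": ...} and that upload_archive receives a multipart file whose filename carries the archive hash (see the services/files.py hunk further down).

    import hashlib

    import requests

    SERVER = "http://localhost:3000"               # placeholder server URL
    HEADERS = {"Authorization": "Bearer <token>"}  # placeholder user token

    blob = open("archive.tar", "rb").read()
    blob_hash = hashlib.sha256(blob).hexdigest()   # assumed hash algorithm

    # Ask the server whether an archive with this hash already exists.
    resp = requests.post(
        f"{SERVER}/api/files/get_archive_by_hash",
        json={"hash": blob_hash},
        headers=HEADERS,
    )
    if not resp.ok:
        # Not uploaded yet; the filename field is where the server reads the hash from.
        resp = requests.post(
            f"{SERVER}/api/files/upload_archive",
            files={"file": (blob_hash, blob)},
            headers=HEADERS,
        )
    print(resp.json())  # FileArchive, e.g. {"id": "...", "hash": "..."}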
dstack/_internal/server/routers/secrets.py
@@ -1,15 +1,19 @@
- from typing import List
+ from typing import List, Tuple

- from fastapi import APIRouter
+ from fastapi import APIRouter, Depends
+ from sqlalchemy.ext.asyncio import AsyncSession

- from dstack._internal.core.models.runs import Run
+ from dstack._internal.core.errors import ResourceNotExistsError
  from dstack._internal.core.models.secrets import Secret
+ from dstack._internal.server.db import get_session
+ from dstack._internal.server.models import ProjectModel, UserModel
  from dstack._internal.server.schemas.secrets import (
-     AddSecretRequest,
+     CreateOrUpdateSecretRequest,
      DeleteSecretsRequest,
-     GetSecretsRequest,
-     ListSecretsRequest,
+     GetSecretRequest,
  )
+ from dstack._internal.server.security.permissions import ProjectAdmin
+ from dstack._internal.server.services import secrets as secrets_services

  router = APIRouter(
      prefix="/api/project/{project_name}/secrets",
@@ -18,20 +22,58 @@ router = APIRouter(


  @router.post("/list")
- async def list_secrets(project_name: str, body: ListSecretsRequest) -> List[Run]:
-     pass
+ async def list_secrets(
+     session: AsyncSession = Depends(get_session),
+     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+ ) -> List[Secret]:
+     _, project = user_project
+     return await secrets_services.list_secrets(
+         session=session,
+         project=project,
+     )


  @router.post("/get")
- async def get_secret(project_name: str, body: GetSecretsRequest) -> Secret:
-     pass
+ async def get_secret(
+     body: GetSecretRequest,
+     session: AsyncSession = Depends(get_session),
+     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+ ) -> Secret:
+     _, project = user_project
+     secret = await secrets_services.get_secret(
+         session=session,
+         project=project,
+         name=body.name,
+     )
+     if secret is None:
+         raise ResourceNotExistsError()
+     return secret


- @router.post("/add")
- async def add_or_update_secret(project_name: str, body: AddSecretRequest) -> Secret:
-     pass
+ @router.post("/create_or_update")
+ async def create_or_update_secret(
+     body: CreateOrUpdateSecretRequest,
+     session: AsyncSession = Depends(get_session),
+     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+ ) -> Secret:
+     _, project = user_project
+     return await secrets_services.create_or_update_secret(
+         session=session,
+         project=project,
+         name=body.name,
+         value=body.value,
+     )


  @router.post("/delete")
- async def delete_secrets(project_name: str, body: DeleteSecretsRequest):
-     pass
+ async def delete_secrets(
+     body: DeleteSecretsRequest,
+     session: AsyncSession = Depends(get_session),
+     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectAdmin()),
+ ):
+     _, project = user_project
+     await secrets_services.delete_secrets(
+         session=session,
+         project=project,
+         names=body.secrets_names,
+     )
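
Note: the request and response shapes follow directly from the handlers above; all four routes are POST and require project-admin permissions. A quick sketch with placeholder URL, token, and project name:

    import requests

    SERVER = "http://localhost:3000"               # placeholder server URL
    HEADERS = {"Authorization": "Bearer <token>"}  # project-admin token
    BASE = f"{SERVER}/api/project/main/secrets"    # "main" is a placeholder project

    # Create or update a secret (upsert keyed by name within the project).
    requests.post(
        f"{BASE}/create_or_update",
        json={"name": "ghcr_token", "value": "s3cret"},
        headers=HEADERS,
    )

    # List all secrets, then fetch one by name.
    print(requests.post(f"{BASE}/list", headers=HEADERS).json())
    print(requests.post(f"{BASE}/get", json={"name": "ghcr_token"}, headers=HEADERS).json())

    # Delete takes a list of names; note the plural field.
    requests.post(f"{BASE}/delete", json={"secrets_names": ["ghcr_token"]}, headers=HEADERS)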
dstack/_internal/server/schemas/files.py
@@ -0,0 +1,5 @@
+ from dstack._internal.core.models.common import CoreModel
+
+
+ class GetFileArchiveByHashRequest(CoreModel):
+     hash: str
dstack/_internal/server/schemas/runner.py
@@ -77,6 +77,8 @@ class SubmitBody(CoreModel):
                  "max_duration",
                  "ssh_key",
                  "working_dir",
+                 "repo_data",
+                 "file_archives",
              }
          ),
      ]
dstack/_internal/server/schemas/secrets.py
@@ -1,20 +1,16 @@
  from typing import List

- from dstack._internal.core.models.secrets import Secret
- from dstack._internal.server.schemas.common import RepoRequest
+ from dstack._internal.core.models.common import CoreModel


- class ListSecretsRequest(RepoRequest):
-     pass
+ class GetSecretRequest(CoreModel):
+     name: str


- class GetSecretsRequest(RepoRequest):
-     pass
+ class CreateOrUpdateSecretRequest(CoreModel):
+     name: str
+     value: str


- class AddSecretRequest(RepoRequest):
-     secret: Secret
-
-
- class DeleteSecretsRequest(RepoRequest):
+ class DeleteSecretsRequest(CoreModel):
      secrets_names: List[str]
dstack/_internal/server/services/backends/__init__.py
@@ -35,7 +35,7 @@ from dstack._internal.core.models.instances import (
  from dstack._internal.core.models.runs import Requirements
  from dstack._internal.server import settings
  from dstack._internal.server.models import BackendModel, DecryptedString, ProjectModel
- from dstack._internal.server.settings import LOCAL_BACKEND_ENABLED
+ from dstack._internal.settings import LOCAL_BACKEND_ENABLED
  from dstack._internal.utils.common import run_async
  from dstack._internal.utils.logging import get_logger

dstack/_internal/server/services/files.py
@@ -0,0 +1,91 @@
+ import uuid
+ from typing import Optional
+
+ from fastapi import UploadFile
+ from sqlalchemy import select
+ from sqlalchemy.ext.asyncio import AsyncSession
+
+ from dstack._internal.core.errors import ServerClientError
+ from dstack._internal.core.models.files import FileArchive
+ from dstack._internal.server.models import FileArchiveModel, UserModel
+ from dstack._internal.server.services.storage import get_default_storage
+ from dstack._internal.utils.common import run_async
+ from dstack._internal.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ async def get_archive_model(
+     session: AsyncSession,
+     id: uuid.UUID,
+     user: Optional[UserModel] = None,
+ ) -> Optional[FileArchiveModel]:
+     stmt = select(FileArchiveModel).where(FileArchiveModel.id == id)
+     if user is not None:
+         stmt = stmt.where(FileArchiveModel.user_id == user.id)
+     res = await session.execute(stmt)
+     return res.scalar()
+
+
+ async def get_archive_model_by_hash(
+     session: AsyncSession,
+     user: UserModel,
+     hash: str,
+ ) -> Optional[FileArchiveModel]:
+     res = await session.execute(
+         select(FileArchiveModel).where(
+             FileArchiveModel.user_id == user.id,
+             FileArchiveModel.blob_hash == hash,
+         )
+     )
+     return res.scalar()
+
+
+ async def get_archive_by_hash(
+     session: AsyncSession,
+     user: UserModel,
+     hash: str,
+ ) -> Optional[FileArchive]:
+     archive_model = await get_archive_model_by_hash(
+         session=session,
+         user=user,
+         hash=hash,
+     )
+     if archive_model is None:
+         return None
+     return archive_model_to_archive(archive_model)
+
+
+ async def upload_archive(
+     session: AsyncSession,
+     user: UserModel,
+     file: UploadFile,
+ ) -> FileArchive:
+     if file.filename is None:
+         raise ServerClientError("filename not specified")
+     archive_hash = file.filename
+     archive_model = await get_archive_model_by_hash(
+         session=session,
+         user=user,
+         hash=archive_hash,
+     )
+     if archive_model is not None:
+         logger.debug("File archive (user_id=%s, hash=%s) already uploaded", user.id, archive_hash)
+         return archive_model_to_archive(archive_model)
+     blob = await file.read()
+     storage = get_default_storage()
+     if storage is not None:
+         await run_async(storage.upload_archive, str(user.id), archive_hash, blob)
+     archive_model = FileArchiveModel(
+         user_id=user.id,
+         blob_hash=archive_hash,
+         blob=blob if storage is None else None,
+     )
+     session.add(archive_model)
+     await session.commit()
+     logger.debug("File archive (user_id=%s, hash=%s) has been uploaded", user.id, archive_hash)
+     return archive_model_to_archive(archive_model)
+
+
+ def archive_model_to_archive(archive_model: FileArchiveModel) -> FileArchive:
+     return FileArchive(id=archive_model.id, hash=archive_model.blob_hash)
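
Note the storage fallback in upload_archive: when get_default_storage() returns None, the blob is kept in the database row itself; otherwise it goes to object storage and the row's blob column stays NULL (the same convention CodeModel uses). The call storage.upload_archive(str(user.id), archive_hash, blob) pins down the interface a backend must offer; a minimal in-memory stand-in for illustration (the real base class lives in services/storage/base.py, which is not shown in this excerpt):

    from typing import Dict, Tuple


    class InMemoryStorage:
        """Illustrative storage backend matching the upload_archive call above."""

        def __init__(self) -> None:
            # Keyed by (user_id, hash), mirroring uq_file_archives_user_id_blob_hash.
            self._archives: Dict[Tuple[str, str], bytes] = {}

        def upload_archive(self, user_id: str, archive_hash: str, blob: bytes) -> None:
            self._archives[(user_id, archive_hash)] = blob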
dstack/_internal/server/services/fleets.py
@@ -362,7 +362,7 @@ async def create_fleet(
              select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
          )

-     lock, _ = get_locker().get_lockset(lock_namespace)
+     lock, _ = get_locker(get_db().dialect_name).get_lockset(lock_namespace)
      async with lock:
          if spec.configuration.name is not None:
              fleet_model = await get_project_fleet_model_by_name(
@@ -516,11 +516,12 @@
      await session.commit()
      logger.info("Deleting fleets: %s", [v.name for v in fleet_models])
      async with (
-         get_locker().lock_ctx(FleetModel.__tablename__, fleets_ids),
-         get_locker().lock_ctx(InstanceModel.__tablename__, instances_ids),
+         get_locker(get_db().dialect_name).lock_ctx(FleetModel.__tablename__, fleets_ids),
+         get_locker(get_db().dialect_name).lock_ctx(InstanceModel.__tablename__, instances_ids),
      ):
          # Refetch after lock
-         # TODO lock instances with FOR UPDATE?
+         # TODO: Lock instances with FOR UPDATE?
+         # TODO: Do not lock fleet when deleting only instances
          res = await session.execute(
              select(FleetModel)
              .where(
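
Note: get_locker() now receives the database dialect. On PostgreSQL the server can lean on database-level locking, such as the pg_advisory_xact_lock call visible in create_fleet above, while on SQLite it must serialize within the single server process. A hedged sketch of the dispatch this implies; dstack's actual locking.py (changed by +101/-12 in this release) is not shown here, and these class names are illustrative:

    import asyncio
    from contextlib import asynccontextmanager


    class NoopLocker:
        """PostgreSQL path: correctness comes from DB-level locks, not process-local ones."""

        @asynccontextmanager
        async def lock_ctx(self, namespace: str, keys):
            yield  # nothing to lock in-process


    class InProcessLocker:
        """SQLite path: one server process, so asyncio locks suffice."""

        def __init__(self) -> None:
            self._locks: dict = {}

        @asynccontextmanager
        async def lock_ctx(self, namespace: str, keys):
            lock = self._locks.setdefault(namespace, asyncio.Lock())
            async with lock:
                yield


    def get_locker(dialect_name: str):
        # Illustrative only: the real locker also exposes get_lockset().
        return NoopLocker() if dialect_name == "postgresql" else InProcessLocker()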
dstack/_internal/server/services/gateways/__init__.py
@@ -162,7 +162,7 @@ async def create_gateway(
              select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
          )

-     lock, _ = get_locker().get_lockset(lock_namespace)
+     lock, _ = get_locker(get_db().dialect_name).get_lockset(lock_namespace)
      async with lock:
          if configuration.name is None:
              configuration.name = await generate_gateway_name(session=session, project=project)
@@ -229,7 +229,9 @@ async def delete_gateways(
      gateways_ids = sorted([g.id for g in gateway_models])
      await session.commit()
      logger.info("Deleting gateways: %s", [g.name for g in gateway_models])
-     async with get_locker().lock_ctx(GatewayModel.__tablename__, gateways_ids):
+     async with get_locker(get_db().dialect_name).lock_ctx(
+         GatewayModel.__tablename__, gateways_ids
+     ):
          # Refetch after lock
          res = await session.execute(
              select(GatewayModel)
dstack/_internal/server/services/jobs/__init__.py
@@ -33,6 +33,7 @@ from dstack._internal.core.models.runs import (
      RunSpec,
  )
  from dstack._internal.core.models.volumes import Volume, VolumeMountPoint, VolumeStatus
+ from dstack._internal.server import settings
  from dstack._internal.server.models import (
      InstanceModel,
      JobModel,
@@ -64,15 +65,23 @@ from dstack._internal.utils.logging import get_logger
  logger = get_logger(__name__)


- async def get_jobs_from_run_spec(run_spec: RunSpec, replica_num: int) -> List[Job]:
+ async def get_jobs_from_run_spec(
+     run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
+ ) -> List[Job]:
      return [
          Job(job_spec=s, job_submissions=[])
-         for s in await get_job_specs_from_run_spec(run_spec, replica_num)
+         for s in await get_job_specs_from_run_spec(
+             run_spec=run_spec,
+             secrets=secrets,
+             replica_num=replica_num,
+         )
      ]


- async def get_job_specs_from_run_spec(run_spec: RunSpec, replica_num: int) -> List[JobSpec]:
-     job_configurator = _get_job_configurator(run_spec)
+ async def get_job_specs_from_run_spec(
+     run_spec: RunSpec, secrets: Dict[str, str], replica_num: int
+ ) -> List[JobSpec]:
+     job_configurator = _get_job_configurator(run_spec=run_spec, secrets=secrets)
      job_specs = await job_configurator.get_job_specs(replica_num=replica_num)
      return job_specs

@@ -158,10 +167,10 @@ def delay_job_instance_termination(job_model: JobModel):
      job_model.remove_at = common.get_current_datetime() + timedelta(seconds=15)


- def _get_job_configurator(run_spec: RunSpec) -> JobConfigurator:
+ def _get_job_configurator(run_spec: RunSpec, secrets: Dict[str, str]) -> JobConfigurator:
      configuration_type = RunConfigurationType(run_spec.configuration.type)
      configurator_class = _configuration_type_to_configurator_class_map[configuration_type]
-     return configurator_class(run_spec)
+     return configurator_class(run_spec=run_spec, secrets=secrets)


  _job_configurator_classes = [
@@ -380,8 +389,10 @@ def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel):
              message=job_model.termination_reason_message,
              timeout=0,
          )
-         # maybe somehow postpone removing old tasks to allow inspecting failed jobs?
-         shim_client.remove_task(task_id=job_model.id)
+         # maybe somehow postpone removing old tasks to allow inspecting failed jobs without
+         # the following setting?
+         if not settings.SERVER_KEEP_SHIM_TASKS:
+             shim_client.remove_task(task_id=job_model.id)
      else:
          shim_client.stop(force=True)

dstack/_internal/server/services/jobs/configurators/base.py
@@ -1,5 +1,6 @@
  import shlex
  import sys
+ import threading
  from abc import ABC, abstractmethod
  from pathlib import PurePosixPath
  from typing import Dict, List, Optional, Union
@@ -68,8 +69,13 @@ class JobConfigurator(ABC):
      # JobSSHKey should be shared for all jobs in a replica for inter-node communication.
      _job_ssh_key: Optional[JobSSHKey] = None

-     def __init__(self, run_spec: RunSpec):
+     def __init__(
+         self,
+         run_spec: RunSpec,
+         secrets: Optional[Dict[str, str]] = None,
+     ):
          self.run_spec = run_spec
+         self.secrets = secrets or {}

      async def get_job_specs(self, replica_num: int) -> List[JobSpec]:
          job_spec = await self._get_job_spec(replica_num=replica_num, job_num=0, jobs_per_replica=1)
@@ -98,10 +104,20 @@ class JobConfigurator(ABC):
      async def _get_image_config(self) -> ImageConfig:
          if self._image_config is not None:
              return self._image_config
+         interpolate = VariablesInterpolator({"secrets": self.secrets}).interpolate_or_error
+         registry_auth = self.run_spec.configuration.registry_auth
+         if registry_auth is not None:
+             try:
+                 registry_auth = RegistryAuth(
+                     username=interpolate(registry_auth.username),
+                     password=interpolate(registry_auth.password),
+                 )
+             except InterpolatorError as e:
+                 raise ServerClientError(e.args[0])
          image_config = await run_async(
              _get_image_config,
              self._image_name(),
-             self.run_spec.configuration.registry_auth,
+             registry_auth,
          )
          self._image_config = image_config
          return image_config
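
Note: this is what lets registry_auth in a run configuration reference project secrets, e.g. password: ${{ secrets.ghcr_token }}. A small illustration of the interpolation step; the import path is an assumption based on the class name used above:

    # Assumed import path for the interpolator referenced in the hunk above.
    from dstack._internal.utils.interpolator import VariablesInterpolator

    interpolate = VariablesInterpolator({"secrets": {"ghcr_token": "s3cret"}}).interpolate_or_error

    # Mirrors what _get_image_config now does with the registry_auth fields:
    print(interpolate("${{ secrets.ghcr_token }}"))  # -> s3cret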
@@ -134,6 +150,9 @@ class JobConfigurator(ABC):
              working_dir=self._working_dir(),
              volumes=self._volumes(job_num),
              ssh_key=self._ssh_key(jobs_per_replica),
+             repo_data=self.run_spec.repo_data,
+             repo_code_hash=self.run_spec.repo_code_hash,
+             file_archives=self.run_spec.file_archives,
          )
          return job_spec

@@ -336,7 +355,10 @@ def _join_shell_commands(commands: List[str]) -> str:
      return " && ".join(commands)


- @cached(TTLCache(maxsize=2048, ttl=80))
+ @cached(
+     cache=TTLCache(maxsize=2048, ttl=80),
+     lock=threading.Lock(),
+ )
  def _get_image_config(image: str, registry_auth: Optional[RegistryAuth]) -> ImageConfig:
      try:
          return get_image_config(image, registry_auth).config
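
Note: passing lock= makes cachetools guard the cache's lookups and stores with the given lock, which matters here because _get_image_config is invoked from worker threads via run_async. A minimal standalone version of the same pattern:

    import threading

    from cachetools import TTLCache, cached


    # Without the lock, concurrent threads could corrupt the TTLCache's internal
    # bookkeeping; with it, cache reads and writes are serialized (the wrapped
    # function itself may still run concurrently on a miss).
    @cached(cache=TTLCache(maxsize=2048, ttl=80), lock=threading.Lock())
    def expensive_lookup(image: str) -> str:
        return image.upper()  # stand-in for a slow registry call


    print(expensive_lookup("nginx"))  # computed once, then served from the cache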
dstack/_internal/server/services/jobs/configurators/dev.py
@@ -1,4 +1,4 @@
- from typing import List, Optional
+ from typing import Dict, List, Optional

  from dstack._internal.core.errors import ServerClientError
  from dstack._internal.core.models.configurations import PortMapping, RunConfigurationType
@@ -17,7 +17,7 @@ INSTALL_IPYKERNEL = (
  class DevEnvironmentJobConfigurator(JobConfigurator):
      TYPE: RunConfigurationType = RunConfigurationType.DEV_ENVIRONMENT

-     def __init__(self, run_spec: RunSpec):
+     def __init__(self, run_spec: RunSpec, secrets: Dict[str, str]):
          if run_spec.configuration.ide == "vscode":
              __class = VSCodeDesktop
          elif run_spec.configuration.ide == "cursor":
@@ -29,7 +29,7 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
              version=run_spec.configuration.version,
              extensions=["ms-python.python", "ms-toolsai.jupyter"],
          )
-         super().__init__(run_spec)
+         super().__init__(run_spec=run_spec, secrets=secrets)

      def _shell_commands(self) -> List[str]:
          commands = self.ide.get_install_commands()