dstack 0.19.27__py3-none-any.whl → 0.19.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dstack might be problematic.
Files changed (44)
  1. dstack/_internal/cli/commands/__init__.py +11 -8
  2. dstack/_internal/cli/commands/apply.py +6 -3
  3. dstack/_internal/cli/commands/completion.py +3 -1
  4. dstack/_internal/cli/commands/config.py +1 -0
  5. dstack/_internal/cli/commands/init.py +2 -2
  6. dstack/_internal/cli/commands/offer.py +1 -1
  7. dstack/_internal/cli/commands/project.py +1 -0
  8. dstack/_internal/cli/commands/server.py +2 -2
  9. dstack/_internal/cli/main.py +1 -1
  10. dstack/_internal/cli/services/configurators/base.py +2 -4
  11. dstack/_internal/cli/services/configurators/fleet.py +4 -5
  12. dstack/_internal/cli/services/configurators/gateway.py +3 -5
  13. dstack/_internal/cli/services/configurators/run.py +51 -27
  14. dstack/_internal/cli/services/configurators/volume.py +3 -5
  15. dstack/_internal/core/compatibility/runs.py +2 -0
  16. dstack/_internal/core/models/common.py +67 -43
  17. dstack/_internal/core/models/configurations.py +88 -62
  18. dstack/_internal/core/models/fleets.py +41 -24
  19. dstack/_internal/core/models/instances.py +5 -5
  20. dstack/_internal/core/models/profiles.py +66 -47
  21. dstack/_internal/core/models/repos/remote.py +21 -16
  22. dstack/_internal/core/models/resources.py +69 -65
  23. dstack/_internal/core/models/runs.py +17 -9
  24. dstack/_internal/server/app.py +5 -0
  25. dstack/_internal/server/background/tasks/process_fleets.py +8 -0
  26. dstack/_internal/server/background/tasks/process_submitted_jobs.py +32 -12
  27. dstack/_internal/server/models.py +6 -5
  28. dstack/_internal/server/schemas/gateways.py +10 -9
  29. dstack/_internal/server/services/backends/handlers.py +2 -0
  30. dstack/_internal/server/services/docker.py +8 -7
  31. dstack/_internal/server/services/projects.py +52 -1
  32. dstack/_internal/server/settings.py +46 -0
  33. dstack/_internal/server/statics/index.html +1 -1
  34. dstack/_internal/server/statics/{main-56191c63d516fd0041c4.css → main-5e0d56245c4bd241ec27.css} +1 -1
  35. dstack/_internal/server/statics/{main-4eecc75fbe64067eb1bc.js → main-a2a16772fbf11a14d191.js} +70 -100
  36. dstack/_internal/server/statics/{main-4eecc75fbe64067eb1bc.js.map → main-a2a16772fbf11a14d191.js.map} +1 -1
  37. dstack/_internal/utils/env.py +85 -11
  38. dstack/version.py +1 -1
  39. {dstack-0.19.27.dist-info → dstack-0.19.28.dist-info}/METADATA +1 -1
  40. {dstack-0.19.27.dist-info → dstack-0.19.28.dist-info}/RECORD +43 -44
  41. dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +0 -3
  42. {dstack-0.19.27.dist-info → dstack-0.19.28.dist-info}/WHEEL +0 -0
  43. {dstack-0.19.27.dist-info → dstack-0.19.28.dist-info}/entry_points.txt +0 -0
  44. {dstack-0.19.27.dist-info → dstack-0.19.28.dist-info}/licenses/LICENSE.md +0 -0

dstack/_internal/core/models/resources.py

@@ -7,7 +7,7 @@ from pydantic import Field, parse_obj_as, root_validator, validator
 from pydantic.generics import GenericModel
 from typing_extensions import Annotated
 
-from dstack._internal.core.models.common import CoreModel
+from dstack._internal.core.models.common import CoreConfig, CoreModel, generate_dual_core_model
 from dstack._internal.utils.common import pretty_resources
 from dstack._internal.utils.json_schema import add_extra_schema_types
 from dstack._internal.utils.logging import get_logger
@@ -129,21 +129,22 @@ DEFAULT_MEMORY_SIZE = Range[Memory](min=Memory.parse("8GB"))
 DEFAULT_GPU_COUNT = Range[int](min=1)
 
 
-class CPUSpec(CoreModel):
+class CPUSpecConfig(CoreConfig):
+    @staticmethod
+    def schema_extra(schema: Dict[str, Any]):
+        add_extra_schema_types(
+            schema["properties"]["count"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+
+
+class CPUSpec(generate_dual_core_model(CPUSpecConfig)):
     arch: Annotated[
         Optional[gpuhunt.CPUArchitecture],
         Field(description="The CPU architecture, one of: `x86`, `arm`"),
     ] = None
     count: Annotated[Range[int], Field(description="The number of CPU cores")] = DEFAULT_CPU_COUNT
 
-    class Config(CoreModel.Config):
-        @staticmethod
-        def schema_extra(schema: Dict[str, Any]):
-            add_extra_schema_types(
-                schema["properties"]["count"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-
     @classmethod
     def __get_validators__(cls):
         yield cls.parse
@@ -190,7 +191,28 @@ class CPUSpec(CoreModel):
         return v
 
 
-class GPUSpec(CoreModel):
+class GPUSpecConfig(CoreConfig):
+    @staticmethod
+    def schema_extra(schema: Dict[str, Any]):
+        add_extra_schema_types(
+            schema["properties"]["count"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["name"],
+            extra_types=[{"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["memory"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["total_memory"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+
+
+class GPUSpec(generate_dual_core_model(GPUSpecConfig)):
     vendor: Annotated[
         Optional[gpuhunt.AcceleratorVendor],
         Field(
@@ -218,26 +240,6 @@ class GPUSpec(CoreModel):
         Field(description="The minimum compute capability of the GPU (e.g., `7.5`)"),
     ] = None
 
-    class Config(CoreModel.Config):
-        @staticmethod
-        def schema_extra(schema: Dict[str, Any]):
-            add_extra_schema_types(
-                schema["properties"]["count"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["name"],
-                extra_types=[{"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["memory"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["total_memory"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-
     @classmethod
     def __get_validators__(cls):
         yield cls.parse
@@ -317,16 +319,17 @@ class GPUSpec(CoreModel):
         return gpuhunt.AcceleratorVendor.cast(v)
 
 
-class DiskSpec(CoreModel):
-    size: Annotated[Range[Memory], Field(description="Disk size")]
+class DiskSpecConfig(CoreConfig):
+    @staticmethod
+    def schema_extra(schema: Dict[str, Any]):
+        add_extra_schema_types(
+            schema["properties"]["size"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
 
-    class Config(CoreModel.Config):
-        @staticmethod
-        def schema_extra(schema: Dict[str, Any]):
-            add_extra_schema_types(
-                schema["properties"]["size"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
+
+class DiskSpec(generate_dual_core_model(DiskSpecConfig)):
+    size: Annotated[Range[Memory], Field(description="Disk size")]
 
     @classmethod
     def __get_validators__(cls):
@@ -343,7 +346,32 @@ class DiskSpec(CoreModel):
 DEFAULT_DISK = DiskSpec(size=Range[Memory](min=Memory.parse("100GB"), max=None))
 
 
-class ResourcesSpec(CoreModel):
+class ResourcesSpecConfig(CoreConfig):
+    @staticmethod
+    def schema_extra(schema: Dict[str, Any]):
+        add_extra_schema_types(
+            schema["properties"]["cpu"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["memory"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["shm_size"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["gpu"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+        add_extra_schema_types(
+            schema["properties"]["disk"],
+            extra_types=[{"type": "integer"}, {"type": "string"}],
+        )
+
+
+class ResourcesSpec(generate_dual_core_model(ResourcesSpecConfig)):
     # TODO: Remove Range[int] in 0.20. Range[int] for backward compatibility only.
     cpu: Annotated[Union[CPUSpec, Range[int]], Field(description="The CPU requirements")] = (
        CPUSpec()
@@ -362,30 +390,6 @@ class ResourcesSpec(CoreModel):
     gpu: Annotated[Optional[GPUSpec], Field(description="The GPU requirements")] = None
     disk: Annotated[Optional[DiskSpec], Field(description="The disk resources")] = DEFAULT_DISK
 
-    class Config(CoreModel.Config):
-        @staticmethod
-        def schema_extra(schema: Dict[str, Any]):
-            add_extra_schema_types(
-                schema["properties"]["cpu"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["memory"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["shm_size"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["gpu"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-            add_extra_schema_types(
-                schema["properties"]["disk"],
-                extra_types=[{"type": "integer"}, {"type": "string"}],
-            )
-
     def pretty_format(self) -> str:
         # TODO: Remove in 0.20. Use self.cpu directly
         cpu = parse_obj_as(CPUSpec, self.cpu)
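
Note: the `CoreConfig` and `generate_dual_core_model` helpers used above come from `dstack/_internal/core/models/common.py` (+67 -43 in this release), which is not included in this excerpt. A minimal sketch of one way such a factory could be written under Pydantic v1 is shown below; everything beyond the two imported names is an assumption.

```python
# Hypothetical sketch -- the real helpers live in
# dstack/_internal/core/models/common.py and are not shown in this diff.
from typing import Type

from pydantic import BaseModel


class CoreModel(BaseModel):
    class Config:
        pass  # shared dstack-wide pydantic options (assumed)


class CoreConfig:
    """Standalone holder for per-model config options such as schema_extra (assumed)."""


def generate_dual_core_model(config_cls: Type[CoreConfig]) -> Type[CoreModel]:
    # Build a CoreModel base class whose pydantic v1 Config merges the shared
    # CoreModel.Config with the standalone config class, so models like CPUSpec
    # can keep their schema tweaks outside the model body.
    class _Base(CoreModel):
        class Config(config_cls, CoreModel.Config):
            pass

    return _Base
```

If the "dual" in the name instead refers to carrying the configuration in both Pydantic v1 and v2 shapes, the factory would also have to emit a v2-style `model_config`; the excerpt alone does not settle this.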

dstack/_internal/core/models/runs.py

@@ -1,13 +1,20 @@
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import Any, Dict, List, Literal, Optional, Type
+from typing import Any, Dict, List, Literal, Optional
 from urllib.parse import urlparse
 
 from pydantic import UUID4, Field, root_validator
 from typing_extensions import Annotated
 
 from dstack._internal.core.models.backends.base import BackendType
-from dstack._internal.core.models.common import ApplyAction, CoreModel, NetworkMode, RegistryAuth
+from dstack._internal.core.models.common import (
+    ApplyAction,
+    CoreConfig,
+    CoreModel,
+    NetworkMode,
+    RegistryAuth,
+    generate_dual_core_model,
+)
 from dstack._internal.core.models.configurations import (
     DEFAULT_PROBE_METHOD,
     LEGACY_REPO_DIR,
@@ -385,7 +392,14 @@ class Job(CoreModel):
     job_submissions: List[JobSubmission]
 
 
-class RunSpec(CoreModel):
+class RunSpecConfig(CoreConfig):
+    @staticmethod
+    def schema_extra(schema: Dict[str, Any]):
+        prop = schema.get("properties", {})
+        prop.pop("merged_profile", None)
+
+
+class RunSpec(generate_dual_core_model(RunSpecConfig)):
     # TODO: run_name, working_dir are redundant here since they already passed in configuration
     run_name: Annotated[
         Optional[str],
@@ -458,12 +472,6 @@ class RunSpec(CoreModel):
     # TODO: make merged_profile a computed field after migrating to pydanticV2
     merged_profile: Annotated[Profile, Field(exclude=True)] = None
 
-    class Config(CoreModel.Config):
-        @staticmethod
-        def schema_extra(schema: Dict[str, Any], model: Type) -> None:
-            prop = schema.get("properties", {})
-            prop.pop("merged_profile", None)
-
     @root_validator
     def _merged_profile(cls, values) -> Dict:
         if values.get("profile") is None:

dstack/_internal/server/app.py

@@ -160,6 +160,11 @@ async def lifespan(app: FastAPI):
         logger.info("Background processing is disabled")
     PROBES_SCHEDULER.start()
     dstack_version = DSTACK_VERSION if DSTACK_VERSION else "(no version)"
+    logger.info(
+        "Job network mode: %s (%d)",
+        settings.JOB_NETWORK_MODE.name,
+        settings.JOB_NETWORK_MODE.value,
+    )
     logger.info(f"The admin token is {admin.token.get_plaintext_or_error()}", {"show_path": False})
     logger.info(
         f"The dstack server {dstack_version} is running at {SERVER_URL}",

dstack/_internal/server/background/tasks/process_fleets.py

@@ -177,6 +177,14 @@ def _maintain_fleet_nodes_min(
 
 
 def _autodelete_fleet(fleet_model: FleetModel) -> bool:
+    if fleet_model.project.deleted:
+        # It used to be possible to delete project with active resources:
+        # https://github.com/dstackai/dstack/issues/3077
+        fleet_model.status = FleetStatus.TERMINATED
+        fleet_model.deleted = True
+        logger.info("Fleet %s deleted due to deleted project", fleet_model.name)
+        return True
+
     if is_fleet_in_use(fleet_model) or not is_fleet_empty(fleet_model):
         return False
 

dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -5,7 +5,7 @@ import uuid
 from datetime import datetime, timedelta
 from typing import List, Optional, Tuple
 
-from sqlalchemy import and_, not_, or_, select
+from sqlalchemy import and_, func, not_, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import contains_eager, joinedload, load_only, noload, selectinload
 
@@ -54,6 +54,7 @@ from dstack._internal.server.models import (
 from dstack._internal.server.services.backends import get_project_backend_by_type_or_error
 from dstack._internal.server.services.fleets import (
     fleet_model_to_fleet,
+    generate_fleet_name,
     get_fleet_requirements,
     get_next_instance_num,
 )
@@ -71,7 +72,7 @@ from dstack._internal.server.services.jobs import (
     get_job_configured_volumes,
     get_job_runtime_data,
 )
-from dstack._internal.server.services.locking import get_locker
+from dstack._internal.server.services.locking import get_locker, string_to_lock_id
 from dstack._internal.server.services.logging import fmt
 from dstack._internal.server.services.offers import get_offers_by_requirements
 from dstack._internal.server.services.requirements.combine import (
@@ -87,7 +88,6 @@ from dstack._internal.server.services.volumes import (
 )
 from dstack._internal.server.utils import sentry_utils
 from dstack._internal.utils import common as common_utils
-from dstack._internal.utils import env as env_utils
 from dstack._internal.utils.logging import get_logger
 
 logger = get_logger(__name__)
@@ -188,6 +188,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
     run_spec = run.run_spec
     profile = run_spec.merged_profile
     job = find_job(run.jobs, job_model.replica_num, job_model.job_num)
+    multinode = job.job_spec.jobs_per_replica > 1
 
     # Master job chooses fleet for the run.
     # Due to two-step processing, it's saved to job_model.fleet.
@@ -310,6 +311,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
             session=session,
             instances_with_offers=fleet_instances_with_offers,
             job_model=job_model,
+            multinode=multinode,
         )
         job_model.fleet = fleet_model
         job_model.instance_assigned = True
@@ -363,7 +365,8 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
     job_model.job_provisioning_data = job_provisioning_data.json()
     job_model.status = JobStatus.PROVISIONING
     if fleet_model is None:
-        fleet_model = _create_fleet_model_for_job(
+        fleet_model = await _create_fleet_model_for_job(
+            session=session,
             project=project,
             run=run,
         )
@@ -385,7 +388,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
             offer=offer,
             instance_num=instance_num,
         )
-        job_model.job_runtime_data = _prepare_job_runtime_data(offer).json()
+        job_model.job_runtime_data = _prepare_job_runtime_data(offer, multinode).json()
         # Both this task and process_fleets can add instances to fleets.
         # TODO: Ensure this does not violate nodes.max when it's enforced.
         instance.fleet_id = fleet_model.id
@@ -614,6 +617,7 @@ async def _assign_job_to_fleet_instance(
     session: AsyncSession,
     instances_with_offers: list[tuple[InstanceModel, InstanceOfferWithAvailability]],
     job_model: JobModel,
+    multinode: bool,
 ) -> Optional[InstanceModel]:
     if len(instances_with_offers) == 0:
         return None
@@ -643,7 +647,7 @@ async def _assign_job_to_fleet_instance(
     job_model.instance = instance
     job_model.used_instance_id = instance.id
     job_model.job_provisioning_data = instance.job_provisioning_data
-    job_model.job_runtime_data = _prepare_job_runtime_data(offer).json()
+    job_model.job_runtime_data = _prepare_job_runtime_data(offer, multinode).json()
     return instance
 
 
@@ -752,7 +756,8 @@ def _check_can_create_new_instance_in_fleet(fleet: Fleet) -> bool:
     return True
 
 
-def _create_fleet_model_for_job(
+async def _create_fleet_model_for_job(
+    session: AsyncSession,
     project: ProjectModel,
     run: Run,
 ) -> FleetModel:
@@ -760,9 +765,19 @@ def _create_fleet_model_for_job(
     if run.run_spec.configuration.type == "task" and run.run_spec.configuration.nodes > 1:
         placement = InstanceGroupPlacement.CLUSTER
         nodes = _get_nodes_required_num_for_run(run.run_spec)
+
+    lock_namespace = f"fleet_names_{project.name}"
+    # TODO: Lock fleet names on SQLite.
+    # Needs some refactoring so that the lock is released after commit.
+    if get_db().dialect_name == "postgresql":
+        await session.execute(
+            select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
+        )
+    fleet_name = await generate_fleet_name(session=session, project=project)
+
     spec = FleetSpec(
         configuration=FleetConfiguration(
-            name=run.run_spec.run_name,
+            name=fleet_name,
             placement=placement,
             reservation=run.run_spec.configuration.reservation,
             nodes=FleetNodesSpec(
@@ -776,7 +791,7 @@ def _create_fleet_model_for_job(
     )
     fleet_model = FleetModel(
         id=uuid.uuid4(),
-        name=run.run_spec.run_name,
+        name=fleet_name,
         project=project,
         status=FleetStatus.ACTIVE,
         spec=spec.json(),
@@ -839,12 +854,17 @@ def _create_instance_model_for_job(
     return instance
 
 
-def _prepare_job_runtime_data(offer: InstanceOfferWithAvailability) -> JobRuntimeData:
+def _prepare_job_runtime_data(
+    offer: InstanceOfferWithAvailability, multinode: bool
+) -> JobRuntimeData:
     if offer.blocks == offer.total_blocks:
-        if env_utils.get_bool("DSTACK_FORCE_BRIDGE_NETWORK"):
+        if settings.JOB_NETWORK_MODE == settings.JobNetworkMode.FORCED_BRIDGE:
             network_mode = NetworkMode.BRIDGE
-        else:
+        elif settings.JOB_NETWORK_MODE == settings.JobNetworkMode.HOST_WHEN_POSSIBLE:
             network_mode = NetworkMode.HOST
+        else:
+            assert settings.JOB_NETWORK_MODE == settings.JobNetworkMode.HOST_FOR_MULTINODE_ONLY
+            network_mode = NetworkMode.HOST if multinode else NetworkMode.BRIDGE
         return JobRuntimeData(
             network_mode=network_mode,
             offer=offer,
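
The fleet-name generation above serializes name allocation per project with a Postgres advisory transaction lock keyed by `string_to_lock_id(f"fleet_names_{project.name}")`. That helper lives in `dstack/_internal/server/services/locking.py` and is not shown in this diff; the mapping it presumably performs is a deterministic string-to-signed-64-bit-key hash, roughly:

```python
import hashlib


def string_to_lock_id(name: str) -> int:
    # pg_advisory_xact_lock() takes a signed 64-bit key, so derive one
    # deterministically from the namespace string. Sketch only; the real
    # helper is in dstack/_internal/server/services/locking.py.
    digest = hashlib.sha256(name.encode()).digest()
    return int.from_bytes(digest[:8], "big", signed=True)
```

Because `pg_advisory_xact_lock` is transaction-scoped, Postgres releases the lock automatically on commit or rollback; the TODO in the hunk notes that SQLite has no equivalent, which is why locking fleet names there still needs refactoring so the lock is released after commit.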

dstack/_internal/server/models.py

@@ -24,7 +24,7 @@ from sqlalchemy_utils import UUIDType
 
 from dstack._internal.core.errors import DstackError
 from dstack._internal.core.models.backends.base import BackendType
-from dstack._internal.core.models.common import CoreModel
+from dstack._internal.core.models.common import CoreConfig, generate_dual_core_model
 from dstack._internal.core.models.fleets import FleetStatus
 from dstack._internal.core.models.gateways import GatewayStatus
 from dstack._internal.core.models.health import HealthStatus
@@ -71,7 +71,11 @@ class NaiveDateTime(TypeDecorator):
         return value.replace(tzinfo=timezone.utc)
 
 
-class DecryptedString(CoreModel):
+class DecryptedStringConfig(CoreConfig):
+    arbitrary_types_allowed = True
+
+
+class DecryptedString(generate_dual_core_model(DecryptedStringConfig)):
     """
     A type for representing plaintext strings encrypted with `EncryptedString`.
     Besides the string, stores information if the decryption was successful.
@@ -84,9 +88,6 @@ class DecryptedString(CoreModel):
     decrypted: bool = True
     exc: Optional[Exception] = None
 
-    class Config(CoreModel.Config):
-        arbitrary_types_allowed = True
-
     def get_plaintext_or_error(self) -> str:
         if self.decrypted and self.plaintext is not None:
             return self.plaintext

dstack/_internal/server/schemas/gateways.py

@@ -3,24 +3,25 @@ from typing import Annotated, Any, Dict, List, Optional
 from pydantic import Field
 
 from dstack._internal.core.models.backends.base import BackendType
-from dstack._internal.core.models.common import CoreModel
+from dstack._internal.core.models.common import CoreConfig, CoreModel, generate_dual_core_model
 from dstack._internal.core.models.gateways import GatewayConfiguration
 
 
-class CreateGatewayRequest(CoreModel):
+class CreateGatewayRequestConfig(CoreConfig):
+    @staticmethod
+    def schema_extra(schema: Dict[str, Any]):
+        del schema["properties"]["name"]
+        del schema["properties"]["backend_type"]
+        del schema["properties"]["region"]
+
+
+class CreateGatewayRequest(generate_dual_core_model(CreateGatewayRequestConfig)):
     configuration: GatewayConfiguration
     # Deprecated and unused. Left for compatibility with 0.18 clients.
     name: Annotated[Optional[str], Field(exclude=True)] = None
     backend_type: Annotated[Optional[BackendType], Field(exclude=True)] = None
     region: Annotated[Optional[str], Field(exclude=True)] = None
 
-    class Config(CoreModel.Config):
-        @staticmethod
-        def schema_extra(schema: Dict[str, Any]) -> None:
-            del schema["properties"]["name"]
-            del schema["properties"]["backend_type"]
-            del schema["properties"]["region"]
-
 
 class GetGatewayRequest(CoreModel):
     name: str

dstack/_internal/server/services/backends/handlers.py

@@ -20,6 +20,8 @@ async def delete_backends_safe(
     error: bool = True,
 ):
     try:
+        # FIXME: The checks are not under lock,
+        # so there can be dangling active resources due to race conditions.
         await _check_active_instances(
             session=session,
             project=project,

dstack/_internal/server/services/docker.py

@@ -9,7 +9,11 @@ from pydantic import Field, ValidationError, validator
 from typing_extensions import Annotated
 
 from dstack._internal.core.errors import DockerRegistryError
-from dstack._internal.core.models.common import CoreModel, RegistryAuth
+from dstack._internal.core.models.common import (
+    CoreModel,
+    FrozenCoreModel,
+    RegistryAuth,
+)
 from dstack._internal.server.utils.common import join_byte_stream_checked
 from dstack._internal.utils.dxf import PatchedDXF
 
@@ -31,15 +35,12 @@ class DXFAuthAdapter:
         )
 
 
-class DockerImage(CoreModel):
+class DockerImage(FrozenCoreModel):
     image: str
-    registry: Optional[str]
+    registry: Optional[str] = None
     repo: str
     tag: str
-    digest: Optional[str]
-
-    class Config(CoreModel.Config):
-        frozen = True
+    digest: Optional[str] = None
 
 
 class ImageConfig(CoreModel):
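
`FrozenCoreModel` replaces the per-model `Config.frozen = True` override. Like the other `common.py` additions, its definition is not part of this excerpt; under Pydantic v1 it is presumably little more than the following sketch:

```python
class FrozenCoreModel(CoreModel):
    # Immutable (and therefore hashable) variant of CoreModel. Sketch only;
    # the real class is defined in dstack/_internal/core/models/common.py.
    class Config(CoreModel.Config):
        frozen = True
```

The explicit `= None` defaults added to `registry` and `digest` also make their optionality explicit, which matters for a Pydantic v2 migration where a bare `Optional[str]` no longer implies a `None` default.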

dstack/_internal/server/services/projects.py

@@ -14,8 +14,16 @@ from dstack._internal.core.backends.dstack.models import (
 from dstack._internal.core.backends.models import BackendInfo
 from dstack._internal.core.errors import ForbiddenError, ResourceExistsError, ServerClientError
 from dstack._internal.core.models.projects import Member, MemberPermissions, Project
+from dstack._internal.core.models.runs import RunStatus
 from dstack._internal.core.models.users import GlobalRole, ProjectRole
-from dstack._internal.server.models import MemberModel, ProjectModel, UserModel
+from dstack._internal.server.models import (
+    FleetModel,
+    MemberModel,
+    ProjectModel,
+    RunModel,
+    UserModel,
+    VolumeModel,
+)
 from dstack._internal.server.schemas.projects import MemberSetting
 from dstack._internal.server.services import users
 from dstack._internal.server.services.backends import (
@@ -178,6 +186,19 @@ async def delete_projects(
             raise ForbiddenError()
     if all(name in projects_names for name in user_project_names):
         raise ServerClientError("Cannot delete the only project")
+
+    res = await session.execute(
+        select(ProjectModel.id).where(ProjectModel.name.in_(projects_names))
+    )
+    project_ids = res.scalars().all()
+    if len(project_ids) != len(projects_names):
+        raise ServerClientError("Failed to delete non-existent projects")
+
+    for project_id in project_ids:
+        # FIXME: The checks are not under lock,
+        # so there can be dangling active resources due to race conditions.
+        await _check_project_has_active_resources(session=session, project_id=project_id)
+
     timestamp = str(int(get_current_datetime().timestamp()))
     new_project_name = "_deleted_" + timestamp + ProjectModel.name
     await session.execute(
@@ -614,6 +635,36 @@ def _is_project_admin(
     return False
 
 
+async def _check_project_has_active_resources(session: AsyncSession, project_id: uuid.UUID):
+    res = await session.execute(
+        select(RunModel.run_name).where(
+            RunModel.project_id == project_id,
+            RunModel.status.not_in(RunStatus.finished_statuses()),
+        )
+    )
+    run_names = list(res.scalars().all())
+    if len(run_names) > 0:
+        raise ServerClientError(f"Failed to delete project with active runs: {run_names}")
+    res = await session.execute(
+        select(FleetModel.name).where(
+            FleetModel.project_id == project_id,
+            FleetModel.deleted.is_(False),
+        )
+    )
+    fleet_names = list(res.scalars().all())
+    if len(fleet_names) > 0:
+        raise ServerClientError(f"Failed to delete project with active fleets: {fleet_names}")
+    res = await session.execute(
+        select(VolumeModel.name).where(
+            VolumeModel.project_id == project_id,
+            VolumeModel.deleted.is_(False),
+        )
+    )
+    volume_names = list(res.scalars().all())
+    if len(volume_names) > 0:
+        raise ServerClientError(f"Failed to delete project with active volumes: {volume_names}")
+
+
 async def remove_project_members(
     session: AsyncSession,
     user: UserModel,

dstack/_internal/server/settings.py

@@ -4,8 +4,14 @@ Environment variables read by the dstack server. Documented in reference/environ
 
 import os
 import warnings
+from enum import Enum
 from pathlib import Path
 
+from dstack._internal.utils.env import environ
+from dstack._internal.utils.logging import get_logger
+
+logger = get_logger(__name__)
+
 DSTACK_DIR_PATH = Path("~/.dstack/").expanduser()
 SERVER_DIR_PATH = Path(os.getenv("DSTACK_SERVER_DIR", DSTACK_DIR_PATH / "server"))
@@ -136,3 +142,43 @@ UPDATE_DEFAULT_PROJECT = os.getenv("DSTACK_UPDATE_DEFAULT_PROJECT") is not None
 DO_NOT_UPDATE_DEFAULT_PROJECT = os.getenv("DSTACK_DO_NOT_UPDATE_DEFAULT_PROJECT") is not None
 SKIP_GATEWAY_UPDATE = os.getenv("DSTACK_SKIP_GATEWAY_UPDATE") is not None
 ENABLE_PROMETHEUS_METRICS = os.getenv("DSTACK_ENABLE_PROMETHEUS_METRICS") is not None
+
+
+class JobNetworkMode(Enum):
+    # "host" for multinode runs only, "bridge" otherwise. Opt-in new default
+    HOST_FOR_MULTINODE_ONLY = 1
+    # "bridge" if the job occupies only a part of the instance, "host" otherwise. Current default
+    HOST_WHEN_POSSIBLE = 2
+    # Always "bridge", even for multinode runs. Same as legacy DSTACK_FORCE_BRIDGE_NETWORK=true
+    FORCED_BRIDGE = 3
+
+
+def _get_job_network_mode() -> JobNetworkMode:
+    # Current default
+    mode = JobNetworkMode.HOST_WHEN_POSSIBLE
+    bridge_var = "DSTACK_FORCE_BRIDGE_NETWORK"
+    force_bridge = environ.get_bool(bridge_var)
+    mode_var = "DSTACK_SERVER_JOB_NETWORK_MODE"
+    mode_from_env = environ.get_enum(mode_var, JobNetworkMode, value_type=int)
+    if mode_from_env is not None:
+        if force_bridge is not None:
+            logger.warning(
+                f"{bridge_var} is deprecated since 0.19.27 and ignored when {mode_var} is set"
+            )
+        return mode_from_env
+    if force_bridge is not None:
+        if force_bridge:
+            mode = JobNetworkMode.FORCED_BRIDGE
+            logger.warning(
+                (
+                    f"{bridge_var} is deprecated since 0.19.27."
+                    f" Set {mode_var} to {mode.value} and remove {bridge_var}"
+                )
+            )
+        else:
+            logger.warning(f"{bridge_var} is deprecated since 0.19.27. Remove {bridge_var}")
+    return mode
+
+
+JOB_NETWORK_MODE = _get_job_network_mode()
+del _get_job_network_mode
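
`environ.get_bool` and `environ.get_enum` come from `dstack/_internal/utils/env.py`, which also changed in this release (+85 -11) but is not shown here. The behavior `_get_job_network_mode()` relies on can be sketched as follows (an assumption about the helper, not its actual source):

```python
import os
from enum import Enum
from typing import Optional, Type, TypeVar

E = TypeVar("E", bound=Enum)


def get_enum(name: str, enum_cls: Type[E], value_type: type = str) -> Optional[E]:
    # Unset variable -> None; otherwise coerce the raw string (e.g. "1") with
    # value_type and resolve it to an enum member by value. Sketch only; the
    # real helper is in dstack/_internal/utils/env.py.
    raw = os.getenv(name)
    if raw is None:
        return None
    return enum_cls(value_type(raw))
```

Under this reading, starting the server with `DSTACK_SERVER_JOB_NETWORK_MODE=1` selects `JobNetworkMode.HOST_FOR_MULTINODE_ONLY`, `2` keeps the current `HOST_WHEN_POSSIBLE` default, and `3` reproduces the legacy `DSTACK_FORCE_BRIDGE_NETWORK=true` behavior.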