dstack 0.19.26__py3-none-any.whl → 0.19.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (93)
  1. dstack/_internal/cli/commands/__init__.py +11 -8
  2. dstack/_internal/cli/commands/apply.py +6 -3
  3. dstack/_internal/cli/commands/completion.py +3 -1
  4. dstack/_internal/cli/commands/config.py +1 -0
  5. dstack/_internal/cli/commands/init.py +4 -4
  6. dstack/_internal/cli/commands/offer.py +1 -1
  7. dstack/_internal/cli/commands/project.py +1 -0
  8. dstack/_internal/cli/commands/server.py +2 -2
  9. dstack/_internal/cli/main.py +1 -1
  10. dstack/_internal/cli/services/configurators/base.py +2 -4
  11. dstack/_internal/cli/services/configurators/fleet.py +4 -5
  12. dstack/_internal/cli/services/configurators/gateway.py +3 -5
  13. dstack/_internal/cli/services/configurators/run.py +165 -43
  14. dstack/_internal/cli/services/configurators/volume.py +3 -5
  15. dstack/_internal/cli/services/repos.py +1 -18
  16. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  17. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  18. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  19. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  20. dstack/_internal/core/backends/aws/compute.py +6 -1
  21. dstack/_internal/core/backends/base/compute.py +33 -5
  22. dstack/_internal/core/backends/base/offers.py +2 -0
  23. dstack/_internal/core/backends/configurators.py +15 -0
  24. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  25. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  26. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  27. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  28. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  29. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  30. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  31. dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
  32. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  33. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  34. dstack/_internal/core/backends/gcp/compute.py +32 -8
  35. dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
  36. dstack/_internal/core/backends/hotaisle/compute.py +1 -6
  37. dstack/_internal/core/backends/models.py +7 -0
  38. dstack/_internal/core/backends/nebius/compute.py +0 -7
  39. dstack/_internal/core/backends/oci/compute.py +4 -5
  40. dstack/_internal/core/backends/vultr/compute.py +1 -5
  41. dstack/_internal/core/compatibility/fleets.py +5 -0
  42. dstack/_internal/core/compatibility/runs.py +10 -1
  43. dstack/_internal/core/models/backends/base.py +5 -1
  44. dstack/_internal/core/models/common.py +67 -43
  45. dstack/_internal/core/models/configurations.py +109 -69
  46. dstack/_internal/core/models/files.py +1 -1
  47. dstack/_internal/core/models/fleets.py +115 -25
  48. dstack/_internal/core/models/instances.py +5 -5
  49. dstack/_internal/core/models/profiles.py +66 -47
  50. dstack/_internal/core/models/repos/remote.py +21 -16
  51. dstack/_internal/core/models/resources.py +69 -65
  52. dstack/_internal/core/models/runs.py +41 -14
  53. dstack/_internal/core/services/repos.py +85 -80
  54. dstack/_internal/server/app.py +5 -0
  55. dstack/_internal/server/background/tasks/process_fleets.py +117 -13
  56. dstack/_internal/server/background/tasks/process_instances.py +12 -71
  57. dstack/_internal/server/background/tasks/process_running_jobs.py +2 -0
  58. dstack/_internal/server/background/tasks/process_runs.py +2 -0
  59. dstack/_internal/server/background/tasks/process_submitted_jobs.py +48 -16
  60. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  61. dstack/_internal/server/models.py +11 -7
  62. dstack/_internal/server/schemas/gateways.py +10 -9
  63. dstack/_internal/server/schemas/runner.py +1 -0
  64. dstack/_internal/server/services/backends/handlers.py +2 -0
  65. dstack/_internal/server/services/docker.py +8 -7
  66. dstack/_internal/server/services/fleets.py +23 -25
  67. dstack/_internal/server/services/instances.py +3 -3
  68. dstack/_internal/server/services/jobs/configurators/base.py +46 -6
  69. dstack/_internal/server/services/jobs/configurators/dev.py +4 -4
  70. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -5
  71. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +4 -6
  72. dstack/_internal/server/services/jobs/configurators/service.py +0 -3
  73. dstack/_internal/server/services/jobs/configurators/task.py +0 -3
  74. dstack/_internal/server/services/projects.py +52 -1
  75. dstack/_internal/server/services/runs.py +16 -0
  76. dstack/_internal/server/settings.py +46 -0
  77. dstack/_internal/server/statics/index.html +1 -1
  78. dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-5e0d56245c4bd241ec27.css} +1 -1
  79. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-a2a16772fbf11a14d191.js} +1215 -998
  80. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-a2a16772fbf11a14d191.js.map} +1 -1
  81. dstack/_internal/server/testing/common.py +6 -3
  82. dstack/_internal/utils/env.py +85 -11
  83. dstack/_internal/utils/path.py +8 -1
  84. dstack/_internal/utils/ssh.py +7 -0
  85. dstack/api/_public/repos.py +41 -6
  86. dstack/api/_public/runs.py +14 -1
  87. dstack/version.py +1 -1
  88. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/METADATA +2 -2
  89. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/RECORD +92 -78
  90. dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +0 -3
  91. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/WHEEL +0 -0
  92. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/entry_points.txt +0 -0
  93. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/licenses/LICENSE.md +0 -0
@@ -16,6 +16,7 @@ from dstack._internal.core.models.common import NetworkMode
16
16
  from dstack._internal.core.models.fleets import (
17
17
  Fleet,
18
18
  FleetConfiguration,
19
+ FleetNodesSpec,
19
20
  FleetSpec,
20
21
  FleetStatus,
21
22
  InstanceGroupPlacement,
@@ -26,7 +27,7 @@ from dstack._internal.core.models.profiles import (
26
27
  CreationPolicy,
27
28
  TerminationPolicy,
28
29
  )
29
- from dstack._internal.core.models.resources import Memory, Range
30
+ from dstack._internal.core.models.resources import Memory
30
31
  from dstack._internal.core.models.runs import (
31
32
  Job,
32
33
  JobProvisioningData,
@@ -53,7 +54,9 @@ from dstack._internal.server.models import (
53
54
  from dstack._internal.server.services.backends import get_project_backend_by_type_or_error
54
55
  from dstack._internal.server.services.fleets import (
55
56
  fleet_model_to_fleet,
57
+ generate_fleet_name,
56
58
  get_fleet_requirements,
59
+ get_next_instance_num,
57
60
  )
58
61
  from dstack._internal.server.services.instances import (
59
62
  filter_pool_instances,
@@ -69,7 +72,7 @@ from dstack._internal.server.services.jobs import (
69
72
  get_job_configured_volumes,
70
73
  get_job_runtime_data,
71
74
  )
72
- from dstack._internal.server.services.locking import get_locker
75
+ from dstack._internal.server.services.locking import get_locker, string_to_lock_id
73
76
  from dstack._internal.server.services.logging import fmt
74
77
  from dstack._internal.server.services.offers import get_offers_by_requirements
75
78
  from dstack._internal.server.services.requirements.combine import (
@@ -85,7 +88,6 @@ from dstack._internal.server.services.volumes import (
85
88
  )
86
89
  from dstack._internal.server.utils import sentry_utils
87
90
  from dstack._internal.utils import common as common_utils
88
- from dstack._internal.utils import env as env_utils
89
91
  from dstack._internal.utils.logging import get_logger
90
92
 
91
93
  logger = get_logger(__name__)
@@ -186,6 +188,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
186
188
  run_spec = run.run_spec
187
189
  profile = run_spec.merged_profile
188
190
  job = find_job(run.jobs, job_model.replica_num, job_model.job_num)
191
+ multinode = job.job_spec.jobs_per_replica > 1
189
192
 
190
193
  # Master job chooses fleet for the run.
191
194
  # Due to two-step processing, it's saved to job_model.fleet.
@@ -308,6 +311,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
308
311
  session=session,
309
312
  instances_with_offers=fleet_instances_with_offers,
310
313
  job_model=job_model,
314
+ multinode=multinode,
311
315
  )
312
316
  job_model.fleet = fleet_model
313
317
  job_model.instance_assigned = True
@@ -361,7 +365,8 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
361
365
  job_model.job_provisioning_data = job_provisioning_data.json()
362
366
  job_model.status = JobStatus.PROVISIONING
363
367
  if fleet_model is None:
364
- fleet_model = _create_fleet_model_for_job(
368
+ fleet_model = await _create_fleet_model_for_job(
369
+ session=session,
365
370
  project=project,
366
371
  run=run,
367
372
  )
@@ -383,7 +388,9 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
383
388
  offer=offer,
384
389
  instance_num=instance_num,
385
390
  )
386
- job_model.job_runtime_data = _prepare_job_runtime_data(offer).json()
391
+ job_model.job_runtime_data = _prepare_job_runtime_data(offer, multinode).json()
392
+ # Both this task and process_fleets can add instances to fleets.
393
+ # TODO: Ensure this does not violate nodes.max when it's enforced.
387
394
  instance.fleet_id = fleet_model.id
388
395
  logger.info(
389
396
  "The job %s created the new instance %s",
@@ -610,6 +617,7 @@ async def _assign_job_to_fleet_instance(
610
617
  session: AsyncSession,
611
618
  instances_with_offers: list[tuple[InstanceModel, InstanceOfferWithAvailability]],
612
619
  job_model: JobModel,
620
+ multinode: bool,
613
621
  ) -> Optional[InstanceModel]:
614
622
  if len(instances_with_offers) == 0:
615
623
  return None
@@ -639,7 +647,7 @@ async def _assign_job_to_fleet_instance(
639
647
  job_model.instance = instance
640
648
  job_model.used_instance_id = instance.id
641
649
  job_model.job_provisioning_data = instance.job_provisioning_data
642
- job_model.job_runtime_data = _prepare_job_runtime_data(offer).json()
650
+ job_model.job_runtime_data = _prepare_job_runtime_data(offer, multinode).json()
643
651
  return instance
644
652
 
645
653
 
@@ -748,26 +756,42 @@ def _check_can_create_new_instance_in_fleet(fleet: Fleet) -> bool:
748
756
  return True
749
757
 
750
758
 
751
- def _create_fleet_model_for_job(
759
+ async def _create_fleet_model_for_job(
760
+ session: AsyncSession,
752
761
  project: ProjectModel,
753
762
  run: Run,
754
763
  ) -> FleetModel:
755
764
  placement = InstanceGroupPlacement.ANY
756
765
  if run.run_spec.configuration.type == "task" and run.run_spec.configuration.nodes > 1:
757
766
  placement = InstanceGroupPlacement.CLUSTER
767
+ nodes = _get_nodes_required_num_for_run(run.run_spec)
768
+
769
+ lock_namespace = f"fleet_names_{project.name}"
770
+ # TODO: Lock fleet names on SQLite.
771
+ # Needs some refactoring so that the lock is released after commit.
772
+ if get_db().dialect_name == "postgresql":
773
+ await session.execute(
774
+ select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
775
+ )
776
+ fleet_name = await generate_fleet_name(session=session, project=project)
777
+
758
778
  spec = FleetSpec(
759
779
  configuration=FleetConfiguration(
760
- name=run.run_spec.run_name,
780
+ name=fleet_name,
761
781
  placement=placement,
762
782
  reservation=run.run_spec.configuration.reservation,
763
- nodes=Range(min=_get_nodes_required_num_for_run(run.run_spec), max=None),
783
+ nodes=FleetNodesSpec(
784
+ min=nodes,
785
+ target=nodes,
786
+ max=None,
787
+ ),
764
788
  ),
765
789
  profile=run.run_spec.merged_profile,
766
790
  autocreated=True,
767
791
  )
768
792
  fleet_model = FleetModel(
769
793
  id=uuid.uuid4(),
770
- name=run.run_spec.run_name,
794
+ name=fleet_name,
771
795
  project=project,
772
796
  status=FleetStatus.ACTIVE,
773
797
  spec=spec.json(),
@@ -778,10 +802,13 @@ def _create_fleet_model_for_job(
778
802
 
779
803
  async def _get_next_instance_num(session: AsyncSession, fleet_model: FleetModel) -> int:
780
804
  res = await session.execute(
781
- select(func.count(InstanceModel.id)).where(InstanceModel.fleet_id == fleet_model.id)
805
+ select(InstanceModel.instance_num).where(
806
+ InstanceModel.fleet_id == fleet_model.id,
807
+ InstanceModel.deleted.is_(False),
808
+ )
782
809
  )
783
- instance_count = res.scalar_one()
784
- return instance_count
810
+ taken_instance_nums = set(res.scalars().all())
811
+ return get_next_instance_num(taken_instance_nums)
785
812
 
786
813
 
787
814
  def _create_instance_model_for_job(
@@ -827,12 +854,17 @@ def _create_instance_model_for_job(
827
854
  return instance
828
855
 
829
856
 
830
- def _prepare_job_runtime_data(offer: InstanceOfferWithAvailability) -> JobRuntimeData:
857
+ def _prepare_job_runtime_data(
858
+ offer: InstanceOfferWithAvailability, multinode: bool
859
+ ) -> JobRuntimeData:
831
860
  if offer.blocks == offer.total_blocks:
832
- if env_utils.get_bool("DSTACK_FORCE_BRIDGE_NETWORK"):
861
+ if settings.JOB_NETWORK_MODE == settings.JobNetworkMode.FORCED_BRIDGE:
833
862
  network_mode = NetworkMode.BRIDGE
834
- else:
863
+ elif settings.JOB_NETWORK_MODE == settings.JobNetworkMode.HOST_WHEN_POSSIBLE:
835
864
  network_mode = NetworkMode.HOST
865
+ else:
866
+ assert settings.JOB_NETWORK_MODE == settings.JobNetworkMode.HOST_FOR_MULTINODE_ONLY
867
+ network_mode = NetworkMode.HOST if multinode else NetworkMode.BRIDGE
836
868
  return JobRuntimeData(
837
869
  network_mode=network_mode,
838
870
  offer=offer,
@@ -0,0 +1,44 @@
1
+ """Add FleetModel.consolidation_attempt and FleetModel.last_consolidated_at
2
+
3
+ Revision ID: 2498ab323443
4
+ Revises: e2d08cd1b8d9
5
+ Create Date: 2025-08-29 16:08:48.686595
6
+
7
+ """
8
+
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+
12
+ import dstack._internal.server.models
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision = "2498ab323443"
16
+ down_revision = "e2d08cd1b8d9"
17
+ branch_labels = None
18
+ depends_on = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ with op.batch_alter_table("fleets", schema=None) as batch_op:
24
+ batch_op.add_column(
25
+ sa.Column("consolidation_attempt", sa.Integer(), server_default="0", nullable=False)
26
+ )
27
+ batch_op.add_column(
28
+ sa.Column(
29
+ "last_consolidated_at",
30
+ dstack._internal.server.models.NaiveDateTime(),
31
+ nullable=True,
32
+ )
33
+ )
34
+
35
+ # ### end Alembic commands ###
36
+
37
+
38
+ def downgrade() -> None:
39
+ # ### commands auto generated by Alembic - please adjust! ###
40
+ with op.batch_alter_table("fleets", schema=None) as batch_op:
41
+ batch_op.drop_column("last_consolidated_at")
42
+ batch_op.drop_column("consolidation_attempt")
43
+
44
+ # ### end Alembic commands ###
@@ -24,7 +24,7 @@ from sqlalchemy_utils import UUIDType
24
24
 
25
25
  from dstack._internal.core.errors import DstackError
26
26
  from dstack._internal.core.models.backends.base import BackendType
27
- from dstack._internal.core.models.common import CoreModel
27
+ from dstack._internal.core.models.common import CoreConfig, generate_dual_core_model
28
28
  from dstack._internal.core.models.fleets import FleetStatus
29
29
  from dstack._internal.core.models.gateways import GatewayStatus
30
30
  from dstack._internal.core.models.health import HealthStatus
@@ -71,7 +71,11 @@ class NaiveDateTime(TypeDecorator):
71
71
  return value.replace(tzinfo=timezone.utc)
72
72
 
73
73
 
74
- class DecryptedString(CoreModel):
74
+ class DecryptedStringConfig(CoreConfig):
75
+ arbitrary_types_allowed = True
76
+
77
+
78
+ class DecryptedString(generate_dual_core_model(DecryptedStringConfig)):
75
79
  """
76
80
  A type for representing plaintext strings encrypted with `EncryptedString`.
77
81
  Besides the string, stores information if the decryption was successful.
@@ -84,9 +88,6 @@ class DecryptedString(CoreModel):
84
88
  decrypted: bool = True
85
89
  exc: Optional[Exception] = None
86
90
 
87
- class Config(CoreModel.Config):
88
- arbitrary_types_allowed = True
89
-
90
91
  def get_plaintext_or_error(self) -> str:
91
92
  if self.decrypted and self.plaintext is not None:
92
93
  return self.plaintext
@@ -551,6 +552,9 @@ class FleetModel(BaseModel):
551
552
  jobs: Mapped[List["JobModel"]] = relationship(back_populates="fleet")
552
553
  instances: Mapped[List["InstanceModel"]] = relationship(back_populates="fleet")
553
554
 
555
+ consolidation_attempt: Mapped[int] = mapped_column(Integer, server_default="0")
556
+ last_consolidated_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
557
+
554
558
 
555
559
  class InstanceModel(BaseModel):
556
560
  __tablename__ = "instances"
@@ -605,8 +609,8 @@ class InstanceModel(BaseModel):
605
609
  Integer, default=DEFAULT_FLEET_TERMINATION_IDLE_TIME
606
610
  )
607
611
 
608
- # retry policy
609
- last_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
612
+ # Deprecated
613
+ last_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime, deferred=True)
610
614
 
611
615
  # instance termination handling
612
616
  termination_deadline: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
@@ -3,24 +3,25 @@ from typing import Annotated, Any, Dict, List, Optional
3
3
  from pydantic import Field
4
4
 
5
5
  from dstack._internal.core.models.backends.base import BackendType
6
- from dstack._internal.core.models.common import CoreModel
6
+ from dstack._internal.core.models.common import CoreConfig, CoreModel, generate_dual_core_model
7
7
  from dstack._internal.core.models.gateways import GatewayConfiguration
8
8
 
9
9
 
10
- class CreateGatewayRequest(CoreModel):
10
+ class CreateGatewayRequestConfig(CoreConfig):
11
+ @staticmethod
12
+ def schema_extra(schema: Dict[str, Any]):
13
+ del schema["properties"]["name"]
14
+ del schema["properties"]["backend_type"]
15
+ del schema["properties"]["region"]
16
+
17
+
18
+ class CreateGatewayRequest(generate_dual_core_model(CreateGatewayRequestConfig)):
11
19
  configuration: GatewayConfiguration
12
20
  # Deprecated and unused. Left for compatibility with 0.18 clients.
13
21
  name: Annotated[Optional[str], Field(exclude=True)] = None
14
22
  backend_type: Annotated[Optional[BackendType], Field(exclude=True)] = None
15
23
  region: Annotated[Optional[str], Field(exclude=True)] = None
16
24
 
17
- class Config(CoreModel.Config):
18
- @staticmethod
19
- def schema_extra(schema: Dict[str, Any]) -> None:
20
- del schema["properties"]["name"]
21
- del schema["properties"]["backend_type"]
22
- del schema["properties"]["region"]
23
-
24
25
 
25
26
  class GetGatewayRequest(CoreModel):
26
27
  name: str
@@ -78,6 +78,7 @@ class SubmitBody(CoreModel):
78
78
  "max_duration",
79
79
  "ssh_key",
80
80
  "working_dir",
81
+ "repo_dir",
81
82
  "repo_data",
82
83
  "file_archives",
83
84
  }
@@ -20,6 +20,8 @@ async def delete_backends_safe(
20
20
  error: bool = True,
21
21
  ):
22
22
  try:
23
+ # FIXME: The checks are not under lock,
24
+ # so there can be dangling active resources due to race conditions.
23
25
  await _check_active_instances(
24
26
  session=session,
25
27
  project=project,
@@ -9,7 +9,11 @@ from pydantic import Field, ValidationError, validator
9
9
  from typing_extensions import Annotated
10
10
 
11
11
  from dstack._internal.core.errors import DockerRegistryError
12
- from dstack._internal.core.models.common import CoreModel, RegistryAuth
12
+ from dstack._internal.core.models.common import (
13
+ CoreModel,
14
+ FrozenCoreModel,
15
+ RegistryAuth,
16
+ )
13
17
  from dstack._internal.server.utils.common import join_byte_stream_checked
14
18
  from dstack._internal.utils.dxf import PatchedDXF
15
19
 
@@ -31,15 +35,12 @@ class DXFAuthAdapter:
31
35
  )
32
36
 
33
37
 
34
- class DockerImage(CoreModel):
38
+ class DockerImage(FrozenCoreModel):
35
39
  image: str
36
- registry: Optional[str]
40
+ registry: Optional[str] = None
37
41
  repo: str
38
42
  tag: str
39
- digest: Optional[str]
40
-
41
- class Config(CoreModel.Config):
42
- frozen = True
43
+ digest: Optional[str] = None
43
44
 
44
45
 
45
46
  class ImageConfig(CoreModel):
@@ -449,25 +449,24 @@ async def create_fleet(
449
449
  return await _create_fleet(session=session, project=project, user=user, spec=spec)
450
450
 
451
451
 
452
- async def create_fleet_instance_model(
452
+ def create_fleet_instance_model(
453
453
  session: AsyncSession,
454
454
  project: ProjectModel,
455
- user: UserModel,
455
+ username: str,
456
456
  spec: FleetSpec,
457
- reservation: Optional[str],
458
457
  instance_num: int,
459
458
  ) -> InstanceModel:
460
459
  profile = spec.merged_profile
461
460
  requirements = get_fleet_requirements(spec)
462
- instance_model = await instances_services.create_instance_model(
461
+ instance_model = instances_services.create_instance_model(
463
462
  session=session,
464
463
  project=project,
465
- user=user,
464
+ username=username,
466
465
  profile=profile,
467
466
  requirements=requirements,
468
467
  instance_name=f"{spec.configuration.name}-{instance_num}",
469
468
  instance_num=instance_num,
470
- reservation=reservation,
469
+ reservation=spec.merged_profile.reservation,
471
470
  blocks=spec.configuration.blocks,
472
471
  tags=spec.configuration.tags,
473
472
  )
@@ -655,6 +654,19 @@ def get_fleet_requirements(fleet_spec: FleetSpec) -> Requirements:
655
654
  return requirements
656
655
 
657
656
 
657
+ def get_next_instance_num(taken_instance_nums: set[int]) -> int:
658
+ if not taken_instance_nums:
659
+ return 0
660
+ min_instance_num = min(taken_instance_nums)
661
+ if min_instance_num > 0:
662
+ return 0
663
+ instance_num = min_instance_num + 1
664
+ while True:
665
+ if instance_num not in taken_instance_nums:
666
+ return instance_num
667
+ instance_num += 1
668
+
669
+
658
670
  async def _create_fleet(
659
671
  session: AsyncSession,
660
672
  project: ProjectModel,
@@ -705,12 +717,11 @@ async def _create_fleet(
705
717
  fleet_model.instances.append(instances_model)
706
718
  else:
707
719
  for i in range(_get_fleet_nodes_to_provision(spec)):
708
- instance_model = await create_fleet_instance_model(
720
+ instance_model = create_fleet_instance_model(
709
721
  session=session,
710
722
  project=project,
711
- user=user,
723
+ username=user.name,
712
724
  spec=spec,
713
- reservation=spec.configuration.reservation,
714
725
  instance_num=i,
715
726
  )
716
727
  fleet_model.instances.append(instance_model)
@@ -778,7 +789,7 @@ async def _update_fleet(
778
789
  if added_hosts:
779
790
  await _check_ssh_hosts_not_yet_added(session, spec, fleet.id)
780
791
  for host in added_hosts.values():
781
- instance_num = _get_next_instance_num(active_instance_nums)
792
+ instance_num = get_next_instance_num(active_instance_nums)
782
793
  instance_model = await create_fleet_ssh_instance_model(
783
794
  project=project,
784
795
  spec=spec,
@@ -994,9 +1005,9 @@ def _validate_internal_ips(ssh_config: SSHParams):
994
1005
 
995
1006
 
996
1007
  def _get_fleet_nodes_to_provision(spec: FleetSpec) -> int:
997
- if spec.configuration.nodes is None or spec.configuration.nodes.min is None:
1008
+ if spec.configuration.nodes is None:
998
1009
  return 0
999
- return spec.configuration.nodes.min
1010
+ return spec.configuration.nodes.target
1000
1011
 
1001
1012
 
1002
1013
  def _terminate_fleet_instances(fleet_model: FleetModel, instance_nums: Optional[List[int]]):
@@ -1013,16 +1024,3 @@ def _terminate_fleet_instances(fleet_model: FleetModel, instance_nums: Optional[
1013
1024
  instance.deleted = True
1014
1025
  else:
1015
1026
  instance.status = InstanceStatus.TERMINATING
1016
-
1017
-
1018
- def _get_next_instance_num(instance_nums: set[int]) -> int:
1019
- if not instance_nums:
1020
- return 0
1021
- min_instance_num = min(instance_nums)
1022
- if min_instance_num > 0:
1023
- return 0
1024
- instance_num = min_instance_num + 1
1025
- while True:
1026
- if instance_num not in instance_nums:
1027
- return instance_num
1028
- instance_num += 1
@@ -513,10 +513,10 @@ async def list_active_remote_instances(
513
513
  return instance_models
514
514
 
515
515
 
516
- async def create_instance_model(
516
+ def create_instance_model(
517
517
  session: AsyncSession,
518
518
  project: ProjectModel,
519
- user: UserModel,
519
+ username: str,
520
520
  profile: Profile,
521
521
  requirements: Requirements,
522
522
  instance_name: str,
@@ -536,7 +536,7 @@ async def create_instance_model(
536
536
  instance_config = InstanceConfiguration(
537
537
  project_name=project.name,
538
538
  instance_name=instance_name,
539
- user=user.name,
539
+ user=username,
540
540
  ssh_keys=[project_ssh_key],
541
541
  instance_id=str(instance_id),
542
542
  reservation=reservation,
@@ -16,7 +16,7 @@ from dstack._internal.core.models.configurations import (
16
16
  DEFAULT_PROBE_READY_AFTER,
17
17
  DEFAULT_PROBE_TIMEOUT,
18
18
  DEFAULT_PROBE_URL,
19
- DEFAULT_REPO_DIR,
19
+ LEGACY_REPO_DIR,
20
20
  PortMapping,
21
21
  ProbeConfig,
22
22
  PythonVersion,
@@ -45,6 +45,14 @@ from dstack._internal.server.services.docker import ImageConfig, get_image_confi
45
45
  from dstack._internal.utils import crypto
46
46
  from dstack._internal.utils.common import run_async
47
47
  from dstack._internal.utils.interpolator import InterpolatorError, VariablesInterpolator
48
+ from dstack._internal.utils.logging import get_logger
49
+ from dstack._internal.utils.path import is_absolute_posix_path
50
+
51
+ logger = get_logger(__name__)
52
+
53
+
54
+ DSTACK_DIR = "/dstack"
55
+ DSTACK_PROFILE_PATH = f"{DSTACK_DIR}/profile"
48
56
 
49
57
 
50
58
  def get_default_python_verison() -> str:
@@ -160,6 +168,7 @@ class JobConfigurator(ABC):
160
168
  ssh_key=self._ssh_key(jobs_per_replica),
161
169
  repo_data=self.run_spec.repo_data,
162
170
  repo_code_hash=self.run_spec.repo_code_hash,
171
+ repo_dir=self._repo_dir(),
163
172
  file_archives=self.run_spec.file_archives,
164
173
  service_port=self._service_port(),
165
174
  probes=self._probes(),
@@ -209,9 +218,17 @@ class JobConfigurator(ABC):
209
218
  ):
210
219
  return []
211
220
  return [
212
- f"uv venv --python {self._python()} --prompt workflow --seed {DEFAULT_REPO_DIR}/.venv > /dev/null 2>&1",
213
- f"echo 'source {DEFAULT_REPO_DIR}/.venv/bin/activate' >> ~/.bashrc",
214
- f"source {DEFAULT_REPO_DIR}/.venv/bin/activate",
221
+ # `uv` may emit:
222
+ # > warning: `VIRTUAL_ENV=/dstack/venv` does not match the project environment path
223
+ # > `.venv` and will be ignored; use `--active` to target the active environment
224
+ # > instead
225
+ # Safe to ignore, reusing dstack's venv for `uv` is discouraged (it should only be
226
+ # used for legacy `pip`-based configurations). `--no-active` suppresses the warning.
227
+ # Alternatively, the user can call `deactivate` once before using `uv`.
228
+ # If the user really wants to reuse dstack's venv, they must specify `--active`.
229
+ f"uv venv -q --prompt dstack -p {self._python()} --seed {DSTACK_DIR}/venv",
230
+ f"echo '. {DSTACK_DIR}/venv/bin/activate' >> {DSTACK_PROFILE_PATH}",
231
+ f". {DSTACK_DIR}/venv/bin/activate",
215
232
  ]
216
233
 
217
234
  def _app_specs(self) -> List[AppSpec]:
@@ -290,11 +307,34 @@ class JobConfigurator(ABC):
290
307
  def _retry(self) -> Optional[Retry]:
291
308
  return get_retry(self.run_spec.merged_profile)
292
309
 
310
+ def _repo_dir(self) -> str:
311
+ """
312
+ Returns absolute or relative path
313
+ """
314
+ repo_dir = self.run_spec.repo_dir
315
+ if repo_dir is None:
316
+ return LEGACY_REPO_DIR
317
+ return repo_dir
318
+
293
319
  def _working_dir(self) -> Optional[str]:
294
320
  """
295
- None means default working directory
321
+ Returns path or None
322
+
323
+ None means the default working directory taken from the image
324
+
325
+ Currently, for compatibility with pre-0.19.27 runners, the path may be relative.
326
+ Future versions should return only absolute paths
296
327
  """
297
- return self.run_spec.working_dir
328
+ working_dir = self.run_spec.configuration.working_dir
329
+ if working_dir is None:
330
+ return working_dir
331
+ # Return a relative path if possible
332
+ if is_absolute_posix_path(working_dir):
333
+ try:
334
+ return str(PurePosixPath(working_dir).relative_to(LEGACY_REPO_DIR))
335
+ except ValueError:
336
+ pass
337
+ return working_dir
298
338
 
299
339
  def _python(self) -> str:
300
340
  if self.run_spec.configuration.python is not None:
@@ -9,8 +9,8 @@ from dstack._internal.server.services.jobs.configurators.extensions.cursor impor
9
9
  from dstack._internal.server.services.jobs.configurators.extensions.vscode import VSCodeDesktop
10
10
 
11
11
  INSTALL_IPYKERNEL = (
12
- "(echo pip install ipykernel... && pip install -q --no-cache-dir ipykernel 2> /dev/null) || "
13
- 'echo "no pip, ipykernel was not installed"'
12
+ "(echo 'pip install ipykernel...' && pip install -q --no-cache-dir ipykernel 2> /dev/null) || "
13
+ "echo 'no pip, ipykernel was not installed'"
14
14
  )
15
15
 
16
16
 
@@ -39,12 +39,12 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
39
39
  commands = self.ide.get_install_commands()
40
40
  commands.append(INSTALL_IPYKERNEL)
41
41
  commands += self.run_spec.configuration.setup
42
- commands.append("echo ''")
42
+ commands.append("echo")
43
43
  commands += self.run_spec.configuration.init
44
44
  commands += self.ide.get_print_readme_commands()
45
45
  commands += [
46
46
  f"echo 'To connect via SSH, use: `ssh {self.run_spec.run_name}`'",
47
- "echo ''",
47
+ "echo",
48
48
  "echo -n 'To exit, press Ctrl+C.'",
49
49
  ]
50
50
  commands += ["tail -f /dev/null"] # idle
@@ -1,7 +1,5 @@
1
1
  from typing import List, Optional
2
2
 
3
- from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
4
-
5
3
 
6
4
  class CursorDesktop:
7
5
  def __init__(
@@ -38,7 +36,7 @@ class CursorDesktop:
38
36
  def get_print_readme_commands(self) -> List[str]:
39
37
  return [
40
38
  "echo To open in Cursor, use link below:",
41
- "echo ''",
42
- f"echo ' cursor://vscode-remote/ssh-remote+{self.run_name}{DEFAULT_REPO_DIR}'", # TODO use $REPO_DIR
43
- "echo ''",
39
+ "echo",
40
+ f'echo " cursor://vscode-remote/ssh-remote+{self.run_name}$DSTACK_REPO_DIR"',
41
+ "echo",
44
42
  ]
@@ -1,7 +1,5 @@
1
1
  from typing import List, Optional
2
2
 
3
- from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
4
-
5
3
 
6
4
  class VSCodeDesktop:
7
5
  def __init__(
@@ -37,8 +35,8 @@ class VSCodeDesktop:
37
35
 
38
36
  def get_print_readme_commands(self) -> List[str]:
39
37
  return [
40
- "echo To open in VS Code Desktop, use link below:",
41
- "echo ''",
42
- f"echo ' vscode://vscode-remote/ssh-remote+{self.run_name}{DEFAULT_REPO_DIR}'", # TODO use $REPO_DIR
43
- "echo ''",
38
+ "echo 'To open in VS Code Desktop, use link below:'",
39
+ "echo",
40
+ f'echo " vscode://vscode-remote/ssh-remote+{self.run_name}$DSTACK_REPO_DIR"',
41
+ "echo",
44
42
  ]
@@ -23,6 +23,3 @@ class ServiceJobConfigurator(JobConfigurator):
23
23
 
24
24
  def _ports(self) -> List[PortMapping]:
25
25
  return []
26
-
27
- def _working_dir(self) -> Optional[str]:
28
- return None if not self._shell_commands() else super()._working_dir()
@@ -37,6 +37,3 @@ class TaskJobConfigurator(JobConfigurator):
37
37
  def _ports(self) -> List[PortMapping]:
38
38
  assert self.run_spec.configuration.type == "task"
39
39
  return self.run_spec.configuration.ports
40
-
41
- def _working_dir(self) -> Optional[str]:
42
- return None if not self._shell_commands() else super()._working_dir()