dstack 0.19.26__py3-none-any.whl → 0.19.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (68) hide show
  1. dstack/_internal/cli/commands/init.py +2 -2
  2. dstack/_internal/cli/services/configurators/run.py +114 -16
  3. dstack/_internal/cli/services/repos.py +1 -18
  4. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  5. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  6. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  7. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  8. dstack/_internal/core/backends/aws/compute.py +6 -1
  9. dstack/_internal/core/backends/base/compute.py +33 -5
  10. dstack/_internal/core/backends/base/offers.py +2 -0
  11. dstack/_internal/core/backends/configurators.py +15 -0
  12. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  13. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  14. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  15. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  16. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  17. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  18. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  19. dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
  20. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  21. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  22. dstack/_internal/core/backends/gcp/compute.py +32 -8
  23. dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
  24. dstack/_internal/core/backends/hotaisle/compute.py +1 -6
  25. dstack/_internal/core/backends/models.py +7 -0
  26. dstack/_internal/core/backends/nebius/compute.py +0 -7
  27. dstack/_internal/core/backends/oci/compute.py +4 -5
  28. dstack/_internal/core/backends/vultr/compute.py +1 -5
  29. dstack/_internal/core/compatibility/fleets.py +5 -0
  30. dstack/_internal/core/compatibility/runs.py +8 -1
  31. dstack/_internal/core/models/backends/base.py +5 -1
  32. dstack/_internal/core/models/configurations.py +21 -7
  33. dstack/_internal/core/models/files.py +1 -1
  34. dstack/_internal/core/models/fleets.py +75 -2
  35. dstack/_internal/core/models/runs.py +24 -5
  36. dstack/_internal/core/services/repos.py +85 -80
  37. dstack/_internal/server/background/tasks/process_fleets.py +109 -13
  38. dstack/_internal/server/background/tasks/process_instances.py +12 -71
  39. dstack/_internal/server/background/tasks/process_running_jobs.py +2 -0
  40. dstack/_internal/server/background/tasks/process_runs.py +2 -0
  41. dstack/_internal/server/background/tasks/process_submitted_jobs.py +18 -6
  42. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  43. dstack/_internal/server/models.py +5 -2
  44. dstack/_internal/server/schemas/runner.py +1 -0
  45. dstack/_internal/server/services/fleets.py +23 -25
  46. dstack/_internal/server/services/instances.py +3 -3
  47. dstack/_internal/server/services/jobs/configurators/base.py +46 -6
  48. dstack/_internal/server/services/jobs/configurators/dev.py +4 -4
  49. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -5
  50. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +4 -6
  51. dstack/_internal/server/services/jobs/configurators/service.py +0 -3
  52. dstack/_internal/server/services/jobs/configurators/task.py +0 -3
  53. dstack/_internal/server/services/runs.py +16 -0
  54. dstack/_internal/server/statics/index.html +1 -1
  55. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-4eecc75fbe64067eb1bc.js} +1146 -899
  56. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-4eecc75fbe64067eb1bc.js.map} +1 -1
  57. dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-56191c63d516fd0041c4.css} +1 -1
  58. dstack/_internal/server/testing/common.py +6 -3
  59. dstack/_internal/utils/path.py +8 -1
  60. dstack/_internal/utils/ssh.py +7 -0
  61. dstack/api/_public/repos.py +41 -6
  62. dstack/api/_public/runs.py +14 -1
  63. dstack/version.py +1 -1
  64. {dstack-0.19.26.dist-info → dstack-0.19.27.dist-info}/METADATA +2 -2
  65. {dstack-0.19.26.dist-info → dstack-0.19.27.dist-info}/RECORD +68 -53
  66. {dstack-0.19.26.dist-info → dstack-0.19.27.dist-info}/WHEEL +0 -0
  67. {dstack-0.19.26.dist-info → dstack-0.19.27.dist-info}/entry_points.txt +0 -0
  68. {dstack-0.19.26.dist-info → dstack-0.19.27.dist-info}/licenses/LICENSE.md +0 -0
@@ -53,14 +53,12 @@ from dstack._internal.core.models.placement import (
53
53
  PlacementStrategy,
54
54
  )
55
55
  from dstack._internal.core.models.profiles import (
56
- RetryEvent,
57
56
  TerminationPolicy,
58
57
  )
59
58
  from dstack._internal.core.models.runs import (
60
59
  JobProvisioningData,
61
60
  Retry,
62
61
  )
63
- from dstack._internal.core.services.profiles import get_retry
64
62
  from dstack._internal.server import settings as server_settings
65
63
  from dstack._internal.server.background.tasks.common import get_provisioning_timeout
66
64
  from dstack._internal.server.db import get_db, get_session_ctx
@@ -327,7 +325,6 @@ async def _add_remote(instance: InstanceModel) -> None:
327
325
  e,
328
326
  )
329
327
  instance.status = InstanceStatus.PENDING
330
- instance.last_retry_at = get_current_datetime()
331
328
  return
332
329
 
333
330
  instance_type = host_info_to_instance_type(host_info, cpu_arch)
@@ -426,7 +423,6 @@ async def _add_remote(instance: InstanceModel) -> None:
426
423
  instance.offer = instance_offer.json()
427
424
  instance.job_provisioning_data = jpd.json()
428
425
  instance.started_at = get_current_datetime()
429
- instance.last_retry_at = get_current_datetime()
430
426
 
431
427
 
432
428
  def _deploy_instance(
@@ -493,29 +489,6 @@ def _deploy_instance(
493
489
 
494
490
 
495
491
  async def _create_instance(session: AsyncSession, instance: InstanceModel) -> None:
496
- if instance.last_retry_at is not None:
497
- last_retry = instance.last_retry_at
498
- if get_current_datetime() < last_retry + timedelta(minutes=1):
499
- return
500
-
501
- if (
502
- instance.profile is None
503
- or instance.requirements is None
504
- or instance.instance_configuration is None
505
- ):
506
- instance.status = InstanceStatus.TERMINATED
507
- instance.termination_reason = "Empty profile, requirements or instance_configuration"
508
- instance.last_retry_at = get_current_datetime()
509
- logger.warning(
510
- "Empty profile, requirements or instance_configuration. Terminate instance: %s",
511
- instance.name,
512
- extra={
513
- "instance_name": instance.name,
514
- "instance_status": InstanceStatus.TERMINATED.value,
515
- },
516
- )
517
- return
518
-
519
492
  if _need_to_wait_fleet_provisioning(instance):
520
493
  logger.debug("Waiting for the first instance in the fleet to be provisioned")
521
494
  return
@@ -529,7 +502,6 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
529
502
  instance.termination_reason = (
530
503
  f"Error to parse profile, requirements or instance_configuration: {e}"
531
504
  )
532
- instance.last_retry_at = get_current_datetime()
533
505
  logger.warning(
534
506
  "Error to parse profile, requirements or instance_configuration. Terminate instance: %s",
535
507
  instance.name,
@@ -540,24 +512,6 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
540
512
  )
541
513
  return
542
514
 
543
- retry = get_retry(profile)
544
- should_retry = retry is not None and RetryEvent.NO_CAPACITY in retry.on_events
545
-
546
- if retry is not None:
547
- retry_duration_deadline = _get_retry_duration_deadline(instance, retry)
548
- if get_current_datetime() > retry_duration_deadline:
549
- instance.status = InstanceStatus.TERMINATED
550
- instance.termination_reason = "Retry duration expired"
551
- logger.warning(
552
- "Retry duration expired. Terminating instance %s",
553
- instance.name,
554
- extra={
555
- "instance_name": instance.name,
556
- "instance_status": InstanceStatus.TERMINATED.value,
557
- },
558
- )
559
- return
560
-
561
515
  placement_group_models = []
562
516
  placement_group_model = None
563
517
  if instance.fleet_id:
@@ -595,15 +549,6 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
595
549
  exclude_not_available=True,
596
550
  )
597
551
 
598
- if not offers and should_retry:
599
- instance.last_retry_at = get_current_datetime()
600
- logger.debug(
601
- "No offers for instance %s. Next retry",
602
- instance.name,
603
- extra={"instance_name": instance.name},
604
- )
605
- return
606
-
607
552
  # Limit number of offers tried to prevent long-running processing
608
553
  # in case all offers fail.
609
554
  for backend, instance_offer in offers[: server_settings.MAX_OFFERS_TRIED]:
@@ -681,7 +626,6 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
681
626
  instance.offer = instance_offer.json()
682
627
  instance.total_blocks = instance_offer.total_blocks
683
628
  instance.started_at = get_current_datetime()
684
- instance.last_retry_at = get_current_datetime()
685
629
 
686
630
  logger.info(
687
631
  "Created instance %s",
@@ -702,21 +646,18 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
702
646
  )
703
647
  return
704
648
 
705
- instance.last_retry_at = get_current_datetime()
706
-
707
- if not should_retry:
708
- _mark_terminated(instance, "All offers failed" if offers else "No offers found")
709
- if (
710
- instance.fleet
711
- and _is_fleet_master_instance(instance)
712
- and _is_cloud_cluster(instance.fleet)
713
- ):
714
- # Do not attempt to deploy other instances, as they won't determine the correct cluster
715
- # backend, region, and placement group without a successfully deployed master instance
716
- for sibling_instance in instance.fleet.instances:
717
- if sibling_instance.id == instance.id:
718
- continue
719
- _mark_terminated(sibling_instance, "Master instance failed to start")
649
+ _mark_terminated(instance, "All offers failed" if offers else "No offers found")
650
+ if (
651
+ instance.fleet
652
+ and _is_fleet_master_instance(instance)
653
+ and _is_cloud_cluster(instance.fleet)
654
+ ):
655
+ # Do not attempt to deploy other instances, as they won't determine the correct cluster
656
+ # backend, region, and placement group without a successfully deployed master instance
657
+ for sibling_instance in instance.fleet.instances:
658
+ if sibling_instance.id == instance.id:
659
+ continue
660
+ _mark_terminated(sibling_instance, "Master instance failed to start")
720
661
 
721
662
 
722
663
  def _mark_terminated(instance: InstanceModel, termination_reason: str) -> None:
@@ -41,6 +41,7 @@ from dstack._internal.core.models.volumes import InstanceMountPoint, Volume, Vol
41
41
  from dstack._internal.server.background.tasks.common import get_provisioning_timeout
42
42
  from dstack._internal.server.db import get_db, get_session_ctx
43
43
  from dstack._internal.server.models import (
44
+ FleetModel,
44
45
  InstanceModel,
45
46
  JobModel,
46
47
  ProbeModel,
@@ -151,6 +152,7 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
151
152
  .options(joinedload(RunModel.project))
152
153
  .options(joinedload(RunModel.user))
153
154
  .options(joinedload(RunModel.repo))
155
+ .options(joinedload(RunModel.fleet).load_only(FleetModel.id, FleetModel.name))
154
156
  .options(joinedload(RunModel.jobs))
155
157
  )
156
158
  run_model = res.unique().scalar_one()
@@ -21,6 +21,7 @@ from dstack._internal.core.models.runs import (
21
21
  )
22
22
  from dstack._internal.server.db import get_db, get_session_ctx
23
23
  from dstack._internal.server.models import (
24
+ FleetModel,
24
25
  InstanceModel,
25
26
  JobModel,
26
27
  ProjectModel,
@@ -145,6 +146,7 @@ async def _process_run(session: AsyncSession, run_model: RunModel):
145
146
  .execution_options(populate_existing=True)
146
147
  .options(joinedload(RunModel.project).load_only(ProjectModel.id, ProjectModel.name))
147
148
  .options(joinedload(RunModel.user).load_only(UserModel.name))
149
+ .options(joinedload(RunModel.fleet).load_only(FleetModel.id, FleetModel.name))
148
150
  .options(
149
151
  selectinload(RunModel.jobs)
150
152
  .joinedload(JobModel.instance)
@@ -5,7 +5,7 @@ import uuid
5
5
  from datetime import datetime, timedelta
6
6
  from typing import List, Optional, Tuple
7
7
 
8
- from sqlalchemy import and_, func, not_, or_, select
8
+ from sqlalchemy import and_, not_, or_, select
9
9
  from sqlalchemy.ext.asyncio import AsyncSession
10
10
  from sqlalchemy.orm import contains_eager, joinedload, load_only, noload, selectinload
11
11
 
@@ -16,6 +16,7 @@ from dstack._internal.core.models.common import NetworkMode
16
16
  from dstack._internal.core.models.fleets import (
17
17
  Fleet,
18
18
  FleetConfiguration,
19
+ FleetNodesSpec,
19
20
  FleetSpec,
20
21
  FleetStatus,
21
22
  InstanceGroupPlacement,
@@ -26,7 +27,7 @@ from dstack._internal.core.models.profiles import (
26
27
  CreationPolicy,
27
28
  TerminationPolicy,
28
29
  )
29
- from dstack._internal.core.models.resources import Memory, Range
30
+ from dstack._internal.core.models.resources import Memory
30
31
  from dstack._internal.core.models.runs import (
31
32
  Job,
32
33
  JobProvisioningData,
@@ -54,6 +55,7 @@ from dstack._internal.server.services.backends import get_project_backend_by_typ
54
55
  from dstack._internal.server.services.fleets import (
55
56
  fleet_model_to_fleet,
56
57
  get_fleet_requirements,
58
+ get_next_instance_num,
57
59
  )
58
60
  from dstack._internal.server.services.instances import (
59
61
  filter_pool_instances,
@@ -384,6 +386,8 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
384
386
  instance_num=instance_num,
385
387
  )
386
388
  job_model.job_runtime_data = _prepare_job_runtime_data(offer).json()
389
+ # Both this task and process_fleets can add instances to fleets.
390
+ # TODO: Ensure this does not violate nodes.max when it's enforced.
387
391
  instance.fleet_id = fleet_model.id
388
392
  logger.info(
389
393
  "The job %s created the new instance %s",
@@ -755,12 +759,17 @@ def _create_fleet_model_for_job(
755
759
  placement = InstanceGroupPlacement.ANY
756
760
  if run.run_spec.configuration.type == "task" and run.run_spec.configuration.nodes > 1:
757
761
  placement = InstanceGroupPlacement.CLUSTER
762
+ nodes = _get_nodes_required_num_for_run(run.run_spec)
758
763
  spec = FleetSpec(
759
764
  configuration=FleetConfiguration(
760
765
  name=run.run_spec.run_name,
761
766
  placement=placement,
762
767
  reservation=run.run_spec.configuration.reservation,
763
- nodes=Range(min=_get_nodes_required_num_for_run(run.run_spec), max=None),
768
+ nodes=FleetNodesSpec(
769
+ min=nodes,
770
+ target=nodes,
771
+ max=None,
772
+ ),
764
773
  ),
765
774
  profile=run.run_spec.merged_profile,
766
775
  autocreated=True,
@@ -778,10 +787,13 @@ def _create_fleet_model_for_job(
778
787
 
779
788
  async def _get_next_instance_num(session: AsyncSession, fleet_model: FleetModel) -> int:
780
789
  res = await session.execute(
781
- select(func.count(InstanceModel.id)).where(InstanceModel.fleet_id == fleet_model.id)
790
+ select(InstanceModel.instance_num).where(
791
+ InstanceModel.fleet_id == fleet_model.id,
792
+ InstanceModel.deleted.is_(False),
793
+ )
782
794
  )
783
- instance_count = res.scalar_one()
784
- return instance_count
795
+ taken_instance_nums = set(res.scalars().all())
796
+ return get_next_instance_num(taken_instance_nums)
785
797
 
786
798
 
787
799
  def _create_instance_model_for_job(
@@ -0,0 +1,44 @@
1
+ """Add FleetModel.consolidation_attempt and FleetModel.last_consolidated_at
2
+
3
+ Revision ID: 2498ab323443
4
+ Revises: e2d08cd1b8d9
5
+ Create Date: 2025-08-29 16:08:48.686595
6
+
7
+ """
8
+
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+
12
+ import dstack._internal.server.models
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision = "2498ab323443"
16
+ down_revision = "e2d08cd1b8d9"
17
+ branch_labels = None
18
+ depends_on = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ with op.batch_alter_table("fleets", schema=None) as batch_op:
24
+ batch_op.add_column(
25
+ sa.Column("consolidation_attempt", sa.Integer(), server_default="0", nullable=False)
26
+ )
27
+ batch_op.add_column(
28
+ sa.Column(
29
+ "last_consolidated_at",
30
+ dstack._internal.server.models.NaiveDateTime(),
31
+ nullable=True,
32
+ )
33
+ )
34
+
35
+ # ### end Alembic commands ###
36
+
37
+
38
+ def downgrade() -> None:
39
+ # ### commands auto generated by Alembic - please adjust! ###
40
+ with op.batch_alter_table("fleets", schema=None) as batch_op:
41
+ batch_op.drop_column("last_consolidated_at")
42
+ batch_op.drop_column("consolidation_attempt")
43
+
44
+ # ### end Alembic commands ###
@@ -551,6 +551,9 @@ class FleetModel(BaseModel):
551
551
  jobs: Mapped[List["JobModel"]] = relationship(back_populates="fleet")
552
552
  instances: Mapped[List["InstanceModel"]] = relationship(back_populates="fleet")
553
553
 
554
+ consolidation_attempt: Mapped[int] = mapped_column(Integer, server_default="0")
555
+ last_consolidated_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
556
+
554
557
 
555
558
  class InstanceModel(BaseModel):
556
559
  __tablename__ = "instances"
@@ -605,8 +608,8 @@ class InstanceModel(BaseModel):
605
608
  Integer, default=DEFAULT_FLEET_TERMINATION_IDLE_TIME
606
609
  )
607
610
 
608
- # retry policy
609
- last_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
611
+ # Deprecated
612
+ last_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime, deferred=True)
610
613
 
611
614
  # instance termination handling
612
615
  termination_deadline: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
@@ -78,6 +78,7 @@ class SubmitBody(CoreModel):
78
78
  "max_duration",
79
79
  "ssh_key",
80
80
  "working_dir",
81
+ "repo_dir",
81
82
  "repo_data",
82
83
  "file_archives",
83
84
  }
@@ -449,25 +449,24 @@ async def create_fleet(
449
449
  return await _create_fleet(session=session, project=project, user=user, spec=spec)
450
450
 
451
451
 
452
- async def create_fleet_instance_model(
452
+ def create_fleet_instance_model(
453
453
  session: AsyncSession,
454
454
  project: ProjectModel,
455
- user: UserModel,
455
+ username: str,
456
456
  spec: FleetSpec,
457
- reservation: Optional[str],
458
457
  instance_num: int,
459
458
  ) -> InstanceModel:
460
459
  profile = spec.merged_profile
461
460
  requirements = get_fleet_requirements(spec)
462
- instance_model = await instances_services.create_instance_model(
461
+ instance_model = instances_services.create_instance_model(
463
462
  session=session,
464
463
  project=project,
465
- user=user,
464
+ username=username,
466
465
  profile=profile,
467
466
  requirements=requirements,
468
467
  instance_name=f"{spec.configuration.name}-{instance_num}",
469
468
  instance_num=instance_num,
470
- reservation=reservation,
469
+ reservation=spec.merged_profile.reservation,
471
470
  blocks=spec.configuration.blocks,
472
471
  tags=spec.configuration.tags,
473
472
  )
@@ -655,6 +654,19 @@ def get_fleet_requirements(fleet_spec: FleetSpec) -> Requirements:
655
654
  return requirements
656
655
 
657
656
 
657
+ def get_next_instance_num(taken_instance_nums: set[int]) -> int:
658
+ if not taken_instance_nums:
659
+ return 0
660
+ min_instance_num = min(taken_instance_nums)
661
+ if min_instance_num > 0:
662
+ return 0
663
+ instance_num = min_instance_num + 1
664
+ while True:
665
+ if instance_num not in taken_instance_nums:
666
+ return instance_num
667
+ instance_num += 1
668
+
669
+
658
670
  async def _create_fleet(
659
671
  session: AsyncSession,
660
672
  project: ProjectModel,
@@ -705,12 +717,11 @@ async def _create_fleet(
705
717
  fleet_model.instances.append(instances_model)
706
718
  else:
707
719
  for i in range(_get_fleet_nodes_to_provision(spec)):
708
- instance_model = await create_fleet_instance_model(
720
+ instance_model = create_fleet_instance_model(
709
721
  session=session,
710
722
  project=project,
711
- user=user,
723
+ username=user.name,
712
724
  spec=spec,
713
- reservation=spec.configuration.reservation,
714
725
  instance_num=i,
715
726
  )
716
727
  fleet_model.instances.append(instance_model)
@@ -778,7 +789,7 @@ async def _update_fleet(
778
789
  if added_hosts:
779
790
  await _check_ssh_hosts_not_yet_added(session, spec, fleet.id)
780
791
  for host in added_hosts.values():
781
- instance_num = _get_next_instance_num(active_instance_nums)
792
+ instance_num = get_next_instance_num(active_instance_nums)
782
793
  instance_model = await create_fleet_ssh_instance_model(
783
794
  project=project,
784
795
  spec=spec,
@@ -994,9 +1005,9 @@ def _validate_internal_ips(ssh_config: SSHParams):
994
1005
 
995
1006
 
996
1007
  def _get_fleet_nodes_to_provision(spec: FleetSpec) -> int:
997
- if spec.configuration.nodes is None or spec.configuration.nodes.min is None:
1008
+ if spec.configuration.nodes is None:
998
1009
  return 0
999
- return spec.configuration.nodes.min
1010
+ return spec.configuration.nodes.target
1000
1011
 
1001
1012
 
1002
1013
  def _terminate_fleet_instances(fleet_model: FleetModel, instance_nums: Optional[List[int]]):
@@ -1013,16 +1024,3 @@ def _terminate_fleet_instances(fleet_model: FleetModel, instance_nums: Optional[
1013
1024
  instance.deleted = True
1014
1025
  else:
1015
1026
  instance.status = InstanceStatus.TERMINATING
1016
-
1017
-
1018
- def _get_next_instance_num(instance_nums: set[int]) -> int:
1019
- if not instance_nums:
1020
- return 0
1021
- min_instance_num = min(instance_nums)
1022
- if min_instance_num > 0:
1023
- return 0
1024
- instance_num = min_instance_num + 1
1025
- while True:
1026
- if instance_num not in instance_nums:
1027
- return instance_num
1028
- instance_num += 1
@@ -513,10 +513,10 @@ async def list_active_remote_instances(
513
513
  return instance_models
514
514
 
515
515
 
516
- async def create_instance_model(
516
+ def create_instance_model(
517
517
  session: AsyncSession,
518
518
  project: ProjectModel,
519
- user: UserModel,
519
+ username: str,
520
520
  profile: Profile,
521
521
  requirements: Requirements,
522
522
  instance_name: str,
@@ -536,7 +536,7 @@ async def create_instance_model(
536
536
  instance_config = InstanceConfiguration(
537
537
  project_name=project.name,
538
538
  instance_name=instance_name,
539
- user=user.name,
539
+ user=username,
540
540
  ssh_keys=[project_ssh_key],
541
541
  instance_id=str(instance_id),
542
542
  reservation=reservation,
@@ -16,7 +16,7 @@ from dstack._internal.core.models.configurations import (
16
16
  DEFAULT_PROBE_READY_AFTER,
17
17
  DEFAULT_PROBE_TIMEOUT,
18
18
  DEFAULT_PROBE_URL,
19
- DEFAULT_REPO_DIR,
19
+ LEGACY_REPO_DIR,
20
20
  PortMapping,
21
21
  ProbeConfig,
22
22
  PythonVersion,
@@ -45,6 +45,14 @@ from dstack._internal.server.services.docker import ImageConfig, get_image_confi
45
45
  from dstack._internal.utils import crypto
46
46
  from dstack._internal.utils.common import run_async
47
47
  from dstack._internal.utils.interpolator import InterpolatorError, VariablesInterpolator
48
+ from dstack._internal.utils.logging import get_logger
49
+ from dstack._internal.utils.path import is_absolute_posix_path
50
+
51
+ logger = get_logger(__name__)
52
+
53
+
54
+ DSTACK_DIR = "/dstack"
55
+ DSTACK_PROFILE_PATH = f"{DSTACK_DIR}/profile"
48
56
 
49
57
 
50
58
  def get_default_python_verison() -> str:
@@ -160,6 +168,7 @@ class JobConfigurator(ABC):
160
168
  ssh_key=self._ssh_key(jobs_per_replica),
161
169
  repo_data=self.run_spec.repo_data,
162
170
  repo_code_hash=self.run_spec.repo_code_hash,
171
+ repo_dir=self._repo_dir(),
163
172
  file_archives=self.run_spec.file_archives,
164
173
  service_port=self._service_port(),
165
174
  probes=self._probes(),
@@ -209,9 +218,17 @@ class JobConfigurator(ABC):
209
218
  ):
210
219
  return []
211
220
  return [
212
- f"uv venv --python {self._python()} --prompt workflow --seed {DEFAULT_REPO_DIR}/.venv > /dev/null 2>&1",
213
- f"echo 'source {DEFAULT_REPO_DIR}/.venv/bin/activate' >> ~/.bashrc",
214
- f"source {DEFAULT_REPO_DIR}/.venv/bin/activate",
221
+ # `uv` may emit:
222
+ # > warning: `VIRTUAL_ENV=/dstack/venv` does not match the project environment path
223
+ # > `.venv` and will be ignored; use `--active` to target the active environment
224
+ # > instead
225
+ # Safe to ignore, reusing dstack's venv for `uv` is discouraged (it should only be
226
+ # used for legacy `pip`-based configurations). `--no-active` suppresses the warning.
227
+ # Alternatively, the user can call `deactivate` once before using `uv`.
228
+ # If the user really wants to reuse dstack's venv, they must specify `--active`.
229
+ f"uv venv -q --prompt dstack -p {self._python()} --seed {DSTACK_DIR}/venv",
230
+ f"echo '. {DSTACK_DIR}/venv/bin/activate' >> {DSTACK_PROFILE_PATH}",
231
+ f". {DSTACK_DIR}/venv/bin/activate",
215
232
  ]
216
233
 
217
234
  def _app_specs(self) -> List[AppSpec]:
@@ -290,11 +307,34 @@ class JobConfigurator(ABC):
290
307
  def _retry(self) -> Optional[Retry]:
291
308
  return get_retry(self.run_spec.merged_profile)
292
309
 
310
+ def _repo_dir(self) -> str:
311
+ """
312
+ Returns absolute or relative path
313
+ """
314
+ repo_dir = self.run_spec.repo_dir
315
+ if repo_dir is None:
316
+ return LEGACY_REPO_DIR
317
+ return repo_dir
318
+
293
319
  def _working_dir(self) -> Optional[str]:
294
320
  """
295
- None means default working directory
321
+ Returns path or None
322
+
323
+ None means the default working directory taken from the image
324
+
325
+ Currently, for compatibility with pre-0.19.27 runners, the path may be relative.
326
+ Future versions should return only absolute paths
296
327
  """
297
- return self.run_spec.working_dir
328
+ working_dir = self.run_spec.configuration.working_dir
329
+ if working_dir is None:
330
+ return working_dir
331
+ # Return a relative path if possible
332
+ if is_absolute_posix_path(working_dir):
333
+ try:
334
+ return str(PurePosixPath(working_dir).relative_to(LEGACY_REPO_DIR))
335
+ except ValueError:
336
+ pass
337
+ return working_dir
298
338
 
299
339
  def _python(self) -> str:
300
340
  if self.run_spec.configuration.python is not None:
@@ -9,8 +9,8 @@ from dstack._internal.server.services.jobs.configurators.extensions.cursor impor
9
9
  from dstack._internal.server.services.jobs.configurators.extensions.vscode import VSCodeDesktop
10
10
 
11
11
  INSTALL_IPYKERNEL = (
12
- "(echo pip install ipykernel... && pip install -q --no-cache-dir ipykernel 2> /dev/null) || "
13
- 'echo "no pip, ipykernel was not installed"'
12
+ "(echo 'pip install ipykernel...' && pip install -q --no-cache-dir ipykernel 2> /dev/null) || "
13
+ "echo 'no pip, ipykernel was not installed'"
14
14
  )
15
15
 
16
16
 
@@ -39,12 +39,12 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
39
39
  commands = self.ide.get_install_commands()
40
40
  commands.append(INSTALL_IPYKERNEL)
41
41
  commands += self.run_spec.configuration.setup
42
- commands.append("echo ''")
42
+ commands.append("echo")
43
43
  commands += self.run_spec.configuration.init
44
44
  commands += self.ide.get_print_readme_commands()
45
45
  commands += [
46
46
  f"echo 'To connect via SSH, use: `ssh {self.run_spec.run_name}`'",
47
- "echo ''",
47
+ "echo",
48
48
  "echo -n 'To exit, press Ctrl+C.'",
49
49
  ]
50
50
  commands += ["tail -f /dev/null"] # idle
@@ -1,7 +1,5 @@
1
1
  from typing import List, Optional
2
2
 
3
- from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
4
-
5
3
 
6
4
  class CursorDesktop:
7
5
  def __init__(
@@ -38,7 +36,7 @@ class CursorDesktop:
38
36
  def get_print_readme_commands(self) -> List[str]:
39
37
  return [
40
38
  "echo To open in Cursor, use link below:",
41
- "echo ''",
42
- f"echo ' cursor://vscode-remote/ssh-remote+{self.run_name}{DEFAULT_REPO_DIR}'", # TODO use $REPO_DIR
43
- "echo ''",
39
+ "echo",
40
+ f'echo " cursor://vscode-remote/ssh-remote+{self.run_name}$DSTACK_REPO_DIR"',
41
+ "echo",
44
42
  ]
@@ -1,7 +1,5 @@
1
1
  from typing import List, Optional
2
2
 
3
- from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
4
-
5
3
 
6
4
  class VSCodeDesktop:
7
5
  def __init__(
@@ -37,8 +35,8 @@ class VSCodeDesktop:
37
35
 
38
36
  def get_print_readme_commands(self) -> List[str]:
39
37
  return [
40
- "echo To open in VS Code Desktop, use link below:",
41
- "echo ''",
42
- f"echo ' vscode://vscode-remote/ssh-remote+{self.run_name}{DEFAULT_REPO_DIR}'", # TODO use $REPO_DIR
43
- "echo ''",
38
+ "echo 'To open in VS Code Desktop, use link below:'",
39
+ "echo",
40
+ f'echo " vscode://vscode-remote/ssh-remote+{self.run_name}$DSTACK_REPO_DIR"',
41
+ "echo",
44
42
  ]
@@ -23,6 +23,3 @@ class ServiceJobConfigurator(JobConfigurator):
23
23
 
24
24
  def _ports(self) -> List[PortMapping]:
25
25
  return []
26
-
27
- def _working_dir(self) -> Optional[str]:
28
- return None if not self._shell_commands() else super()._working_dir()
@@ -37,6 +37,3 @@ class TaskJobConfigurator(JobConfigurator):
37
37
  def _ports(self) -> List[PortMapping]:
38
38
  assert self.run_spec.configuration.type == "task"
39
39
  return self.run_spec.configuration.ports
40
-
41
- def _working_dir(self) -> Optional[str]:
42
- return None if not self._shell_commands() else super()._working_dir()