dstack 0.19.25rc1__py3-none-any.whl → 0.19.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of dstack might be problematic.

Files changed (128)
  1. dstack/_internal/cli/commands/__init__.py +2 -2
  2. dstack/_internal/cli/commands/apply.py +3 -61
  3. dstack/_internal/cli/commands/attach.py +1 -1
  4. dstack/_internal/cli/commands/completion.py +1 -1
  5. dstack/_internal/cli/commands/delete.py +2 -2
  6. dstack/_internal/cli/commands/fleet.py +1 -1
  7. dstack/_internal/cli/commands/gateway.py +2 -2
  8. dstack/_internal/cli/commands/init.py +56 -24
  9. dstack/_internal/cli/commands/logs.py +1 -1
  10. dstack/_internal/cli/commands/metrics.py +1 -1
  11. dstack/_internal/cli/commands/offer.py +45 -7
  12. dstack/_internal/cli/commands/project.py +2 -2
  13. dstack/_internal/cli/commands/secrets.py +2 -2
  14. dstack/_internal/cli/commands/server.py +1 -1
  15. dstack/_internal/cli/commands/stop.py +1 -1
  16. dstack/_internal/cli/commands/volume.py +1 -1
  17. dstack/_internal/cli/main.py +2 -2
  18. dstack/_internal/cli/services/completion.py +2 -2
  19. dstack/_internal/cli/services/configurators/__init__.py +6 -2
  20. dstack/_internal/cli/services/configurators/base.py +6 -7
  21. dstack/_internal/cli/services/configurators/fleet.py +1 -3
  22. dstack/_internal/cli/services/configurators/gateway.py +2 -4
  23. dstack/_internal/cli/services/configurators/run.py +195 -58
  24. dstack/_internal/cli/services/configurators/volume.py +2 -4
  25. dstack/_internal/cli/services/profile.py +1 -1
  26. dstack/_internal/cli/services/repos.py +51 -47
  27. dstack/_internal/core/backends/aws/configurator.py +11 -7
  28. dstack/_internal/core/backends/azure/configurator.py +11 -7
  29. dstack/_internal/core/backends/base/configurator.py +25 -13
  30. dstack/_internal/core/backends/cloudrift/configurator.py +13 -7
  31. dstack/_internal/core/backends/cudo/configurator.py +11 -7
  32. dstack/_internal/core/backends/datacrunch/compute.py +5 -1
  33. dstack/_internal/core/backends/datacrunch/configurator.py +13 -7
  34. dstack/_internal/core/backends/gcp/configurator.py +11 -7
  35. dstack/_internal/core/backends/hotaisle/configurator.py +13 -7
  36. dstack/_internal/core/backends/kubernetes/configurator.py +13 -7
  37. dstack/_internal/core/backends/lambdalabs/configurator.py +11 -7
  38. dstack/_internal/core/backends/nebius/compute.py +1 -1
  39. dstack/_internal/core/backends/nebius/configurator.py +11 -7
  40. dstack/_internal/core/backends/nebius/resources.py +21 -11
  41. dstack/_internal/core/backends/oci/configurator.py +11 -7
  42. dstack/_internal/core/backends/runpod/configurator.py +11 -7
  43. dstack/_internal/core/backends/template/configurator.py.jinja +11 -7
  44. dstack/_internal/core/backends/tensordock/configurator.py +13 -7
  45. dstack/_internal/core/backends/vastai/configurator.py +11 -7
  46. dstack/_internal/core/backends/vultr/configurator.py +11 -4
  47. dstack/_internal/core/compatibility/gpus.py +13 -0
  48. dstack/_internal/core/compatibility/runs.py +1 -0
  49. dstack/_internal/core/models/common.py +3 -3
  50. dstack/_internal/core/models/configurations.py +172 -27
  51. dstack/_internal/core/models/files.py +1 -1
  52. dstack/_internal/core/models/fleets.py +5 -1
  53. dstack/_internal/core/models/profiles.py +41 -11
  54. dstack/_internal/core/models/resources.py +46 -42
  55. dstack/_internal/core/models/runs.py +4 -0
  56. dstack/_internal/core/services/configs/__init__.py +6 -3
  57. dstack/_internal/core/services/profiles.py +2 -2
  58. dstack/_internal/core/services/repos.py +5 -3
  59. dstack/_internal/core/services/ssh/ports.py +1 -1
  60. dstack/_internal/proxy/lib/deps.py +6 -2
  61. dstack/_internal/server/app.py +22 -17
  62. dstack/_internal/server/background/tasks/process_gateways.py +4 -1
  63. dstack/_internal/server/background/tasks/process_instances.py +10 -2
  64. dstack/_internal/server/background/tasks/process_probes.py +1 -1
  65. dstack/_internal/server/background/tasks/process_running_jobs.py +10 -4
  66. dstack/_internal/server/background/tasks/process_runs.py +1 -1
  67. dstack/_internal/server/background/tasks/process_submitted_jobs.py +54 -43
  68. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  69. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  70. dstack/_internal/server/db.py +8 -4
  71. dstack/_internal/server/models.py +1 -0
  72. dstack/_internal/server/routers/gpus.py +1 -6
  73. dstack/_internal/server/schemas/runner.py +10 -0
  74. dstack/_internal/server/services/backends/__init__.py +14 -8
  75. dstack/_internal/server/services/backends/handlers.py +6 -1
  76. dstack/_internal/server/services/docker.py +5 -5
  77. dstack/_internal/server/services/fleets.py +14 -13
  78. dstack/_internal/server/services/gateways/__init__.py +2 -0
  79. dstack/_internal/server/services/gateways/client.py +5 -2
  80. dstack/_internal/server/services/gateways/connection.py +1 -1
  81. dstack/_internal/server/services/gpus.py +50 -49
  82. dstack/_internal/server/services/instances.py +41 -1
  83. dstack/_internal/server/services/jobs/__init__.py +15 -4
  84. dstack/_internal/server/services/jobs/configurators/base.py +7 -11
  85. dstack/_internal/server/services/jobs/configurators/dev.py +5 -0
  86. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -3
  87. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +3 -3
  88. dstack/_internal/server/services/jobs/configurators/service.py +1 -0
  89. dstack/_internal/server/services/jobs/configurators/task.py +3 -0
  90. dstack/_internal/server/services/locking.py +5 -5
  91. dstack/_internal/server/services/logging.py +10 -2
  92. dstack/_internal/server/services/logs/__init__.py +8 -6
  93. dstack/_internal/server/services/logs/aws.py +330 -327
  94. dstack/_internal/server/services/logs/filelog.py +7 -6
  95. dstack/_internal/server/services/logs/gcp.py +141 -139
  96. dstack/_internal/server/services/plugins.py +1 -1
  97. dstack/_internal/server/services/projects.py +2 -5
  98. dstack/_internal/server/services/proxy/repo.py +5 -1
  99. dstack/_internal/server/services/requirements/__init__.py +0 -0
  100. dstack/_internal/server/services/requirements/combine.py +259 -0
  101. dstack/_internal/server/services/runner/client.py +7 -0
  102. dstack/_internal/server/services/runs.py +1 -1
  103. dstack/_internal/server/services/services/__init__.py +8 -2
  104. dstack/_internal/server/services/services/autoscalers.py +2 -0
  105. dstack/_internal/server/services/ssh.py +2 -1
  106. dstack/_internal/server/services/storage/__init__.py +5 -6
  107. dstack/_internal/server/services/storage/gcs.py +49 -49
  108. dstack/_internal/server/services/storage/s3.py +52 -52
  109. dstack/_internal/server/statics/index.html +1 -1
  110. dstack/_internal/server/testing/common.py +1 -1
  111. dstack/_internal/server/utils/logging.py +3 -3
  112. dstack/_internal/server/utils/provisioning.py +3 -3
  113. dstack/_internal/utils/json_schema.py +3 -1
  114. dstack/_internal/utils/typing.py +14 -0
  115. dstack/api/_public/repos.py +21 -2
  116. dstack/api/_public/runs.py +5 -7
  117. dstack/api/server/__init__.py +17 -19
  118. dstack/api/server/_gpus.py +2 -1
  119. dstack/api/server/_group.py +4 -3
  120. dstack/api/server/_repos.py +20 -3
  121. dstack/plugins/builtin/rest_plugin/_plugin.py +1 -0
  122. dstack/version.py +1 -1
  123. {dstack-0.19.25rc1.dist-info → dstack-0.19.26.dist-info}/METADATA +1 -1
  124. {dstack-0.19.25rc1.dist-info → dstack-0.19.26.dist-info}/RECORD +127 -124
  125. dstack/api/huggingface/__init__.py +0 -73
  126. {dstack-0.19.25rc1.dist-info → dstack-0.19.26.dist-info}/WHEEL +0 -0
  127. {dstack-0.19.25rc1.dist-info → dstack-0.19.26.dist-info}/entry_points.txt +0 -0
  128. {dstack-0.19.25rc1.dist-info → dstack-0.19.26.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -5,9 +5,9 @@ import uuid
 from datetime import datetime, timedelta
 from typing import List, Optional, Tuple
 
-from sqlalchemy import and_, or_, select
+from sqlalchemy import and_, func, not_, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import contains_eager, joinedload, load_only, selectinload
+from sqlalchemy.orm import contains_eager, joinedload, load_only, noload, selectinload
 
 from dstack._internal.core.backends.base.backend import Backend
 from dstack._internal.core.backends.base.compute import ComputeWithVolumeSupport
@@ -53,6 +53,7 @@ from dstack._internal.server.models import (
 from dstack._internal.server.services.backends import get_project_backend_by_type_or_error
 from dstack._internal.server.services.fleets import (
     fleet_model_to_fleet,
+    get_fleet_requirements,
 )
 from dstack._internal.server.services.instances import (
     filter_pool_instances,
@@ -71,6 +72,10 @@ from dstack._internal.server.services.jobs import (
 from dstack._internal.server.services.locking import get_locker
 from dstack._internal.server.services.logging import fmt
 from dstack._internal.server.services.offers import get_offers_by_requirements
+from dstack._internal.server.services.requirements.combine import (
+    combine_fleet_and_run_profiles,
+    combine_fleet_and_run_requirements,
+)
 from dstack._internal.server.services.runs import (
     check_run_spec_requires_instance_mounts,
     run_model_to_run,
@@ -148,8 +153,8 @@ async def _process_next_submitted_job():
     if job_model is None:
         return
     lockset.add(job_model.id)
+    job_model_id = job_model.id
     try:
-        job_model_id = job_model.id
         await _process_submitted_job(session=session, job_model=job_model)
     finally:
         lockset.difference_update([job_model_id])
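In this hunk (and in the matching terminating-jobs and volumes hunks further down) the `*_id` assignment moves out of the `try` block so the name used by the `finally` cleanup is always bound. A minimal generic sketch of the pattern, with hypothetical names (not dstack code):

```python
# Bind anything `finally` needs before entering `try`; otherwise an exception
# raised before the assignment would turn the cleanup itself into a NameError.
lockset: set[str] = set()


def process(item_id: str) -> None:
    lockset.add(item_id)
    bound_id = item_id  # bound before try, so `finally` can always see it
    try:
        print(f"processing {item_id}")  # stand-in for the real processing call
    finally:
        lockset.discard(bound_id)


process("job-1")
```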
dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -245,8 +250,8 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
     ]
     if run_model.fleet is not None:
         fleet_filters.append(FleetModel.id == run_model.fleet_id)
-    if run_spec.configuration.fleets is not None:
-        fleet_filters.append(FleetModel.name.in_(run_spec.configuration.fleets))
+    if run_spec.merged_profile.fleets is not None:
+        fleet_filters.append(FleetModel.name.in_(run_spec.merged_profile.fleets))
 
     instance_filters = [
         InstanceModel.deleted == False,
@@ -264,9 +269,6 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
            [i.id for i in f.instances] for f in fleet_models_with_instances
        )
    )
-    fleet_models = fleet_models_with_instances + fleet_models_without_instances
-    fleets_ids = [f.id for f in fleet_models]
-
    if get_db().dialect_name == "sqlite":
        # Start new transaction to see committed changes after lock
        await session.commit()
@@ -275,13 +277,15 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
        InstanceModel.__tablename__, instances_ids
    ):
        if get_db().dialect_name == "sqlite":
-            fleet_models = await _refetch_fleet_models(
+            fleets_with_instances_ids = [f.id for f in fleet_models_with_instances]
+            fleet_models_with_instances = await _refetch_fleet_models_with_instances(
                session=session,
-                fleets_ids=fleets_ids,
+                fleets_ids=fleets_with_instances_ids,
                instances_ids=instances_ids,
                fleet_filters=fleet_filters,
                instance_filters=instance_filters,
            )
+        fleet_models = fleet_models_with_instances + fleet_models_without_instances
        fleet_model, fleet_instances_with_offers = _find_optimal_fleet_with_offers(
            fleet_models=fleet_models,
            run_model=run_model,
@@ -290,7 +294,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
            master_job_provisioning_data=master_job_provisioning_data,
            volumes=volumes,
        )
-        if fleet_model is None and run_spec.configuration.fleets is not None:
+        if fleet_model is None and run_spec.merged_profile.fleets is not None:
            # Run cannot create new fleets when fleets are specified
            logger.debug("%s: failed to use specified fleets", fmt(job_model))
            job_model.status = JobStatus.TERMINATING
@@ -361,6 +365,10 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
                project=project,
                run=run,
            )
+            # FIXME: Fleet is not locked which may lead to duplicate instance_num.
+            # This is currently hard to fix without locking the fleet for entire provisioning duration.
+            # Processing should be done in multiple steps so that
+            # InstanceModel is created before provisioning.
            instance_num = await _get_next_instance_num(
                session=session,
                fleet_model=fleet_model,
@@ -438,14 +446,21 @@ async def _select_fleet_models(
            *fleet_filters,
            FleetModel.id.not_in(fleet_models_with_instances_ids),
        )
-        .where(InstanceModel.id.is_(None))
-        .options(contains_eager(FleetModel.instances))  # loading empty relation
+        .where(
+            or_(
+                InstanceModel.id.is_(None),
+                not_(and_(*instance_filters)),
+            )
+        )
+        # Load empty list of instances so that downstream code
+        # knows this fleet has no instances eligible for offers.
+        .options(noload(FleetModel.instances))
    )
    fleet_models_without_instances = list(res.unique().scalars().all())
    return fleet_models_with_instances, fleet_models_without_instances
 
 
-async def _refetch_fleet_models(
+async def _refetch_fleet_models_with_instances(
    session: AsyncSession,
    fleets_ids: list[uuid.UUID],
    instances_ids: list[uuid.UUID],
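The `_select_fleet_models` hunk swaps `contains_eager` for `noload` when collecting fleets that have no eligible instances. A self-contained SQLAlchemy 2.0 sketch of that loader option, using hypothetical `Fleet`/`Instance` models rather than dstack's:

```python
# noload() leaves the relationship as an explicitly empty collection instead
# of populating it from the filtered join (which contains_eager would do).
from typing import List, Optional

from sqlalchemy import ForeignKey, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, noload, relationship


class Base(DeclarativeBase):
    pass


class Fleet(Base):
    __tablename__ = "fleets"
    id: Mapped[int] = mapped_column(primary_key=True)
    instances: Mapped[List["Instance"]] = relationship(back_populates="fleet")


class Instance(Base):
    __tablename__ = "instances"
    id: Mapped[int] = mapped_column(primary_key=True)
    fleet_id: Mapped[Optional[int]] = mapped_column(ForeignKey("fleets.id"))
    deleted: Mapped[bool] = mapped_column(default=False)
    fleet: Mapped[Optional[Fleet]] = relationship(back_populates="instances")


# Fleets whose joined instances are missing or filtered out; .instances stays [].
stmt = (
    select(Fleet)
    .outerjoin(Instance, Instance.fleet_id == Fleet.id)
    .where(Instance.id.is_(None) | (Instance.deleted == True))  # noqa: E712
    .options(noload(Fleet.instances))
)
```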
dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -460,13 +475,8 @@ async def _refetch_fleet_models(
            *fleet_filters,
        )
        .where(
-            or_(
-                InstanceModel.id.is_(None),
-                and_(
-                    InstanceModel.id.in_(instances_ids),
-                    *instance_filters,
-                ),
-            )
+            InstanceModel.id.in_(instances_ids),
+            *instance_filters,
        )
        .options(contains_eager(FleetModel.instances))
        .execution_options(populate_existing=True)
@@ -533,7 +543,7 @@ def _find_optimal_fleet_with_offers(
                fleet_priority,
            )
        )
-    if run_spec.configuration.fleets is None and all(
+    if run_spec.merged_profile.fleets is None and all(
        t[2] == 0 for t in candidate_fleets_with_offers
    ):
        # If fleets are not specified and no fleets have available offers, create a new fleet.
@@ -646,6 +656,8 @@ async def _run_job_on_new_instance(
 ) -> Optional[Tuple[JobProvisioningData, InstanceOfferWithAvailability]]:
    if volumes is None:
        volumes = []
+    profile = run.run_spec.merged_profile
+    requirements = job.job_spec.requirements
    fleet = None
    if fleet_model is not None:
        fleet = fleet_model_to_fleet(fleet_model)
@@ -654,13 +666,26 @@
                "%s: cannot fit new instance into fleet %s", fmt(job_model), fleet_model.name
            )
            return None
+        profile = combine_fleet_and_run_profiles(fleet.spec.merged_profile, profile)
+        if profile is None:
+            logger.debug("%s: cannot combine fleet %s profile", fmt(job_model), fleet_model.name)
+            return None
+        fleet_requirements = get_fleet_requirements(fleet.spec)
+        requirements = combine_fleet_and_run_requirements(fleet_requirements, requirements)
+        if requirements is None:
+            logger.debug(
+                "%s: cannot combine fleet %s requirements", fmt(job_model), fleet_model.name
+            )
+            return None
+        # TODO: Respect fleet provisioning properties such as tags
+
    multinode = job.job_spec.jobs_per_replica > 1 or (
        fleet is not None and fleet.spec.configuration.placement == InstanceGroupPlacement.CLUSTER
    )
    offers = await get_offers_by_requirements(
        project=project,
-        profile=run.run_spec.merged_profile,
-        requirements=job.job_spec.requirements,
+        profile=profile,
+        requirements=requirements,
        exclude_not_available=True,
        multinode=multinode,
        master_job_provisioning_data=master_job_provisioning_data,
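The new `requirements/combine.py` module backing `combine_fleet_and_run_profiles` and `combine_fleet_and_run_requirements` is not shown in this diff (the file list records it as +259 lines). Purely as an illustration of the idea, a hypothetical field-level combiner that picks the stricter of the fleet and run constraints; per the call sites above, the top-level functions return None when the two cannot be reconciled, and the job then skips creating an instance in that fleet:

```python
# Hypothetical sketch only; the real combine.py implementation may differ.
from typing import Optional


def combine_max_price(
    fleet_max_price: Optional[float], run_max_price: Optional[float]
) -> Optional[float]:
    """Take the tighter of two optional price caps; None means 'no cap'."""
    if fleet_max_price is None:
        return run_max_price
    if run_max_price is None:
        return fleet_max_price
    return min(fleet_max_price, run_max_price)
```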
dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -752,25 +777,11 @@ def _create_fleet_model_for_job(
 
 
 async def _get_next_instance_num(session: AsyncSession, fleet_model: FleetModel) -> int:
-    if len(fleet_model.instances) == 0:
-        # No instances means the fleet is not in the db yet, so don't lock.
-        return 0
-    async with get_locker(get_db().dialect_name).lock_ctx(
-        FleetModel.__tablename__, [fleet_model.id]
-    ):
-        fleet_model = (
-            (
-                await session.execute(
-                    select(FleetModel)
-                    .where(FleetModel.id == fleet_model.id)
-                    .options(joinedload(FleetModel.instances))
-                    .execution_options(populate_existing=True)
-                )
-            )
-            .unique()
-            .scalar_one()
-        )
-        return len(fleet_model.instances)
+    res = await session.execute(
+        select(func.count(InstanceModel.id)).where(InstanceModel.fleet_id == fleet_model.id)
+    )
+    instance_count = res.scalar_one()
+    return instance_count
 
 
 def _create_instance_model_for_job(
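The rewritten `_get_next_instance_num` replaces lock-then-load-all-instances with a single COUNT query. In isolation, the pattern looks like this (generic SQLAlchemy with a hypothetical table, not dstack code):

```python
# COUNT(*) always yields exactly one row, so scalar_one() is safe here.
from sqlalchemy import Column, Integer, MetaData, Table, create_engine, func, select

metadata = MetaData()
instances = Table(
    "instances",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("fleet_id", Integer),
)

engine = create_engine("sqlite://")
metadata.create_all(engine)

with engine.connect() as conn:
    stmt = select(func.count(instances.c.id)).where(instances.c.fleet_id == 42)
    next_instance_num = conn.execute(stmt).scalar_one()  # 0 for an empty fleet
```

As the FIXME added earlier in this file notes, the count is not taken under a fleet lock, so concurrent submissions can still observe the same value and produce duplicate instance numbers.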
dstack/_internal/server/background/tasks/process_terminating_jobs.py

@@ -75,9 +75,9 @@ async def _process_next_terminating_job():
        return
    instance_lockset.add(instance_model.id)
    job_lockset.add(job_model.id)
+    job_model_id = job_model.id
+    instance_model_id = job_model.used_instance_id
    try:
-        job_model_id = job_model.id
-        instance_model_id = job_model.used_instance_id
        await _process_job(
            session=session,
            job_model=job_model,

dstack/_internal/server/background/tasks/process_volumes.py

@@ -42,8 +42,8 @@ async def process_submitted_volumes():
    if volume_model is None:
        return
    lockset.add(volume_model.id)
+    volume_model_id = volume_model.id
    try:
-        volume_model_id = volume_model.id
        await _process_submitted_volume(session=session, volume_model=volume_model)
    finally:
        lockset.difference_update([volume_model_id])

dstack/_internal/server/db.py

@@ -4,8 +4,12 @@ from typing import Optional
 from alembic import command, config
 from sqlalchemy import AsyncAdaptedQueuePool, event
 from sqlalchemy.engine.interfaces import DBAPIConnection
-from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine
-from sqlalchemy.orm import sessionmaker
+from sqlalchemy.ext.asyncio import (
+    AsyncEngine,
+    AsyncSession,
+    async_sessionmaker,
+    create_async_engine,
+)
 from sqlalchemy.pool import ConnectionPoolEntry
 
 from dstack._internal.server import settings
@@ -26,8 +30,8 @@ class Database:
            pool_size=settings.DB_POOL_SIZE,
            max_overflow=settings.DB_MAX_OVERFLOW,
        )
-        self.session_maker = sessionmaker(
-            bind=self.engine,
+        self.session_maker = async_sessionmaker(
+            bind=self.engine,  # type: ignore[assignment]
            expire_on_commit=False,
            class_=AsyncSession,
        )
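The db.py hunks switch the session factory from the ORM `sessionmaker` to `async_sessionmaker`. A generic SQLAlchemy 2.0 sketch of that factory (not the dstack `Database` class; the aiosqlite URL is only for illustration):

```python
# async_sessionmaker produces AsyncSession objects by default.
import asyncio

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine

engine = create_async_engine("sqlite+aiosqlite://")
session_maker = async_sessionmaker(bind=engine, expire_on_commit=False)


async def main() -> None:
    async with session_maker() as session:
        assert isinstance(session, AsyncSession)
        print(await session.scalar(text("SELECT 1")))


asyncio.run(main())
```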
dstack/_internal/server/models.py

@@ -622,6 +622,7 @@ class InstanceModel(BaseModel):
    backend: Mapped[Optional[BackendType]] = mapped_column(EnumAsString(BackendType, 100))
    backend_data: Mapped[Optional[str]] = mapped_column(Text)
 
+    # Not set for cloud fleets that haven't been provisioned
    offer: Mapped[Optional[str]] = mapped_column(Text)
    region: Mapped[Optional[str]] = mapped_column(String(2000))
    price: Mapped[Optional[float]] = mapped_column(Float)

dstack/_internal/server/routers/gpus.py

@@ -1,9 +1,7 @@
 from typing import Tuple
 
 from fastapi import APIRouter, Depends
-from sqlalchemy.ext.asyncio import AsyncSession
 
-from dstack._internal.server.db import get_session
 from dstack._internal.server.models import ProjectModel, UserModel
 from dstack._internal.server.schemas.gpus import ListGpusRequest, ListGpusResponse
 from dstack._internal.server.security.permissions import ProjectMember
@@ -20,10 +18,7 @@ project_router = APIRouter(
 @project_router.post("/list", response_model=ListGpusResponse, response_model_exclude_none=True)
 async def list_gpus(
    body: ListGpusRequest,
-    session: AsyncSession = Depends(get_session),
    user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
 ) -> ListGpusResponse:
    _, project = user_project
-    return await list_gpus_grouped(
-        session=session, project=project, run_spec=body.run_spec, group_by=body.group_by
-    )
+    return await list_gpus_grouped(project=project, run_spec=body.run_spec, group_by=body.group_by)
dstack/_internal/server/schemas/runner.py

@@ -159,6 +159,16 @@ class GPUDevice(CoreModel):
    path_in_container: str
 
 
+class TaskListItem(CoreModel):
+    id: str
+    status: TaskStatus
+
+
+class TaskListResponse(CoreModel):
+    ids: Optional[list[str]] = None  # returned by pre-0.19.26 shim
+    tasks: Optional[list[TaskListItem]] = None  # returned by 0.19.26+ shim
+
+
 class TaskInfoResponse(CoreModel):
    id: str
    status: TaskStatus
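`TaskListResponse` deliberately accepts both payload shapes so the server stays compatible with older and newer shims. A hedged sketch of how a caller might normalize the two (the helper is hypothetical; the field names come from the hunk above):

```python
# Hypothetical helper, not part of the diff; it relies only on the two
# optional fields TaskListResponse declares above.
from typing import List


def list_task_ids(resp: "TaskListResponse") -> List[str]:
    if resp.tasks is not None:
        # 0.19.26+ shim: objects carrying id and status
        return [task.id for task in resp.tasks]
    # pre-0.19.26 shim: a bare list of ids
    return resp.ids or []
```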
dstack/_internal/server/services/backends/__init__.py

@@ -17,8 +17,8 @@ from dstack._internal.core.backends.configurators import (
 )
 from dstack._internal.core.backends.local.backend import LocalBackend
 from dstack._internal.core.backends.models import (
-    AnyBackendConfig,
     AnyBackendConfigWithCreds,
+    AnyBackendConfigWithoutCreds,
 )
 from dstack._internal.core.errors import (
     BackendError,
@@ -126,19 +126,25 @@ async def get_backend_config(
            )
            continue
        if backend_model.type == backend_type:
-            return get_backend_config_from_backend_model(
-                configurator, backend_model, include_creds=True
-            )
+            return get_backend_config_with_creds_from_backend_model(configurator, backend_model)
    return None
 
 
-def get_backend_config_from_backend_model(
+def get_backend_config_with_creds_from_backend_model(
+    configurator: Configurator,
+    backend_model: BackendModel,
+) -> AnyBackendConfigWithCreds:
+    backend_record = get_stored_backend_record(backend_model)
+    backend_config = configurator.get_backend_config_with_creds(backend_record)
+    return backend_config
+
+
+def get_backend_config_without_creds_from_backend_model(
    configurator: Configurator,
    backend_model: BackendModel,
-    include_creds: bool,
-) -> AnyBackendConfig:
+) -> AnyBackendConfigWithoutCreds:
    backend_record = get_stored_backend_record(backend_model)
-    backend_config = configurator.get_backend_config(backend_record, include_creds=include_creds)
+    backend_config = configurator.get_backend_config_without_creds(backend_record)
    return backend_config
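This hunk splits the `include_creds` flag into two dedicated helpers, one per return type. A generic sketch of that refactoring pattern with hypothetical stand-in types (not the dstack models):

```python
# Illustrative only: replacing a boolean flag with two functions gives each
# call site a precise return type instead of a union it must narrow.
from dataclasses import dataclass
from typing import Union


@dataclass
class ConfigWithCreds:
    name: str
    token: str


@dataclass
class ConfigWithoutCreds:
    name: str


# Before: one function, a bool parameter, a union return type.
def get_config(name: str, include_creds: bool) -> Union[ConfigWithCreds, ConfigWithoutCreds]:
    return ConfigWithCreds(name, "secret") if include_creds else ConfigWithoutCreds(name)


# After: two functions with exact return types.
def get_config_with_creds(name: str) -> ConfigWithCreds:
    return ConfigWithCreds(name, "secret")


def get_config_without_creds(name: str) -> ConfigWithoutCreds:
    return ConfigWithoutCreds(name)
```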
 
dstack/_internal/server/services/backends/handlers.py

@@ -55,7 +55,11 @@ async def _check_active_instances(
    )
    for fleet_model in fleet_models:
        for instance in fleet_model.instances:
-            if instance.status.is_active() and instance.backend in backends_types:
+            if (
+                instance.status.is_active()
+                and instance.backend is not None
+                and instance.backend in backends_types
+            ):
                if error:
                    msg = (
                        f"Backend {instance.backend.value} has active instances."
@@ -83,6 +87,7 @@ async def _check_active_volumes(
        if (
            volume_model.status.is_active()
            and volume_model.provisioning_data is not None
+            and volume_model.provisioning_data.backend is not None
            and volume_model.provisioning_data.backend in backends_types
        ):
            if error:

dstack/_internal/server/services/docker.py

@@ -32,15 +32,15 @@ class DXFAuthAdapter:
 
 
 class DockerImage(CoreModel):
-    class Config(CoreModel.Config):
-        frozen = True
-
    image: str
    registry: Optional[str]
    repo: str
    tag: str
    digest: Optional[str]
 
+    class Config(CoreModel.Config):
+        frozen = True
+
 
 class ImageConfig(CoreModel):
    user: Annotated[Optional[str], Field(alias="User")] = None
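The `DockerImage` hunk only moves the frozen `Config` below the field declarations, a style change that pydantic treats identically. For reference, a minimal pydantic sketch of what `frozen = True` provides, assuming pydantic v1.9+ style config as used by dstack's `CoreModel` (an assumption on my part):

```python
# Minimal sketch (not dstack's CoreModel): frozen models reject attribute
# assignment and become hashable, so instances can serve as dict or cache keys.
from typing import Optional

from pydantic import BaseModel


class FrozenImage(BaseModel):
    image: str
    registry: Optional[str] = None

    class Config:
        frozen = True


img = FrozenImage(image="ubuntu:22.04")
cache = {img: "config-blob"}  # hashable thanks to frozen=True
# img.image = "other"  # would raise: frozen instances are immutable
```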
dstack/_internal/server/services/docker.py

@@ -77,7 +77,7 @@ def get_image_config(image_name: str, registry_auth: Optional[RegistryAuth]) ->
    registry_client = PatchedDXF(
        host=image.registry or DEFAULT_REGISTRY,
        repo=image.repo,
-        auth=DXFAuthAdapter(registry_auth),
+        auth=DXFAuthAdapter(registry_auth),  # type: ignore[assignment]
        timeout=REGISTRY_REQUEST_TIMEOUT,
    )
 
@@ -88,7 +88,7 @@ def get_image_config(image_name: str, registry_auth: Optional[RegistryAuth]) ->
    )
    manifest = ImageManifest.__response__.parse_raw(manifest_resp)
    config_stream = registry_client.pull_blob(manifest.config.digest)
-    config_resp = join_byte_stream_checked(config_stream, MAX_CONFIG_OBJECT_SIZE)
+    config_resp = join_byte_stream_checked(config_stream, MAX_CONFIG_OBJECT_SIZE)  # type: ignore[arg-type]
    if config_resp is None:
        raise DockerRegistryError(
            f"Image config object exceeds the size limit of {MAX_CONFIG_OBJECT_SIZE} bytes"

dstack/_internal/server/services/fleets.py

@@ -279,7 +279,7 @@ async def get_plan(
    offers_with_backends = await get_create_instance_offers(
        project=project,
        profile=effective_spec.merged_profile,
-        requirements=_get_fleet_requirements(effective_spec),
+        requirements=get_fleet_requirements(effective_spec),
        fleet_spec=effective_spec,
        blocks=effective_spec.configuration.blocks,
    )
@@ -458,7 +458,7 @@ async def create_fleet_instance_model(
    instance_num: int,
 ) -> InstanceModel:
    profile = spec.merged_profile
-    requirements = _get_fleet_requirements(spec)
+    requirements = get_fleet_requirements(spec)
    instance_model = await instances_services.create_instance_model(
        session=session,
        project=project,
@@ -504,6 +504,7 @@ async def create_fleet_ssh_instance_model(
        raise ServerClientError("ssh key or user not specified")
 
    if proxy_jump is not None:
+        assert proxy_jump.ssh_key is not None
        ssh_proxy = SSHConnectionParams(
            hostname=proxy_jump.hostname,
            port=proxy_jump.port or 22,
@@ -643,6 +644,17 @@ def is_fleet_empty(fleet_model: FleetModel) -> bool:
    return len(active_instances) == 0
 
 
+def get_fleet_requirements(fleet_spec: FleetSpec) -> Requirements:
+    profile = fleet_spec.merged_profile
+    requirements = Requirements(
+        resources=fleet_spec.configuration.resources or ResourcesSpec(),
+        max_price=profile.max_price,
+        spot=get_policy_map(profile.spot_policy, default=SpotPolicy.ONDEMAND),
+        reservation=fleet_spec.configuration.reservation,
+    )
+    return requirements
+
+
 async def _create_fleet(
    session: AsyncSession,
    project: ProjectModel,
@@ -1003,17 +1015,6 @@ def _terminate_fleet_instances(fleet_model: FleetModel, instance_nums: Optional[
            instance.status = InstanceStatus.TERMINATING
 
 
-def _get_fleet_requirements(fleet_spec: FleetSpec) -> Requirements:
-    profile = fleet_spec.merged_profile
-    requirements = Requirements(
-        resources=fleet_spec.configuration.resources or ResourcesSpec(),
-        max_price=profile.max_price,
-        spot=get_policy_map(profile.spot_policy, default=SpotPolicy.ONDEMAND),
-        reservation=fleet_spec.configuration.reservation,
-    )
-    return requirements
-
-
 def _get_next_instance_num(instance_nums: set[int]) -> int:
    if not instance_nums:
        return 0
dstack/_internal/server/services/gateways/__init__.py

@@ -93,6 +93,8 @@ async def create_gateway_compute(
    backend_id: Optional[uuid.UUID] = None,
 ) -> GatewayComputeModel:
    assert isinstance(backend_compute, ComputeWithGatewaySupport)
+    assert configuration.name is not None
+
    private_bytes, public_bytes = generate_rsa_key_pair_bytes()
    gateway_ssh_private_key = private_bytes.decode()
    gateway_ssh_public_key = public_bytes.decode()

dstack/_internal/server/services/gateways/client.py

@@ -7,7 +7,7 @@ from pydantic import parse_obj_as
 
 from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
 from dstack._internal.core.errors import GatewayError
-from dstack._internal.core.models.configurations import RateLimit, ServiceConfiguration
+from dstack._internal.core.models.configurations import RateLimit
 from dstack._internal.core.models.instances import SSHConnectionParams
 from dstack._internal.core.models.runs import JobSpec, JobSubmission, Run, get_service_port
 from dstack._internal.proxy.gateway.schemas.stats import ServiceStats
@@ -85,7 +85,7 @@ class GatewayClient:
        ssh_head_proxy: Optional[SSHConnectionParams],
        ssh_head_proxy_private_key: Optional[str],
    ):
-        assert isinstance(run.run_spec.configuration, ServiceConfiguration)
+        assert run.run_spec.configuration.type == "service"
        payload = {
            "job_id": job_submission.id.hex,
            "app_port": get_service_port(job_spec, run.run_spec.configuration),
dstack/_internal/server/services/gateways/client.py

@@ -93,6 +93,9 @@ class GatewayClient:
            "ssh_head_proxy_private_key": ssh_head_proxy_private_key,
        }
        jpd = job_submission.job_provisioning_data
+        assert jpd is not None
+        assert jpd.hostname is not None
+        assert jpd.ssh_port is not None
        if not jpd.dockerized:
            payload.update(
                {

dstack/_internal/server/services/gateways/connection.py

@@ -67,7 +67,7 @@ class GatewayConnection:
            # reverse_forwarded_sockets are added later in .open()
        )
        self.tunnel_id = uuid.uuid4()
-        self._client = GatewayClient(uds=self.gateway_socket_path)
+        self._client = GatewayClient(uds=str(self.gateway_socket_path))
 
    @staticmethod
    def _init_symlink_dir(connection_dir: Path) -> Tuple[TemporaryDirectory, Path]: