dstack 0.18.40rc1__py3-none-any.whl → 0.18.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. dstack/_internal/cli/commands/apply.py +8 -5
  2. dstack/_internal/cli/services/configurators/base.py +4 -2
  3. dstack/_internal/cli/services/configurators/fleet.py +21 -9
  4. dstack/_internal/cli/services/configurators/gateway.py +15 -0
  5. dstack/_internal/cli/services/configurators/run.py +6 -5
  6. dstack/_internal/cli/services/configurators/volume.py +15 -0
  7. dstack/_internal/cli/services/repos.py +3 -3
  8. dstack/_internal/cli/utils/fleet.py +44 -33
  9. dstack/_internal/cli/utils/run.py +27 -7
  10. dstack/_internal/cli/utils/volume.py +30 -9
  11. dstack/_internal/core/backends/aws/compute.py +94 -53
  12. dstack/_internal/core/backends/aws/resources.py +22 -12
  13. dstack/_internal/core/backends/azure/compute.py +2 -0
  14. dstack/_internal/core/backends/base/compute.py +20 -2
  15. dstack/_internal/core/backends/gcp/compute.py +32 -24
  16. dstack/_internal/core/backends/gcp/resources.py +0 -15
  17. dstack/_internal/core/backends/oci/compute.py +10 -5
  18. dstack/_internal/core/backends/oci/resources.py +23 -26
  19. dstack/_internal/core/backends/remote/provisioning.py +65 -27
  20. dstack/_internal/core/backends/runpod/compute.py +1 -0
  21. dstack/_internal/core/models/backends/azure.py +3 -1
  22. dstack/_internal/core/models/configurations.py +24 -1
  23. dstack/_internal/core/models/fleets.py +46 -0
  24. dstack/_internal/core/models/instances.py +5 -1
  25. dstack/_internal/core/models/pools.py +4 -1
  26. dstack/_internal/core/models/profiles.py +10 -4
  27. dstack/_internal/core/models/runs.py +23 -3
  28. dstack/_internal/core/models/volumes.py +26 -0
  29. dstack/_internal/core/services/ssh/attach.py +92 -53
  30. dstack/_internal/core/services/ssh/tunnel.py +58 -31
  31. dstack/_internal/proxy/gateway/routers/registry.py +2 -0
  32. dstack/_internal/proxy/gateway/schemas/registry.py +2 -0
  33. dstack/_internal/proxy/gateway/services/registry.py +4 -0
  34. dstack/_internal/proxy/lib/models.py +3 -0
  35. dstack/_internal/proxy/lib/services/service_connection.py +8 -1
  36. dstack/_internal/server/background/tasks/process_instances.py +73 -35
  37. dstack/_internal/server/background/tasks/process_metrics.py +9 -9
  38. dstack/_internal/server/background/tasks/process_running_jobs.py +77 -26
  39. dstack/_internal/server/background/tasks/process_runs.py +2 -12
  40. dstack/_internal/server/background/tasks/process_submitted_jobs.py +121 -49
  41. dstack/_internal/server/background/tasks/process_terminating_jobs.py +14 -3
  42. dstack/_internal/server/background/tasks/process_volumes.py +11 -1
  43. dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
  44. dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
  45. dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
  46. dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
  47. dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
  48. dstack/_internal/server/models.py +27 -23
  49. dstack/_internal/server/routers/runs.py +1 -0
  50. dstack/_internal/server/schemas/runner.py +1 -0
  51. dstack/_internal/server/services/backends/configurators/azure.py +34 -8
  52. dstack/_internal/server/services/config.py +9 -0
  53. dstack/_internal/server/services/fleets.py +32 -3
  54. dstack/_internal/server/services/gateways/client.py +9 -1
  55. dstack/_internal/server/services/jobs/__init__.py +217 -45
  56. dstack/_internal/server/services/jobs/configurators/base.py +47 -2
  57. dstack/_internal/server/services/offers.py +96 -10
  58. dstack/_internal/server/services/pools.py +98 -14
  59. dstack/_internal/server/services/proxy/repo.py +17 -3
  60. dstack/_internal/server/services/runner/client.py +9 -6
  61. dstack/_internal/server/services/runner/ssh.py +33 -5
  62. dstack/_internal/server/services/runs.py +48 -179
  63. dstack/_internal/server/services/services/__init__.py +9 -1
  64. dstack/_internal/server/services/volumes.py +68 -9
  65. dstack/_internal/server/statics/index.html +1 -1
  66. dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js → main-2ac66bfcbd2e39830b88.js} +30 -31
  67. dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js.map → main-2ac66bfcbd2e39830b88.js.map} +1 -1
  68. dstack/_internal/server/statics/{main-fc56d1f4af8e57522a1c.css → main-ad5150a441de98cd8987.css} +1 -1
  69. dstack/_internal/server/testing/common.py +130 -61
  70. dstack/_internal/utils/common.py +22 -8
  71. dstack/_internal/utils/env.py +14 -0
  72. dstack/_internal/utils/ssh.py +1 -1
  73. dstack/api/server/_fleets.py +25 -1
  74. dstack/api/server/_runs.py +23 -2
  75. dstack/api/server/_volumes.py +12 -1
  76. dstack/version.py +1 -1
  77. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/METADATA +1 -1
  78. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/RECORD +104 -93
  79. tests/_internal/cli/services/configurators/test_profile.py +3 -3
  80. tests/_internal/core/services/ssh/test_tunnel.py +56 -4
  81. tests/_internal/proxy/gateway/routers/test_registry.py +30 -7
  82. tests/_internal/server/background/tasks/test_process_instances.py +138 -20
  83. tests/_internal/server/background/tasks/test_process_metrics.py +12 -0
  84. tests/_internal/server/background/tasks/test_process_running_jobs.py +193 -0
  85. tests/_internal/server/background/tasks/test_process_runs.py +27 -3
  86. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +53 -6
  87. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +135 -17
  88. tests/_internal/server/routers/test_fleets.py +15 -2
  89. tests/_internal/server/routers/test_pools.py +6 -0
  90. tests/_internal/server/routers/test_runs.py +27 -0
  91. tests/_internal/server/routers/test_volumes.py +9 -2
  92. tests/_internal/server/services/jobs/__init__.py +0 -0
  93. tests/_internal/server/services/jobs/configurators/__init__.py +0 -0
  94. tests/_internal/server/services/jobs/configurators/test_base.py +72 -0
  95. tests/_internal/server/services/runner/test_client.py +22 -3
  96. tests/_internal/server/services/test_offers.py +167 -0
  97. tests/_internal/server/services/test_pools.py +109 -1
  98. tests/_internal/server/services/test_runs.py +5 -41
  99. tests/_internal/utils/test_common.py +21 -0
  100. tests/_internal/utils/test_env.py +38 -0
  101. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/LICENSE.md +0 -0
  102. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/WHEEL +0 -0
  103. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/entry_points.txt +0 -0
  104. {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/top_level.txt +0 -0
dstack/_internal/server/testing/common.py CHANGED
@@ -5,6 +5,7 @@ from datetime import datetime, timezone
 from typing import Dict, List, Optional, Union
 from uuid import UUID
 
+import gpuhunt
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from dstack._internal.core.models.backends.base import BackendType
@@ -13,14 +14,20 @@ from dstack._internal.core.models.configurations import (
     AnyRunConfiguration,
     DevEnvironmentConfiguration,
 )
+from dstack._internal.core.models.envs import Env
 from dstack._internal.core.models.fleets import FleetConfiguration, FleetSpec, FleetStatus
 from dstack._internal.core.models.gateways import GatewayStatus
 from dstack._internal.core.models.instances import (
+    Disk,
+    Gpu,
+    InstanceAvailability,
     InstanceConfiguration,
+    InstanceOfferWithAvailability,
     InstanceStatus,
     InstanceType,
     RemoteConnectionInfo,
     Resources,
+    SSHKey,
 )
 from dstack._internal.core.models.placement import (
     PlacementGroupConfiguration,
@@ -48,7 +55,7 @@ from dstack._internal.core.models.runs import (
 from dstack._internal.core.models.users import GlobalRole
 from dstack._internal.core.models.volumes import (
     Volume,
-    VolumeAttachmentData,
+    VolumeAttachment,
     VolumeConfiguration,
     VolumeProvisioningData,
     VolumeStatus,
@@ -69,6 +76,7 @@ from dstack._internal.server.models import (
     RepoModel,
     RunModel,
     UserModel,
+    VolumeAttachmentModel,
     VolumeModel,
 )
 from dstack._internal.server.services.jobs import get_job_specs_from_run_spec
@@ -311,17 +319,30 @@ async def create_job(
     return job
 
 
-def get_job_provisioning_data(dockerized: bool = False) -> JobProvisioningData:
+def get_job_provisioning_data(
+    dockerized: bool = False,
+    backend: BackendType = BackendType.AWS,
+    region: str = "us-east-1",
+    gpu_count: int = 0,
+    cpu_count: int = 1,
+    memory_gib: float = 0.5,
+    spot: bool = False,
+    hostname: str = "127.0.0.4",
+    internal_ip: Optional[str] = "127.0.0.4",
+) -> JobProvisioningData:
+    gpus = [Gpu(name="T4", memory_mib=16384, vendor=gpuhunt.AcceleratorVendor.NVIDIA)] * gpu_count
     return JobProvisioningData(
-        backend=BackendType.AWS,
+        backend=backend,
         instance_type=InstanceType(
             name="instance",
-            resources=Resources(cpus=1, memory_mib=512, spot=False, gpus=[]),
+            resources=Resources(
+                cpus=cpu_count, memory_mib=int(memory_gib * 1024), spot=spot, gpus=gpus
+            ),
         ),
         instance_id="instance_id",
-        hostname="127.0.0.4",
-        internal_ip="127.0.0.4",
-        region="us-east-1",
+        hostname=hostname,
+        internal_ip=internal_ip,
+        region=region,
         price=10.5,
         username="ubuntu",
         ssh_port=22,
@@ -337,6 +358,8 @@ def get_job_runtime_data(
     gpu: Optional[int] = None,
     memory: Optional[float] = None,
     ports: Optional[dict[int, int]] = None,
+    offer: Optional[InstanceOfferWithAvailability] = None,
+    volume_names: Optional[list[str]] = None,
 ) -> JobRuntimeData:
     return JobRuntimeData(
         network_mode=NetworkMode(network_mode),
@@ -344,6 +367,8 @@
         gpu=gpu,
         memory=Memory(memory) if memory is not None else None,
         ports=ports,
+        offer=offer,
+        volume_names=volume_names,
     )
 
 
@@ -481,56 +506,26 @@ async def create_instance(
     termination_idle_time: int = DEFAULT_POOL_TERMINATION_IDLE_TIME,
     region: str = "eu-west",
     remote_connection_info: Optional[RemoteConnectionInfo] = None,
+    offer: Optional[InstanceOfferWithAvailability] = None,
     job_provisioning_data: Optional[JobProvisioningData] = None,
+    total_blocks: Optional[int] = 1,
+    busy_blocks: int = 0,
     name: str = "test_instance",
     volumes: Optional[List[VolumeModel]] = None,
 ) -> InstanceModel:
     if instance_id is None:
         instance_id = uuid.uuid4()
     if job_provisioning_data is None:
-        job_provisioning_data_dict = {
-            "backend": backend.value,
-            "instance_type": {
-                "name": "instance",
-                "resources": {
-                    "cpus": 1,
-                    "memory_mib": 512,
-                    "gpus": [],
-                    "spot": spot,
-                    "disk": {"size_mib": 102400},
-                    "description": "",
-                },
-            },
-            "instance_id": "running_instance.id",
-            "ssh_proxy": None,
-            "hostname": "running_instance.ip",
-            "region": region,
-            "price": 0.1,
-            "username": "root",
-            "ssh_port": 22,
-            "dockerized": True,
-            "backend_data": None,
-        }
-    else:
-        job_provisioning_data_dict = job_provisioning_data.dict()
-    offer = {
-        "backend": backend.value,
-        "instance": {
-            "name": "instance",
-            "resources": {
-                "cpus": 2,
-                "memory_mib": 12000,
-                "gpus": [],
-                "spot": spot,
-                "disk": {"size_mib": 102400},
-                "description": "",
-            },
-        },
-        "region": region,
-        "price": 1,
-        "availability": "available",
-    }
-
+        job_provisioning_data = get_job_provisioning_data(
+            dockerized=True,
+            backend=backend,
+            region=region,
+            spot=spot,
+            hostname="running_instance.ip",
+            internal_ip=None,
+        )
+    if offer is None:
+        offer = get_instance_offer_with_availability(backend=backend, region=region, spot=spot)
     if profile is None:
         profile = Profile(name="test_name")
 
@@ -548,6 +543,9 @@
 
     if volumes is None:
         volumes = []
+    volume_attachments = []
+    for volume in volumes:
+        volume_attachments.append(VolumeAttachmentModel(volume=volume))
 
     im = InstanceModel(
         id=instance_id,
@@ -561,8 +559,8 @@
         created_at=created_at,
         started_at=created_at,
         finished_at=finished_at,
-        job_provisioning_data=json.dumps(job_provisioning_data_dict),
-        offer=json.dumps(offer),
+        job_provisioning_data=job_provisioning_data.json(),
+        offer=offer.json(),
         price=1,
         region=region,
         backend=backend,
572
570
  requirements=requirements.json(),
573
571
  instance_configuration=instance_configuration.json(),
574
572
  remote_connection_info=remote_connection_info.json() if remote_connection_info else None,
575
- job=job,
576
- volumes=volumes,
573
+ volume_attachments=volume_attachments,
574
+ total_blocks=total_blocks,
575
+ busy_blocks=busy_blocks,
577
576
  )
577
+ if job:
578
+ im.jobs.append(job)
578
579
  session.add(im)
579
580
  await session.commit()
580
581
  return im
581
582
 
582
583
 
584
+ def get_instance_offer_with_availability(
585
+ backend: BackendType = BackendType.AWS,
586
+ region: str = "eu-west",
587
+ gpu_count: int = 0,
588
+ cpu_count: int = 2,
589
+ memory_gib: float = 12,
590
+ disk_gib: float = 100.0,
591
+ spot: bool = False,
592
+ blocks: int = 1,
593
+ total_blocks: int = 1,
594
+ availability_zones: Optional[List[str]] = None,
595
+ ):
596
+ gpus = [Gpu(name="T4", memory_mib=16384, vendor=gpuhunt.AcceleratorVendor.NVIDIA)] * gpu_count
597
+ return InstanceOfferWithAvailability(
598
+ backend=backend,
599
+ instance=InstanceType(
600
+ name="instance",
601
+ resources=Resources(
602
+ cpus=cpu_count,
603
+ memory_mib=int(memory_gib * 1024),
604
+ gpus=gpus,
605
+ spot=spot,
606
+ disk=Disk(size_mib=int(disk_gib * 1024)),
607
+ description="",
608
+ ),
609
+ ),
610
+ region=region,
611
+ price=1,
612
+ availability=InstanceAvailability.AVAILABLE,
613
+ availability_zones=availability_zones,
614
+ blocks=blocks,
615
+ total_blocks=total_blocks,
616
+ )
617
+
618
+
619
+ def get_remote_connection_info(
620
+ host: str = "10.0.0.10",
621
+ port: int = 22,
622
+ ssh_user: str = "ubuntu",
623
+ ssh_keys: Optional[list[SSHKey]] = None,
624
+ env: Optional[Union[Env, dict]] = None,
625
+ ):
626
+ if ssh_keys is None:
627
+ ssh_keys = [
628
+ SSHKey(
629
+ public="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIO6mJxVbNtm0zXgMLvByrhXJCmJRveSrJxLB5/OzcyCk",
630
+ private="""
631
+ -----BEGIN OPENSSH PRIVATE KEY-----
632
+ b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
633
+ QyNTUxOQAAACDupicVWzbZtM14DC7wcq4VyQpiUb3kqycSwefzs3MgpAAAAJCiWa5Volmu
634
+ VQAAAAtzc2gtZWQyNTUxOQAAACDupicVWzbZtM14DC7wcq4VyQpiUb3kqycSwefzs3MgpA
635
+ AAAEAncHi4AhS6XdMp5Gzd+IMse/4ekyQ54UngByf0Sp0uH+6mJxVbNtm0zXgMLvByrhXJ
636
+ CmJRveSrJxLB5/OzcyCkAAAACWRlZkBkZWZwYwECAwQ=
637
+ -----END OPENSSH PRIVATE KEY-----
638
+ """,
639
+ )
640
+ ]
641
+ if env is None:
642
+ env = Env()
643
+ elif isinstance(env, dict):
644
+ env = Env.parse_obj(env)
645
+ return RemoteConnectionInfo(
646
+ host=host,
647
+ port=port,
648
+ ssh_user=ssh_user,
649
+ ssh_keys=ssh_keys,
650
+ env=env,
651
+ )
652
+
653
+
583
654
  async def create_volume(
584
655
  session: AsyncSession,
585
656
  project: ProjectModel,
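
Note (not part of the diff): a hedged sketch of how the new test factories above might be called; only keyword arguments visible in this diff are used, and all values are illustrative:

    offer = get_instance_offer_with_availability(gpu_count=1, memory_gib=32, total_blocks=2)
    jpd = get_job_provisioning_data(backend=BackendType.AWS, region="us-east-1", gpu_count=1, spot=True)
    rci = get_remote_connection_info(host="10.0.0.10", env={"MY_VAR": "1"})
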
@@ -604,7 +675,7 @@ async def create_volume(
         volume_provisioning_data=volume_provisioning_data.json()
         if volume_provisioning_data
         else None,
-        instances=[],
+        attachments=[],
         deleted_at=deleted_at,
         deleted=True if deleted_at else False,
     )
@@ -626,16 +697,14 @@ def get_volume(
     deleted: bool = False,
     volume_id: Optional[str] = None,
     provisioning_data: Optional[VolumeProvisioningData] = None,
-    attachment_data: Optional[VolumeAttachmentData] = None,
-    device_name: Optional[str] = None,
+    attachments: Optional[List[VolumeAttachment]] = None,
 ) -> Volume:
     if id_ is None:
         id_ = uuid.uuid4()
     if configuration is None:
         configuration = get_volume_configuration()
-    if device_name is not None:
-        assert attachment_data is None, "attachment_data and device_name are mutually exclusive"
-        attachment_data = VolumeAttachmentData(device_name=device_name)
+    if attachments is None:
+        attachments = []
     return Volume(
         id=id_,
         name=name,
@@ -649,7 +718,7 @@ def get_volume(
         deleted=deleted,
         volume_id=volume_id,
         provisioning_data=provisioning_data,
-        attachment_data=attachment_data,
+        attachments=attachments,
     )
 
 
dstack/_internal/utils/common.py CHANGED
@@ -157,24 +157,38 @@ def parse_pretty_duration(duration: str) -> int:
     return amount * multiplier
 
 
+DURATION_UNITS_DESC = [
+    ("w", 7 * 24 * 3600),
+    ("d", 24 * 3600),
+    ("h", 3600),
+    ("m", 60),
+    ("s", 1),
+]
+
+
 def format_pretty_duration(seconds: int) -> str:
     if seconds == 0:
         return "0s"
     if seconds < 0:
         raise ValueError("Seconds cannot be negative")
-    units = [
-        ("w", 7 * 24 * 3600),
-        ("d", 24 * 3600),
-        ("h", 3600),
-        ("m", 60),
-        ("s", 1),
-    ]
-    for unit, multiplier in units:
+    for unit, multiplier in DURATION_UNITS_DESC:
         if seconds % multiplier == 0:
             return f"{seconds // multiplier}{unit}"
     return f"{seconds}s"  # Fallback to seconds if no larger unit fits perfectly
 
 
+def format_duration_multiunit(seconds: int) -> str:
+    """90 -> 1m 30s, 4545 -> 1h 15m 45s, etc"""
+    if seconds < 0:
+        raise ValueError("Seconds cannot be negative")
+    result = ""
+    for unit, multiplier in DURATION_UNITS_DESC:
+        if unit_value := seconds // multiplier:
+            result += f" {unit_value}{unit}"
+            seconds -= unit_value * multiplier
+    return result.lstrip() or "0s"
+
+
 def sizeof_fmt(num, suffix="B"):
     for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
         if abs(num) < 1024.0:
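
Note (not part of the diff): expected output of the duration helpers above, assuming they live in dstack._internal.utils.common as the file list suggests:

    from dstack._internal.utils.common import format_duration_multiunit, format_pretty_duration

    format_pretty_duration(7200)     # "2h"  -- single largest unit that divides evenly
    format_pretty_duration(90)       # "90s" -- falls through to seconds
    format_duration_multiunit(90)    # "1m 30s"
    format_duration_multiunit(4545)  # "1h 15m 45s"
    format_duration_multiunit(0)     # "0s"
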
dstack/_internal/utils/env.py ADDED
@@ -0,0 +1,14 @@
+import os
+
+
+def get_bool(name: str, default: bool = False) -> bool:
+    try:
+        value = os.environ[name]
+    except KeyError:
+        return default
+    value = value.lower()
+    if value in ["0", "false", "off"]:
+        return False
+    if value in ["1", "true", "on"]:
+        return True
+    raise ValueError(f"Invalid bool value: {name}={value}")
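
Note (not part of the diff): expected behavior of the new get_bool helper, shown with a hypothetical variable name:

    from dstack._internal.utils.env import get_bool

    get_bool("DSTACK_EXAMPLE_FLAG")                # False when the variable is unset
    get_bool("DSTACK_EXAMPLE_FLAG", default=True)  # True when the variable is unset
    # "1"/"true"/"on" map to True, "0"/"false"/"off" to False (case-insensitive);
    # any other value raises ValueError("Invalid bool value: ...")
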
dstack/_internal/utils/ssh.py CHANGED
@@ -159,7 +159,7 @@ def get_ssh_config(path: PathLike, host: str) -> Optional[Dict[str, str]]:
     return None
 
 
-def update_ssh_config(path: PathLike, host: str, options: Dict[str, Union[str, FilePath]]):
+def update_ssh_config(path: PathLike, host: str, options: Dict[str, Union[str, int, FilePath]]):
     Path(path).parent.mkdir(parents=True, exist_ok=True)
     with FileLock(str(path) + ".lock"):
         copy_mode = True
dstack/api/server/_fleets.py CHANGED
@@ -62,16 +62,29 @@ def _get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[_ExcludeDict]:
     spec_excludes: _ExcludeDict = {}
     configuration_excludes: _ExcludeDict = {}
     profile_excludes: set[str] = set()
+    ssh_config_excludes: _ExcludeDict = {}
+    ssh_hosts_excludes: set[str] = set()
 
     # TODO: Can be removed in 0.19
     if fleet_spec.configuration_path is None:
         spec_excludes["configuration_path"] = True
     if fleet_spec.configuration.ssh_config is not None:
+        if fleet_spec.configuration.ssh_config.proxy_jump is None:
+            ssh_config_excludes["proxy_jump"] = True
+        if all(
+            isinstance(h, str) or h.proxy_jump is None
+            for h in fleet_spec.configuration.ssh_config.hosts
+        ):
+            ssh_hosts_excludes.add("proxy_jump")
         if all(
             isinstance(h, str) or h.internal_ip is None
             for h in fleet_spec.configuration.ssh_config.hosts
         ):
-            configuration_excludes["ssh_config"] = {"hosts": {"__all__": {"internal_ip"}}}
+            ssh_hosts_excludes.add("internal_ip")
+        if all(
+            isinstance(h, str) or h.blocks == 1 for h in fleet_spec.configuration.ssh_config.hosts
+        ):
+            ssh_hosts_excludes.add("blocks")
     # client >= 0.18.30 / server <= 0.18.29 compatibility tweak
     if fleet_spec.configuration.reservation is None:
         configuration_excludes["reservation"] = True
@@ -84,7 +97,18 @@ def _get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[_ExcludeDict]:
     # client >= 0.18.38 / server <= 0.18.37 compatibility tweak
     if fleet_spec.profile is not None and fleet_spec.profile.stop_duration is None:
         profile_excludes.add("stop_duration")
+    # client >= 0.18.41 / server <= 0.18.40 compatibility tweak
+    if fleet_spec.configuration.availability_zones is None:
+        configuration_excludes["availability_zones"] = True
+    if fleet_spec.profile is not None and fleet_spec.profile.availability_zones is None:
+        profile_excludes.add("availability_zones")
+    if fleet_spec.configuration.blocks == 1:
+        configuration_excludes["blocks"] = True
 
+    if ssh_hosts_excludes:
+        ssh_config_excludes["hosts"] = {"__all__": ssh_hosts_excludes}
+    if ssh_config_excludes:
+        configuration_excludes["ssh_config"] = ssh_config_excludes
     if configuration_excludes:
         spec_excludes["configuration"] = configuration_excludes
     if profile_excludes:
dstack/api/server/_runs.py CHANGED
@@ -7,6 +7,7 @@ from pydantic import parse_obj_as
 from dstack._internal.core.models.common import is_core_model_instance
 from dstack._internal.core.models.configurations import (
     STRIP_PREFIX_DEFAULT,
+    DevEnvironmentConfiguration,
     ServiceConfiguration,
 )
 from dstack._internal.core.models.pools import Instance
@@ -82,7 +83,10 @@ class RunsAPIClient(APIClientGroup):
     ) -> Run:
         plan_input: ApplyRunPlanInput = ApplyRunPlanInput.__response__.parse_obj(plan)
         body = ApplyRunPlanRequest(plan=plan_input, force=force)
-        resp = self._request(f"/api/project/{project_name}/runs/apply", body=body.json())
+        resp = self._request(
+            f"/api/project/{project_name}/runs/apply",
+            body=body.json(exclude=_get_apply_plan_excludes(plan_input)),
+        )
         return parse_obj_as(Run.__response__, resp.json())
 
     def submit(self, project_name: str, run_spec: RunSpec) -> Run:
@@ -121,8 +125,15 @@ class RunsAPIClient(APIClientGroup):
         return parse_obj_as(Instance.__response__, resp.json())
 
 
+def _get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[dict]:
+    run_spec_excludes = _get_run_spec_excludes(plan.run_spec)
+    if run_spec_excludes is not None:
+        return {"plan": run_spec_excludes}
+    return None
+
+
 def _get_run_spec_excludes(run_spec: RunSpec) -> Optional[dict]:
-    spec_excludes: dict[str, set[str]] = {}
+    spec_excludes: dict[str, Any] = {}
     configuration_excludes: dict[str, Any] = {}
     profile_excludes: set[str] = set()
     configuration = run_spec.configuration
@@ -164,6 +175,16 @@ def _get_run_spec_excludes(run_spec: RunSpec) -> Optional[dict]:
         for v in configuration.volumes
     ):
         configuration_excludes["volumes"] = {"__all__": {"optional"}}
+    # client >= 0.18.41 / server <= 0.18.40 compatibility tweak
+    if configuration.availability_zones is None:
+        configuration_excludes["availability_zones"] = True
+    if profile is not None and profile.availability_zones is None:
+        profile_excludes.add("availability_zones")
+    if (
+        is_core_model_instance(configuration, DevEnvironmentConfiguration)
+        and configuration.inactivity_duration is None
+    ):
+        configuration_excludes["inactivity_duration"] = True
 
     if configuration_excludes:
         spec_excludes["configuration"] = configuration_excludes
dstack/api/server/_volumes.py CHANGED
@@ -27,9 +27,20 @@ class VolumesAPIClient(APIClientGroup):
         configuration: VolumeConfiguration,
     ) -> Volume:
         body = CreateVolumeRequest(configuration=configuration)
-        resp = self._request(f"/api/project/{project_name}/volumes/create", body=body.json())
+        resp = self._request(
+            f"/api/project/{project_name}/volumes/create",
+            body=body.json(exclude=_get_volume_configuration_excludes(configuration)),
+        )
         return parse_obj_as(Volume.__response__, resp.json())
 
     def delete(self, project_name: str, names: List[str]) -> None:
         body = DeleteVolumesRequest(names=names)
         self._request(f"/api/project/{project_name}/volumes/delete", body=body.json())
+
+
+def _get_volume_configuration_excludes(configuration: VolumeConfiguration) -> dict:
+    configuration_excludes = {}
+    # client >= 0.18.41 / server <= 0.18.40 compatibility tweak
+    if configuration.availability_zone is None:
+        configuration_excludes["availability_zone"] = True
+    return {"configuration": configuration_excludes}
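
Note (not part of the diff): the *_excludes helpers in _fleets.py, _runs.py, and _volumes.py above all build nested dictionaries for pydantic's exclude mechanism, so a newer client omits fields an older server would reject. A minimal, self-contained sketch with toy models (pydantic v1 style, not dstack's own classes):

    from typing import List, Optional
    from pydantic import BaseModel

    class Host(BaseModel):
        hostname: str
        proxy_jump: Optional[str] = None

    class Config(BaseModel):
        hosts: List[Host]
        availability_zones: Optional[List[str]] = None

    cfg = Config(hosts=[Host(hostname="10.0.0.1")])
    # nested exclude: drop a top-level field and a per-host field
    excludes = {"availability_zones": True, "hosts": {"__all__": {"proxy_jump"}}}
    cfg.json(exclude=excludes)  # '{"hosts": [{"hostname": "10.0.0.1"}]}'
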
dstack/version.py CHANGED
@@ -1,3 +1,3 @@
-__version__ = "0.18.40rc1"
+__version__ = "0.18.42"
 __is_release__ = True
 base_image = "0.6"
{dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dstack
-Version: 0.18.40rc1
+Version: 0.18.42
 Summary: dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises.
 Home-page: https://dstack.ai
 Author: Andrey Cheptsov