dstack 0.19.12rc1__py3-none-any.whl → 0.19.13__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (44)
  1. dstack/_internal/cli/services/configurators/run.py +43 -47
  2. dstack/_internal/cli/utils/run.py +15 -27
  3. dstack/_internal/core/backends/aws/compute.py +22 -9
  4. dstack/_internal/core/backends/aws/resources.py +26 -0
  5. dstack/_internal/core/backends/base/offers.py +0 -1
  6. dstack/_internal/core/backends/template/configurator.py.jinja +1 -6
  7. dstack/_internal/core/backends/template/models.py.jinja +4 -0
  8. dstack/_internal/core/compatibility/__init__.py +0 -0
  9. dstack/_internal/core/compatibility/fleets.py +72 -0
  10. dstack/_internal/core/compatibility/gateways.py +34 -0
  11. dstack/_internal/core/compatibility/runs.py +125 -0
  12. dstack/_internal/core/compatibility/volumes.py +32 -0
  13. dstack/_internal/core/models/configurations.py +1 -1
  14. dstack/_internal/core/models/fleets.py +6 -1
  15. dstack/_internal/core/models/instances.py +51 -12
  16. dstack/_internal/core/models/profiles.py +43 -3
  17. dstack/_internal/core/models/repos/local.py +3 -3
  18. dstack/_internal/core/models/runs.py +118 -44
  19. dstack/_internal/server/app.py +1 -1
  20. dstack/_internal/server/background/tasks/process_running_jobs.py +47 -12
  21. dstack/_internal/server/background/tasks/process_runs.py +14 -1
  22. dstack/_internal/server/services/runner/client.py +4 -1
  23. dstack/_internal/server/services/storage/__init__.py +38 -0
  24. dstack/_internal/server/services/storage/base.py +27 -0
  25. dstack/_internal/server/services/storage/gcs.py +44 -0
  26. dstack/_internal/server/services/{storage.py → storage/s3.py} +4 -27
  27. dstack/_internal/server/settings.py +7 -3
  28. dstack/_internal/server/statics/index.html +1 -1
  29. dstack/_internal/server/statics/{main-5b9786c955b42bf93581.js → main-2066f1f22ddb4557bcde.js} +1677 -46
  30. dstack/_internal/server/statics/{main-5b9786c955b42bf93581.js.map → main-2066f1f22ddb4557bcde.js.map} +1 -1
  31. dstack/_internal/server/statics/{main-8f9c66f404e9c7e7e020.css → main-f39c418b05fe14772dd8.css} +1 -1
  32. dstack/_internal/server/testing/common.py +2 -1
  33. dstack/_internal/utils/common.py +4 -0
  34. dstack/api/server/_fleets.py +9 -69
  35. dstack/api/server/_gateways.py +3 -14
  36. dstack/api/server/_runs.py +4 -116
  37. dstack/api/server/_volumes.py +3 -14
  38. dstack/plugins/builtin/rest_plugin/_plugin.py +24 -5
  39. dstack/version.py +2 -2
  40. {dstack-0.19.12rc1.dist-info → dstack-0.19.13.dist-info}/METADATA +1 -1
  41. {dstack-0.19.12rc1.dist-info → dstack-0.19.13.dist-info}/RECORD +44 -36
  42. {dstack-0.19.12rc1.dist-info → dstack-0.19.13.dist-info}/WHEEL +0 -0
  43. {dstack-0.19.12rc1.dist-info → dstack-0.19.13.dist-info}/entry_points.txt +0 -0
  44. {dstack-0.19.12rc1.dist-info → dstack-0.19.13.dist-info}/licenses/LICENSE.md +0 -0
@@ -3,7 +3,7 @@ import subprocess
3
3
  import sys
4
4
  import time
5
5
  from pathlib import Path
6
- from typing import Dict, List, Optional, Set, Tuple
6
+ from typing import Dict, List, Optional, Set
7
7
 
8
8
  import gpuhunt
9
9
  from pydantic import parse_obj_as
@@ -41,7 +41,7 @@ from dstack._internal.core.models.configurations import (
41
41
  )
42
42
  from dstack._internal.core.models.repos.base import Repo
43
43
  from dstack._internal.core.models.resources import CPUSpec
44
- from dstack._internal.core.models.runs import JobSubmission, JobTerminationReason, RunStatus
44
+ from dstack._internal.core.models.runs import JobStatus, JobSubmission, RunStatus
45
45
  from dstack._internal.core.services.configs import ConfigManager
46
46
  from dstack._internal.core.services.diff import diff_models
47
47
  from dstack._internal.utils.common import local_time
@@ -166,12 +166,7 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
166
166
  # We can attach to run multiple times if it goes from running to pending (retried).
167
167
  while True:
168
168
  with MultiItemStatus(f"Launching [code]{run.name}[/]...", console=console) as live:
169
- while run.status in (
170
- RunStatus.SUBMITTED,
171
- RunStatus.PENDING,
172
- RunStatus.PROVISIONING,
173
- RunStatus.TERMINATING,
174
- ):
169
+ while not _is_ready_to_attach(run):
175
170
  table = get_runs_table([run])
176
171
  live.update(table)
177
172
  time.sleep(5)
@@ -553,35 +548,38 @@ def _print_service_urls(run: Run) -> None:
553
548
 
554
549
 
555
550
  def print_finished_message(run: Run):
551
+ status_message = (
552
+ run._run.latest_job_submission.status_message
553
+ if run._run.latest_job_submission
554
+ else run._run.status_message
555
+ )
556
+ error = (
557
+ run._run.latest_job_submission.error if run._run.latest_job_submission else run._run.error
558
+ )
559
+ termination_reason = (
560
+ run._run.latest_job_submission.termination_reason
561
+ if run._run.latest_job_submission
562
+ else None
563
+ )
564
+ termination_reason_message = (
565
+ run._run.latest_job_submission.termination_reason_message
566
+ if run._run.latest_job_submission
567
+ else None
568
+ )
556
569
  if run.status == RunStatus.DONE:
557
- console.print("[code]Done[/]")
570
+ console.print(f"[code]{status_message.capitalize()}[/code]")
558
571
  return
572
+ else:
573
+ str = f"[error]{status_message.capitalize()}[/error]"
574
+ if error:
575
+ str += f" ([error]{error.capitalize()}[/error])"
576
+ console.print(str)
559
577
 
560
- termination_reason, termination_reason_message, exit_status = (
561
- _get_run_termination_reason_and_exit_status(run)
562
- )
563
- message = "Run failed due to unknown reason. Check CLI, server, and run logs."
564
- if run.status == RunStatus.TERMINATED:
565
- message = "Run terminated due to unknown reason. Check CLI, server, and run logs."
566
-
567
- if termination_reason == JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY:
568
- message = (
569
- "All provisioning attempts failed. "
570
- "This is likely due to cloud providers not having enough capacity. "
571
- "Check CLI and server logs for more details."
572
- )
573
- elif termination_reason is not None:
574
- exit_status_details = f"Exit status: {exit_status}.\n" if exit_status else ""
575
- error_details = (
576
- f"Error: {termination_reason_message}\n" if termination_reason_message else ""
577
- )
578
- message = (
579
- f"Run failed with error code {termination_reason.name}.\n"
580
- f"{exit_status_details}"
581
- f"{error_details}"
582
- f"Check [bold]dstack logs -d {run.name}[/bold] for more details."
583
- )
584
- console.print(f"[error]{message}[/]")
578
+ if termination_reason_message:
579
+ console.print(f"[error]{termination_reason_message}[/error]")
580
+
581
+ if termination_reason:
582
+ console.print(f"Check [code]dstack logs -d {run.name}[/code] for more details.")
585
583
 
586
584
 
587
585
  def get_run_exit_code(run: Run) -> int:
@@ -590,19 +588,17 @@ def get_run_exit_code(run: Run) -> int:
590
588
  return 1
591
589
 
592
590
 
593
- def _get_run_termination_reason_and_exit_status(
594
- run: Run,
595
- ) -> Tuple[Optional[JobTerminationReason], Optional[str], Optional[int]]:
596
- if len(run._run.jobs) == 0:
597
- return None, None, None
598
- job = run._run.jobs[0]
599
- if len(job.job_submissions) == 0:
600
- return None, None, None
601
- job_submission = job.job_submissions[0]
602
- return (
603
- job_submission.termination_reason,
604
- job_submission.termination_reason_message,
605
- job_submission.exit_status,
591
+ def _is_ready_to_attach(run: Run) -> bool:
592
+ return not (
593
+ run.status
594
+ in [
595
+ RunStatus.SUBMITTED,
596
+ RunStatus.PENDING,
597
+ RunStatus.PROVISIONING,
598
+ RunStatus.TERMINATING,
599
+ ]
600
+ or run._run.jobs[0].job_submissions[-1].status
601
+ in [JobStatus.SUBMITTED, JobStatus.PROVISIONING, JobStatus.PULLING]
606
602
  )
607
603
 
608
604
 
@@ -12,7 +12,6 @@ from dstack._internal.core.models.profiles import (
12
12
  TerminationPolicy,
13
13
  )
14
14
  from dstack._internal.core.models.runs import (
15
- Job,
16
15
  RunPlan,
17
16
  )
18
17
  from dstack._internal.core.services.profiles import get_termination
@@ -154,8 +153,7 @@ def get_runs_table(
154
153
  table.add_column("BACKEND", style="grey58", ratio=2)
155
154
  table.add_column("RESOURCES", ratio=3 if not verbose else 2)
156
155
  if verbose:
157
- table.add_column("INSTANCE", no_wrap=True, ratio=1)
158
- table.add_column("RESERVATION", no_wrap=True, ratio=1)
156
+ table.add_column("INSTANCE TYPE", no_wrap=True, ratio=1)
159
157
  table.add_column("PRICE", style="grey58", ratio=1)
160
158
  table.add_column("STATUS", no_wrap=True, ratio=1)
161
159
  table.add_column("SUBMITTED", style="grey58", no_wrap=True, ratio=1)
@@ -163,16 +161,20 @@ def get_runs_table(
163
161
  table.add_column("ERROR", no_wrap=True, ratio=2)
164
162
 
165
163
  for run in runs:
166
- run_error = _get_run_error(run)
167
164
  run = run._run # TODO(egor-s): make public attribute
168
165
 
169
166
  run_row: Dict[Union[str, int], Any] = {
170
167
  "NAME": run.run_spec.run_name,
171
168
  "SUBMITTED": format_date(run.submitted_at),
172
- "ERROR": run_error,
169
+ "STATUS": (
170
+ run.latest_job_submission.status_message
171
+ if run.status.is_finished() and run.latest_job_submission
172
+ else run.status_message
173
+ ),
173
174
  }
175
+ if run.error:
176
+ run_row["ERROR"] = run.error
174
177
  if len(run.jobs) != 1:
175
- run_row["STATUS"] = run.status
176
178
  add_row_from_dict(table, run_row)
177
179
 
178
180
  for job in run.jobs:
@@ -183,25 +185,26 @@ def get_runs_table(
183
185
  status += f" (inactive for {inactive_for})"
184
186
  job_row: Dict[Union[str, int], Any] = {
185
187
  "NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}",
186
- "STATUS": status,
188
+ "STATUS": latest_job_submission.status_message,
187
189
  "SUBMITTED": format_date(latest_job_submission.submitted_at),
188
- "ERROR": _get_job_error(job),
190
+ "ERROR": latest_job_submission.error,
189
191
  }
190
192
  jpd = latest_job_submission.job_provisioning_data
191
193
  if jpd is not None:
192
194
  resources = jpd.instance_type.resources
193
- instance = jpd.instance_type.name
195
+ instance_type = jpd.instance_type.name
194
196
  jrd = latest_job_submission.job_runtime_data
195
197
  if jrd is not None and jrd.offer is not None:
196
198
  resources = jrd.offer.instance.resources
197
199
  if jrd.offer.total_blocks > 1:
198
- instance += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})"
200
+ instance_type += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})"
201
+ if jpd.reservation:
202
+ instance_type += f" ({jpd.reservation})"
199
203
  job_row.update(
200
204
  {
201
205
  "BACKEND": f"{jpd.backend.value.replace('remote', 'ssh')} ({jpd.region})",
202
206
  "RESOURCES": resources.pretty_format(include_spot=True),
203
- "INSTANCE": instance,
204
- "RESERVATION": jpd.reservation,
207
+ "INSTANCE TYPE": instance_type,
205
208
  "PRICE": f"${jpd.price:.4f}".rstrip("0").rstrip("."),
206
209
  }
207
210
  )
@@ -211,18 +214,3 @@ def get_runs_table(
211
214
  add_row_from_dict(table, job_row, style="secondary" if len(run.jobs) != 1 else None)
212
215
 
213
216
  return table
214
-
215
-
216
- def _get_run_error(run: Run) -> str:
217
- return run._run.error or ""
218
-
219
-
220
- def _get_job_error(job: Job) -> str:
221
- job_submission = job.job_submissions[-1]
222
- termination_reason = job_submission.termination_reason
223
- exit_status = job_submission.exit_status
224
- if termination_reason is None:
225
- return ""
226
- if exit_status:
227
- return f"{termination_reason.name} {exit_status}"
228
- return termination_reason.name
@@ -132,7 +132,8 @@ class AWSCompute(
132
132
  availability_offers = []
133
133
  for offer in offers:
134
134
  availability = InstanceAvailability.UNKNOWN
135
- if not _has_quota(regions_to_quotas[offer.region], offer.instance.name):
135
+ quota = _has_quota(regions_to_quotas[offer.region], offer.instance.name)
136
+ if quota is not None and not quota:
136
137
  availability = InstanceAvailability.NO_QUOTA
137
138
  availability_offers.append(
138
139
  InstanceOfferWithAvailability(
@@ -231,6 +232,7 @@ class AWSCompute(
231
232
  image_id, username = aws_resources.get_image_id_and_username(
232
233
  ec2_client=ec2_client,
233
234
  cuda=len(instance_offer.instance.resources.gpus) > 0,
235
+ instance_type=instance_offer.instance.name,
234
236
  image_config=self.config.os_images,
235
237
  )
236
238
  response = ec2_resource.create_instances(
@@ -781,10 +783,18 @@ def _get_regions_to_quotas(
781
783
  ) -> Dict[str, Dict[str, int]]:
782
784
  def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]:
783
785
  region_quotas = {}
784
- for page in client.get_paginator("list_service_quotas").paginate(ServiceCode="ec2"):
785
- for q in page["Quotas"]:
786
- if "On-Demand" in q["QuotaName"]:
787
- region_quotas[q["UsageMetric"]["MetricDimensions"]["Class"]] = q["Value"]
786
+ try:
787
+ for page in client.get_paginator("list_service_quotas").paginate(ServiceCode="ec2"):
788
+ for q in page["Quotas"]:
789
+ if "On-Demand" in q["QuotaName"]:
790
+ region_quotas[q["UsageMetric"]["MetricDimensions"]["Class"]] = q["Value"]
791
+ except botocore.exceptions.ClientError as e:
792
+ if len(e.args) > 0 and "TooManyRequestsException" in e.args[0]:
793
+ logger.warning(
794
+ "Failed to get quotas due to rate limits. Quotas won't be accounted for."
795
+ )
796
+ else:
797
+ logger.exception(e)
788
798
  return region_quotas
789
799
 
790
800
  regions_to_quotas = {}
@@ -800,12 +810,15 @@ def _get_regions_to_quotas(
800
810
  return regions_to_quotas
801
811
 
802
812
 
803
- def _has_quota(quotas: Dict[str, int], instance_name: str) -> bool:
813
+ def _has_quota(quotas: Dict[str, int], instance_name: str) -> Optional[bool]:
814
+ quota = quotas.get("Standard/OnDemand")
804
815
  if instance_name.startswith("p"):
805
- return quotas.get("P/OnDemand", 0) > 0
816
+ quota = quotas.get("P/OnDemand")
806
817
  if instance_name.startswith("g"):
807
- return quotas.get("G/OnDemand", 0) > 0
808
- return quotas.get("Standard/OnDemand", 0) > 0
818
+ quota = quotas.get("G/OnDemand")
819
+ if quota is None:
820
+ return None
821
+ return quota > 0
809
822
 
810
823
 
811
824
  def _get_regions_to_zones(session: boto3.Session, regions: List[str]) -> Dict[str, List[str]]:
@@ -12,11 +12,13 @@ from dstack._internal.utils.logging import get_logger
12
12
  logger = get_logger(__name__)
13
13
 
14
14
  DSTACK_ACCOUNT_ID = "142421590066"
15
+ DLAMI_OWNER_ACCOUNT_ID = "898082745236"
15
16
 
16
17
 
17
18
  def get_image_id_and_username(
18
19
  ec2_client: botocore.client.BaseClient,
19
20
  cuda: bool,
21
+ instance_type: str,
20
22
  image_config: Optional[AWSOSImageConfig] = None,
21
23
  ) -> tuple[str, str]:
22
24
  if image_config is not None:
@@ -27,6 +29,11 @@ def get_image_id_and_username(
27
29
  image_name = image.name
28
30
  image_owner = image.owner
29
31
  username = image.user
32
+ elif _supported_by_dlami(instance_type):
33
+ # TODO: Update DLAMI image version from time to time
34
+ image_name = "Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04) 20250516"
35
+ image_owner = DLAMI_OWNER_ACCOUNT_ID
36
+ username = "ubuntu"
30
37
  else:
31
38
  image_name = (
32
39
  f"dstack-{version.base_image}" if not cuda else f"dstack-cuda-{version.base_image}"
@@ -628,6 +635,25 @@ def _is_private_subnet_with_internet_egress(
628
635
  return False
629
636
 
630
637
 
638
+ def _supported_by_dlami(instance_type: str) -> bool:
639
+ # Currently only p3. instances are not supported by DLAMI among GPU instances.
640
+ return any(
641
+ instance_type.startswith(family)
642
+ for family in [
643
+ "g4dn.",
644
+ "g5.",
645
+ "g6.",
646
+ "gr6.",
647
+ "g6e.",
648
+ "p4d.",
649
+ "p4de.",
650
+ "p5.",
651
+ "p5e.",
652
+ "p6-b200.",
653
+ ]
654
+ )
655
+
656
+
631
657
  def get_reservation(
632
658
  ec2_client: botocore.client.BaseClient,
633
659
  reservation_id: str,
@@ -80,7 +80,6 @@ def catalog_item_to_offer(
80
80
  spot=item.spot,
81
81
  disk=Disk(size_mib=disk_size_mib),
82
82
  )
83
- resources.description = resources.pretty_format()
84
83
  return InstanceOffer(
85
84
  backend=backend,
86
85
  instance=InstanceType(
@@ -19,9 +19,6 @@ from dstack._internal.core.models.backends.base import (
19
19
  BackendType,
20
20
  )
21
21
 
22
- # TODO: Add all supported regions and default regions
23
- REGIONS = []
24
-
25
22
 
26
23
  class {{ backend_name }}Configurator(Configurator):
27
24
  TYPE = BackendType.{{ backend_name|upper }}
@@ -31,13 +28,11 @@ class {{ backend_name }}Configurator(Configurator):
31
28
  self, config: {{ backend_name }}BackendConfigWithCreds, default_creds_enabled: bool
32
29
  ):
33
30
  self._validate_creds(config.creds)
34
- # TODO: Validate additional config parameters if any
31
+ # TODO: If possible, validate config.regions and any other config parameters
35
32
 
36
33
  def create_backend(
37
34
  self, project_name: str, config: {{ backend_name }}BackendConfigWithCreds
38
35
  ) -> BackendRecord:
39
- if config.regions is None:
40
- config.regions = REGIONS
41
36
  return BackendRecord(
42
37
  config={{ backend_name }}StoredConfig(
43
38
  **{{ backend_name }}BackendConfig.__response__.parse_obj(config).dict()
@@ -22,6 +22,7 @@ class {{ backend_name }}BackendConfig(CoreModel):
22
22
  It also serves as a base class for other backend config models.
23
23
  Should not include creds.
24
24
  """
25
+
25
26
  type: Annotated[
26
27
  Literal["{{ backend_name|lower }}"],
27
28
  Field(description="The type of backend"),
@@ -37,6 +38,7 @@ class {{ backend_name }}BackendConfigWithCreds({{ backend_name }}BackendConfig):
37
38
  """
38
39
  Same as `{{ backend_name }}BackendConfig` but also includes creds.
39
40
  """
41
+
40
42
  creds: Annotated[Any{{ backend_name }}Creds, Field(description="The credentials")]
41
43
 
42
44
 
@@ -48,6 +50,7 @@ class {{ backend_name }}StoredConfig({{ backend_name }}BackendConfig):
48
50
  The backend config used for config parameters in the DB.
49
51
  Can extend `{{ backend_name }}BackendConfig` with additional parameters.
50
52
  """
53
+
51
54
  pass
52
55
 
53
56
 
@@ -55,4 +58,5 @@ class {{ backend_name }}Config({{ backend_name }}StoredConfig):
55
58
  """
56
59
  The backend config used by `{{ backend_name }}Backend` and `{{ backend_name }}Compute`.
57
60
  """
61
+
58
62
  creds: Any{{ backend_name }}Creds
File without changes
@@ -0,0 +1,72 @@
1
+ from typing import Any, Dict, Optional
2
+
3
+ from dstack._internal.core.models.fleets import ApplyFleetPlanInput, FleetSpec
4
+ from dstack._internal.core.models.instances import Instance
5
+
6
+
7
+ def get_get_plan_excludes(fleet_spec: FleetSpec) -> Dict:
8
+ get_plan_excludes = {}
9
+ spec_excludes = get_fleet_spec_excludes(fleet_spec)
10
+ if spec_excludes:
11
+ get_plan_excludes["spec"] = spec_excludes
12
+ return get_plan_excludes
13
+
14
+
15
+ def get_apply_plan_excludes(plan_input: ApplyFleetPlanInput) -> Dict:
16
+ apply_plan_excludes = {}
17
+ spec_excludes = get_fleet_spec_excludes(plan_input.spec)
18
+ if spec_excludes:
19
+ apply_plan_excludes["spec"] = spec_excludes
20
+ current_resource = plan_input.current_resource
21
+ if current_resource is not None:
22
+ current_resource_excludes = {}
23
+ apply_plan_excludes["current_resource"] = current_resource_excludes
24
+ if all(map(_should_exclude_instance_cpu_arch, current_resource.instances)):
25
+ current_resource_excludes["instances"] = {
26
+ "__all__": {"instance_type": {"resources": {"cpu_arch"}}}
27
+ }
28
+ return {"plan": apply_plan_excludes}
29
+
30
+
31
+ def get_create_fleet_excludes(fleet_spec: FleetSpec) -> Dict:
32
+ create_fleet_excludes = {}
33
+ spec_excludes = get_fleet_spec_excludes(fleet_spec)
34
+ if spec_excludes:
35
+ create_fleet_excludes["spec"] = spec_excludes
36
+ return create_fleet_excludes
37
+
38
+
39
+ def get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[Dict]:
40
+ """
41
+ Returns `fleet_spec` exclude mapping to exclude certain fields from the request.
42
+ Use this method to exclude new fields when they are not set to keep
43
+ clients backward-compatibility with older servers.
44
+ """
45
+ spec_excludes: Dict[str, Any] = {}
46
+ configuration_excludes: Dict[str, Any] = {}
47
+ profile_excludes: set[str] = set()
48
+ profile = fleet_spec.profile
49
+ if profile.fleets is None:
50
+ profile_excludes.add("fleets")
51
+ if fleet_spec.configuration.tags is None:
52
+ configuration_excludes["tags"] = True
53
+ if profile.tags is None:
54
+ profile_excludes.add("tags")
55
+ if profile.startup_order is None:
56
+ profile_excludes.add("startup_order")
57
+ if profile.stop_criteria is None:
58
+ profile_excludes.add("stop_criteria")
59
+ if configuration_excludes:
60
+ spec_excludes["configuration"] = configuration_excludes
61
+ if profile_excludes:
62
+ spec_excludes["profile"] = profile_excludes
63
+ if spec_excludes:
64
+ return spec_excludes
65
+ return None
66
+
67
+
68
+ def _should_exclude_instance_cpu_arch(instance: Instance) -> bool:
69
+ try:
70
+ return instance.instance_type.resources.cpu_arch is None
71
+ except AttributeError:
72
+ return True
@@ -0,0 +1,34 @@
1
+ from typing import Dict
2
+
3
+ from dstack._internal.core.models.gateways import GatewayConfiguration, GatewaySpec
4
+
5
+
6
+ def get_gateway_spec_excludes(gateway_spec: GatewaySpec) -> Dict:
7
+ """
8
+ Returns `gateway_spec` exclude mapping to exclude certain fields from the request.
9
+ Use this method to exclude new fields when they are not set to keep
10
+ clients backward-compatibility with older servers.
11
+ """
12
+ spec_excludes = {}
13
+ spec_excludes["configuration"] = _get_gateway_configuration_excludes(
14
+ gateway_spec.configuration
15
+ )
16
+ return spec_excludes
17
+
18
+
19
+ def get_create_gateway_excludes(configuration: GatewayConfiguration) -> Dict:
20
+ """
21
+ Returns an exclude mapping to exclude certain fields from the create gateway request.
22
+ Use this method to exclude new fields when they are not set to keep
23
+ clients backward-compatibility with older servers.
24
+ """
25
+ create_gateway_excludes = {}
26
+ create_gateway_excludes["configuration"] = _get_gateway_configuration_excludes(configuration)
27
+ return create_gateway_excludes
28
+
29
+
30
+ def _get_gateway_configuration_excludes(configuration: GatewayConfiguration) -> Dict:
31
+ configuration_excludes = {}
32
+ if configuration.tags is None:
33
+ configuration_excludes["tags"] = True
34
+ return configuration_excludes
@@ -0,0 +1,125 @@
1
+ from typing import Any, Dict, Optional
2
+
3
+ from dstack._internal.core.models.configurations import ServiceConfiguration
4
+ from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSubmission, RunSpec
5
+ from dstack._internal.server.schemas.runs import GetRunPlanRequest
6
+
7
+
8
+ def get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[Dict]:
9
+ """
10
+ Returns `plan` exclude mapping to exclude certain fields from the request.
11
+ Use this method to exclude new fields when they are not set to keep
12
+ clients backward-compatibility with older servers.
13
+ """
14
+ apply_plan_excludes = {}
15
+ run_spec_excludes = get_run_spec_excludes(plan.run_spec)
16
+ if run_spec_excludes is not None:
17
+ apply_plan_excludes["run_spec"] = run_spec_excludes
18
+ current_resource = plan.current_resource
19
+ if current_resource is not None:
20
+ current_resource_excludes = {}
21
+ current_resource_excludes["status_message"] = True
22
+ apply_plan_excludes["current_resource"] = current_resource_excludes
23
+ current_resource_excludes["run_spec"] = get_run_spec_excludes(current_resource.run_spec)
24
+ job_submissions_excludes = {}
25
+ current_resource_excludes["jobs"] = {
26
+ "__all__": {"job_submissions": {"__all__": job_submissions_excludes}}
27
+ }
28
+ job_submissions = [js for j in current_resource.jobs for js in j.job_submissions]
29
+ if all(map(_should_exclude_job_submission_jpd_cpu_arch, job_submissions)):
30
+ job_submissions_excludes["job_provisioning_data"] = {
31
+ "instance_type": {"resources": {"cpu_arch"}}
32
+ }
33
+ if all(map(_should_exclude_job_submission_jrd_cpu_arch, job_submissions)):
34
+ job_submissions_excludes["job_runtime_data"] = {
35
+ "offer": {"instance": {"resources": {"cpu_arch"}}}
36
+ }
37
+ if all(js.exit_status is None for js in job_submissions):
38
+ job_submissions_excludes["exit_status"] = True
39
+ latest_job_submission = current_resource.latest_job_submission
40
+ if latest_job_submission is not None:
41
+ latest_job_submission_excludes = {}
42
+ current_resource_excludes["latest_job_submission"] = latest_job_submission_excludes
43
+ if _should_exclude_job_submission_jpd_cpu_arch(latest_job_submission):
44
+ latest_job_submission_excludes["job_provisioning_data"] = {
45
+ "instance_type": {"resources": {"cpu_arch"}}
46
+ }
47
+ if _should_exclude_job_submission_jrd_cpu_arch(latest_job_submission):
48
+ latest_job_submission_excludes["job_runtime_data"] = {
49
+ "offer": {"instance": {"resources": {"cpu_arch"}}}
50
+ }
51
+ if latest_job_submission.exit_status is None:
52
+ latest_job_submission_excludes["exit_status"] = True
53
+ return {"plan": apply_plan_excludes}
54
+
55
+
56
+ def get_get_plan_excludes(request: GetRunPlanRequest) -> Optional[Dict]:
57
+ """
58
+ Excludes new fields when they are not set to keep
59
+ clients backward-compatibility with older servers.
60
+ """
61
+ get_plan_excludes = {}
62
+ run_spec_excludes = get_run_spec_excludes(request.run_spec)
63
+ if run_spec_excludes is not None:
64
+ get_plan_excludes["run_spec"] = run_spec_excludes
65
+ if request.max_offers is None:
66
+ get_plan_excludes["max_offers"] = True
67
+ return get_plan_excludes
68
+
69
+
70
+ def get_run_spec_excludes(run_spec: RunSpec) -> Optional[Dict]:
71
+ """
72
+ Returns `run_spec` exclude mapping to exclude certain fields from the request.
73
+ Use this method to exclude new fields when they are not set to keep
74
+ clients backward-compatibility with older servers.
75
+ """
76
+ spec_excludes: dict[str, Any] = {}
77
+ configuration_excludes: dict[str, Any] = {}
78
+ profile_excludes: set[str] = set()
79
+ configuration = run_spec.configuration
80
+ profile = run_spec.profile
81
+
82
+ if configuration.fleets is None:
83
+ configuration_excludes["fleets"] = True
84
+ if profile is not None and profile.fleets is None:
85
+ profile_excludes.add("fleets")
86
+ if configuration.tags is None:
87
+ configuration_excludes["tags"] = True
88
+ if profile is not None and profile.tags is None:
89
+ profile_excludes.add("tags")
90
+ if isinstance(configuration, ServiceConfiguration) and not configuration.rate_limits:
91
+ configuration_excludes["rate_limits"] = True
92
+ if configuration.shell is None:
93
+ configuration_excludes["shell"] = True
94
+ if configuration.priority is None:
95
+ configuration_excludes["priority"] = True
96
+ if configuration.startup_order is None:
97
+ configuration_excludes["startup_order"] = True
98
+ if profile is not None and profile.startup_order is None:
99
+ profile_excludes.add("startup_order")
100
+ if configuration.stop_criteria is None:
101
+ configuration_excludes["stop_criteria"] = True
102
+ if profile is not None and profile.stop_criteria is None:
103
+ profile_excludes.add("stop_criteria")
104
+
105
+ if configuration_excludes:
106
+ spec_excludes["configuration"] = configuration_excludes
107
+ if profile_excludes:
108
+ spec_excludes["profile"] = profile_excludes
109
+ if spec_excludes:
110
+ return spec_excludes
111
+ return None
112
+
113
+
114
+ def _should_exclude_job_submission_jpd_cpu_arch(job_submission: JobSubmission) -> bool:
115
+ try:
116
+ return job_submission.job_provisioning_data.instance_type.resources.cpu_arch is None
117
+ except AttributeError:
118
+ return True
119
+
120
+
121
+ def _should_exclude_job_submission_jrd_cpu_arch(job_submission: JobSubmission) -> bool:
122
+ try:
123
+ return job_submission.job_runtime_data.offer.instance.resources.cpu_arch is None
124
+ except AttributeError:
125
+ return True
@@ -0,0 +1,32 @@
1
+ from typing import Dict
2
+
3
+ from dstack._internal.core.models.volumes import VolumeConfiguration, VolumeSpec
4
+
5
+
6
+ def get_volume_spec_excludes(volume_spec: VolumeSpec) -> Dict:
7
+ """
8
+ Returns `volume_spec` exclude mapping to exclude certain fields from the request.
9
+ Use this method to exclude new fields when they are not set to keep
10
+ clients backward-compatibility with older servers.
11
+ """
12
+ spec_excludes = {}
13
+ spec_excludes["configuration"] = _get_volume_configuration_excludes(volume_spec.configuration)
14
+ return spec_excludes
15
+
16
+
17
+ def get_create_volume_excludes(configuration: VolumeConfiguration) -> Dict:
18
+ """
19
+ Returns an exclude mapping to exclude certain fields from the create volume request.
20
+ Use this method to exclude new fields when they are not set to keep
21
+ clients backward-compatibility with older servers.
22
+ """
23
+ create_volume_excludes = {}
24
+ create_volume_excludes["configuration"] = _get_volume_configuration_excludes(configuration)
25
+ return create_volume_excludes
26
+
27
+
28
+ def _get_volume_configuration_excludes(configuration: VolumeConfiguration) -> Dict:
29
+ configuration_excludes = {}
30
+ if configuration.tags is None:
31
+ configuration_excludes["tags"] = True
32
+ return configuration_excludes