dstack 0.19.32__py3-none-any.whl → 0.19.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)
  1. dstack/_internal/cli/commands/offer.py +1 -1
  2. dstack/_internal/cli/services/configurators/run.py +1 -5
  3. dstack/_internal/core/backends/aws/compute.py +8 -5
  4. dstack/_internal/core/backends/azure/compute.py +9 -6
  5. dstack/_internal/core/backends/base/compute.py +40 -17
  6. dstack/_internal/core/backends/base/offers.py +7 -1
  7. dstack/_internal/core/backends/datacrunch/compute.py +9 -6
  8. dstack/_internal/core/backends/gcp/compute.py +151 -6
  9. dstack/_internal/core/backends/gcp/models.py +10 -0
  10. dstack/_internal/core/backends/gcp/resources.py +87 -5
  11. dstack/_internal/core/backends/hotaisle/compute.py +11 -1
  12. dstack/_internal/core/backends/kubernetes/compute.py +161 -83
  13. dstack/_internal/core/backends/kubernetes/models.py +4 -2
  14. dstack/_internal/core/backends/nebius/compute.py +9 -6
  15. dstack/_internal/core/backends/oci/compute.py +9 -6
  16. dstack/_internal/core/backends/runpod/compute.py +14 -7
  17. dstack/_internal/core/backends/vastai/compute.py +3 -1
  18. dstack/_internal/core/backends/vastai/configurator.py +0 -1
  19. dstack/_internal/core/compatibility/runs.py +25 -4
  20. dstack/_internal/core/models/fleets.py +1 -1
  21. dstack/_internal/core/models/instances.py +2 -1
  22. dstack/_internal/core/models/profiles.py +1 -1
  23. dstack/_internal/core/models/runs.py +4 -2
  24. dstack/_internal/core/models/users.py +10 -0
  25. dstack/_internal/core/services/configs/__init__.py +1 -0
  26. dstack/_internal/core/services/ssh/key_manager.py +56 -0
  27. dstack/_internal/server/background/tasks/process_instances.py +5 -1
  28. dstack/_internal/server/background/tasks/process_running_jobs.py +1 -0
  29. dstack/_internal/server/migrations/versions/ff1d94f65b08_user_ssh_key.py +34 -0
  30. dstack/_internal/server/models.py +6 -0
  31. dstack/_internal/server/routers/metrics.py +6 -2
  32. dstack/_internal/server/routers/runs.py +5 -1
  33. dstack/_internal/server/routers/users.py +21 -2
  34. dstack/_internal/server/services/jobs/__init__.py +18 -9
  35. dstack/_internal/server/services/offers.py +1 -0
  36. dstack/_internal/server/services/runs.py +13 -4
  37. dstack/_internal/server/services/users.py +35 -2
  38. dstack/_internal/server/statics/index.html +1 -1
  39. dstack/_internal/server/statics/main-720ce3a11140daa480cc.css +3 -0
  40. dstack/_internal/server/statics/{main-c51afa7f243e24d3e446.js → main-e79754c136f1d8e4e7e6.js} +12632 -8039
  41. dstack/_internal/server/statics/{main-c51afa7f243e24d3e446.js.map → main-e79754c136f1d8e4e7e6.js.map} +1 -1
  42. dstack/_internal/server/testing/common.py +4 -0
  43. dstack/api/_public/__init__.py +8 -11
  44. dstack/api/_public/repos.py +0 -21
  45. dstack/api/_public/runs.py +61 -9
  46. dstack/api/server/__init__.py +4 -0
  47. dstack/api/server/_users.py +17 -2
  48. dstack/version.py +2 -2
  49. {dstack-0.19.32.dist-info → dstack-0.19.34.dist-info}/METADATA +2 -2
  50. {dstack-0.19.32.dist-info → dstack-0.19.34.dist-info}/RECORD +53 -51
  51. dstack/_internal/server/statics/main-56191fbfe77f49b251de.css +0 -3
  52. {dstack-0.19.32.dist-info → dstack-0.19.34.dist-info}/WHEEL +0 -0
  53. {dstack-0.19.32.dist-info → dstack-0.19.34.dist-info}/entry_points.txt +0 -0
  54. {dstack-0.19.32.dist-info → dstack-0.19.34.dist-info}/licenses/LICENSE.md +0 -0

dstack/_internal/cli/commands/offer.py
@@ -104,8 +104,8 @@ class OfferCommand(APIBaseCommand):
 
         run_spec = RunSpec(
             configuration=conf,
-            ssh_key_pub="(dummy)",
             profile=profile,
+            ssh_key_pub="(dummy)",  # TODO: Remove since 0.19.40
         )
 
         if args.group_by:

dstack/_internal/cli/services/configurators/run.py
@@ -62,7 +62,6 @@ from dstack._internal.utils.interpolator import InterpolatorError, VariablesInte
 from dstack._internal.utils.logging import get_logger
 from dstack._internal.utils.nested_list import NestedList, NestedListItem
 from dstack._internal.utils.path import is_absolute_posix_path
-from dstack.api._public.repos import get_ssh_keypair
 from dstack.api._public.runs import Run
 from dstack.api.server import APIClient
 from dstack.api.utils import load_profile
@@ -135,10 +134,6 @@ class BaseRunConfigurator(
 
         config_manager = ConfigManager()
         repo = self.get_repo(conf, configuration_path, configurator_args, config_manager)
-        self.api.ssh_identity_file = get_ssh_keypair(
-            configurator_args.ssh_identity_file,
-            config_manager.dstack_key_path,
-        )
         profile = load_profile(Path.cwd(), configurator_args.profile)
         with console.status("Getting apply plan..."):
             run_plan = self.api.runs.get_run_plan(
@@ -146,6 +141,7 @@ class BaseRunConfigurator(
                 repo=repo,
                 configuration_path=configuration_path,
                 profile=profile,
+                ssh_identity_file=configurator_args.ssh_identity_file,
             )
 
         print_run_plan(run_plan, max_offers=configurator_args.max_offers)

dstack/_internal/core/backends/aws/compute.py
@@ -1,4 +1,5 @@
 import threading
+from collections.abc import Iterable
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
@@ -34,7 +35,11 @@ from dstack._internal.core.backends.base.compute import (
     get_user_data,
     merge_tags,
 )
-from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
+from dstack._internal.core.backends.base.offers import (
+    OfferModifier,
+    get_catalog_offers,
+    get_offers_disk_modifier,
+)
 from dstack._internal.core.errors import (
     ComputeError,
     NoCapacityError,
@@ -159,10 +164,8 @@ class AWSCompute(
         )
         return availability_offers
 
-    def get_offers_modifier(
-        self, requirements: Requirements
-    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
-        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
+        return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
 
     def _get_offers_cached_key(self, requirements: Requirements) -> int:
         # Requirements is not hashable, so we use a hack to get arguments hash

dstack/_internal/core/backends/azure/compute.py
@@ -1,8 +1,9 @@
 import base64
 import enum
 import re
+from collections.abc import Iterable
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
 
 from azure.core.credentials import TokenCredential
 from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError
@@ -51,7 +52,11 @@ from dstack._internal.core.backends.base.compute import (
     merge_tags,
     requires_nvidia_proprietary_kernel_modules,
 )
-from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
+from dstack._internal.core.backends.base.offers import (
+    OfferModifier,
+    get_catalog_offers,
+    get_offers_disk_modifier,
+)
 from dstack._internal.core.consts import DSTACK_OS_IMAGE_WITH_PROPRIETARY_NVIDIA_KERNEL_MODULES
 from dstack._internal.core.errors import ComputeError, NoCapacityError
 from dstack._internal.core.models.backends.base import BackendType
@@ -108,10 +113,8 @@ class AzureCompute(
         )
         return offers_with_availability
 
-    def get_offers_modifier(
-        self, requirements: Requirements
-    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
-        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
+        return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
 
     def create_instance(
         self,

dstack/_internal/core/backends/base/compute.py
@@ -17,12 +17,13 @@ from cachetools import TTLCache, cachedmethod
 from gpuhunt import CPUArchitecture
 
 from dstack._internal import settings
-from dstack._internal.core.backends.base.offers import filter_offers_by_requirements
+from dstack._internal.core.backends.base.offers import OfferModifier, filter_offers_by_requirements
 from dstack._internal.core.consts import (
     DSTACK_RUNNER_HTTP_PORT,
     DSTACK_RUNNER_SSH_PORT,
     DSTACK_SHIM_HTTP_PORT,
 )
+from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.configurations import LEGACY_REPO_DIR
 from dstack._internal.core.models.gateways import (
     GatewayComputeConfiguration,
@@ -168,17 +169,13 @@ class ComputeWithAllOffersCached(ABC):
         """
         pass
 
-    def get_offers_modifier(
-        self, requirements: Requirements
-    ) -> Optional[
-        Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]
-    ]:
+    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
        """
-        Returns a modifier function that modifies offers before they are filtered by requirements.
-        Can return `None` to exclude the offer.
+        Returns functions that modify offers before they are filtered by requirements.
+        A modifier function can return `None` to exclude the offer.
         E.g. can be used to set appropriate disk size based on requirements.
         """
-        return None
+        return []
 
     def get_offers_post_filter(
         self, requirements: Requirements
@@ -191,14 +188,7 @@
 
     def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
         offers = self._get_all_offers_with_availability_cached()
-        modifier = self.get_offers_modifier(requirements)
-        if modifier is not None:
-            modified_offers = []
-            for o in offers:
-                modified_offer = modifier(o)
-                if modified_offer is not None:
-                    modified_offers.append(modified_offer)
-            offers = modified_offers
+        offers = self.__apply_modifiers(offers, self.get_offers_modifiers(requirements))
         offers = filter_offers_by_requirements(offers, requirements)
         post_filter = self.get_offers_post_filter(requirements)
         if post_filter is not None:
@@ -212,6 +202,20 @@
     def _get_all_offers_with_availability_cached(self) -> List[InstanceOfferWithAvailability]:
         return self.get_all_offers_with_availability()
 
+    @staticmethod
+    def __apply_modifiers(
+        offers: Iterable[InstanceOfferWithAvailability], modifiers: Iterable[OfferModifier]
+    ) -> list[InstanceOfferWithAvailability]:
+        modified_offers = []
+        for offer in offers:
+            for modifier in modifiers:
+                offer = modifier(offer)
+                if offer is None:
+                    break
+            else:
+                modified_offers.append(offer)
+        return modified_offers
+
 
 class ComputeWithFilteredOffersCached(ABC):
     """
@@ -341,6 +345,15 @@
 class ComputeWithReservationSupport:
     """
     Must be subclassed to support provisioning from reservations.
+
+    The following is expected from a backend that supports reservations:
+
+    - `get_offers` respects `Requirements.reservation` if set, and only returns
+      offers that can be provisioned in the configured reservation. It can
+      adjust some offer properties such as `availability` and
+      `availability_zones` if necessary.
+    - `create_instance` respects `InstanceConfig.reservation` if set, and
+      provisions the instance in the configured reservation.
     """
 
     pass
@@ -391,6 +404,16 @@
         """
         pass
 
+    def are_placement_groups_compatible_with_reservations(self, backend_type: BackendType) -> bool:
+        """
+        Whether placement groups can be used for instances provisioned in reservations.
+
+        Arguments:
+            backend_type: matches the backend type of this compute, unless this compute is a proxy
+                for other backends (dstack Sky)
+        """
+        return True
+
 
 class ComputeWithGatewaySupport(ABC):
     """

dstack/_internal/core/backends/base/offers.py
@@ -23,6 +23,8 @@ SUPPORTED_GPUHUNT_FLAGS = [
     "oci-spot",
     "lambda-arm",
     "gcp-a4",
+    "gcp-g4",
+    "gcp-dws-calendar-mode",
 ]
 
 
@@ -93,6 +95,7 @@ def catalog_item_to_offer(
         ),
         region=item.location,
         price=item.price,
+        backend_data=item.provider_data,
     )
 
 
@@ -199,9 +202,12 @@ def choose_disk_size_mib(
     return round(disk_size_gib * 1024)
 
 
+OfferModifier = Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]
+
+
 def get_offers_disk_modifier(
     configurable_disk_size: Range[Memory], requirements: Requirements
-) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
+) -> OfferModifier:
     """
     Returns a func that modifies offers disk by setting min value that satisfies both
     `configurable_disk_size` and `requirements`.
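
Taken together with the base/compute.py changes above, the new `OfferModifier` alias spells out the contract: a modifier takes an offer and returns a (possibly modified) copy, or `None` to drop it, and `ComputeWithAllOffersCached.__apply_modifiers` chains every modifier returned by `get_offers_modifiers`. A minimal sketch of a custom modifier a backend could return alongside the stock `get_offers_disk_modifier(...)` one (the function name and price cap are made up for illustration):

    from typing import Optional

    from dstack._internal.core.models.instances import InstanceOfferWithAvailability


    def drop_expensive_offers(
        offer: InstanceOfferWithAvailability,
    ) -> Optional[InstanceOfferWithAvailability]:
        # Hypothetical OfferModifier: exclude offers above an arbitrary hourly price cap.
        if offer.price > 100.0:
            return None
        return offer

    # e.g. in a backend subclassing ComputeWithAllOffersCached:
    #     def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
    #         return [drop_expensive_offers, get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]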

dstack/_internal/core/backends/datacrunch/compute.py
@@ -1,4 +1,5 @@
-from typing import Callable, Dict, List, Optional
+from collections.abc import Iterable
+from typing import Dict, List, Optional
 
 from datacrunch import DataCrunchClient
 from datacrunch.exceptions import APIException
@@ -12,7 +13,11 @@ from dstack._internal.core.backends.base.compute import (
     generate_unique_instance_name,
     get_shim_commands,
 )
-from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
+from dstack._internal.core.backends.base.offers import (
+    OfferModifier,
+    get_catalog_offers,
+    get_offers_disk_modifier,
+)
 from dstack._internal.core.backends.datacrunch.models import DataCrunchConfig
 from dstack._internal.core.errors import NoCapacityError
 from dstack._internal.core.models.backends.base import BackendType
@@ -59,10 +64,8 @@ class DataCrunchCompute(
         offers_with_availability = self._get_offers_with_availability(offers)
         return offers_with_availability
 
-    def get_offers_modifier(
-        self, requirements: Requirements
-    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
-        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
+        return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
 
     def _get_offers_with_availability(
         self, offers: List[InstanceOffer]

dstack/_internal/core/backends/gcp/compute.py
@@ -1,7 +1,9 @@
 import concurrent.futures
 import json
+import re
 import threading
 from collections import defaultdict
+from collections.abc import Iterable
 from dataclasses import dataclass
 from typing import Callable, Dict, List, Literal, Optional, Tuple
 
@@ -24,6 +26,7 @@ from dstack._internal.core.backends.base.compute import (
     ComputeWithPlacementGroupSupport,
     ComputeWithPrivateGatewaySupport,
     ComputeWithPrivilegedSupport,
+    ComputeWithReservationSupport,
     ComputeWithVolumeSupport,
     generate_unique_gateway_instance_name,
     generate_unique_instance_name,
@@ -35,6 +38,7 @@ from dstack._internal.core.backends.base.compute import (
     requires_nvidia_proprietary_kernel_modules,
 )
 from dstack._internal.core.backends.base.offers import (
+    OfferModifier,
     get_catalog_offers,
     get_offers_disk_modifier,
 )
@@ -78,9 +82,16 @@ logger = get_logger(__name__)
 # pd-balanced disks can be 10GB-64TB, but dstack images are 20GB and cannot grow larger
 # than 32TB because of filesystem settings
 CONFIGURABLE_DISK_SIZE = Range[Memory](min=Memory.parse("20GB"), max=Memory.parse("32TB"))
+# Pattern from https://cloud.google.com/compute/docs/instances/reservations-consume#consuming_instances_from_a_specific_reservation
+RESERVATION_PATTERN = re.compile(
+    r"projects/(?P<project_id>[a-z0-9-]+)/reservations/(?P<reservation_name>[a-z0-9-]+)"
+)
+RESOURCE_NAME_PATTERN = re.compile(r"[a-z0-9-]+")
+TPU_VERSIONS = [tpu.name for tpu in KNOWN_TPUS]
 
 
-TPU_VERSIONS = [tpu.name for tpu in KNOWN_TPUS]
+class GCPOfferBackendData(CoreModel):
+    is_dws_calendar_mode: bool = False
 
 
 class GCPVolumeDiskBackendData(CoreModel):
@@ -93,6 +104,7 @@ class GCPCompute(
     ComputeWithCreateInstanceSupport,
     ComputeWithPrivilegedSupport,
     ComputeWithMultinodeSupport,
+    ComputeWithReservationSupport,
     ComputeWithPlacementGroupSupport,
     ComputeWithGatewaySupport,
     ComputeWithPrivateGatewaySupport,
@@ -113,8 +125,12 @@
         self.resource_policies_client = compute_v1.ResourcePoliciesClient(
             credentials=self.credentials
         )
+        self.reservations_client = compute_v1.ReservationsClient(credentials=self.credentials)
         self._usable_subnets_cache_lock = threading.Lock()
         self._usable_subnets_cache = TTLCache(maxsize=1, ttl=120)
+        self._find_reservation_cache_lock = threading.Lock()
+        # smaller TTL, since we check the reservation's in_use_count, which can change often
+        self._find_reservation_cache = TTLCache(maxsize=8, ttl=20)
 
     def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         regions = get_or_error(self.config.regions)
@@ -149,10 +165,57 @@
             offers_with_availability[-1].region = region
         return offers_with_availability
 
-    def get_offers_modifier(
+    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
+        modifiers = []
+
+        if requirements.reservation:
+            zone_to_reservation = self._find_reservation(requirements.reservation)
+
+            def reservation_modifier(
+                offer: InstanceOfferWithAvailability,
+            ) -> Optional[InstanceOfferWithAvailability]:
+                if offer.instance.resources.spot:
+                    return None
+                assert offer.availability_zones is not None
+                matching_zones = []
+                zones_with_capacity = []
+                for zone in offer.availability_zones:
+                    reservation = zone_to_reservation.get(zone)
+                    if reservation is not None and _offer_matches_reservation(offer, reservation):
+                        matching_zones.append(zone)
+                        if _reservation_has_capacity(reservation):
+                            zones_with_capacity.append(zone)
+                if not matching_zones:
+                    return None
+                offer = offer.copy(deep=True)
+                if zones_with_capacity:
+                    offer.availability_zones = zones_with_capacity
+                else:
+                    offer.availability_zones = matching_zones
+                    offer.availability = InstanceAvailability.NOT_AVAILABLE
+                return offer
+
+            modifiers.append(reservation_modifier)
+
+        modifiers.append(get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements))
+        return modifiers
+
+    def get_offers_post_filter(
         self, requirements: Requirements
-    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
-        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+    ) -> Optional[Callable[[InstanceOfferWithAvailability], bool]]:
+        if requirements.reservation is None:
+
+            def reserved_offers_filter(offer: InstanceOfferWithAvailability) -> bool:
+                """Remove reserved-only offers"""
+                if GCPOfferBackendData.__response__.parse_obj(
+                    offer.backend_data
+                ).is_dws_calendar_mode:
+                    return False
+                return True
+
+            return reserved_offers_filter
+
+        return None
 
     def terminate_instance(
         self, instance_id: str, region: str, backend_data: Optional[str] = None
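
Side note on the post filter just above: catalog offers now carry `provider_data` as `backend_data` (see the base/offers.py hunk earlier), and `reserved_offers_filter` parses it back into `GCPOfferBackendData` so that DWS-calendar-mode (reserved-only) offers are hidden unless a reservation is requested. Roughly, for a single offer (a sketch; `__response__` is assumed to be the response-side variant of dstack's `CoreModel`):

    # Sketch of the per-offer check performed by reserved_offers_filter.
    data = GCPOfferBackendData.__response__.parse_obj(offer.backend_data)
    keep = not data.is_dws_calendar_mode  # drop reserved-only offers when no reservation is configured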

@@ -305,6 +368,16 @@
         )
 
         for zone in zones:
+            reservation = None
+            if instance_config.reservation:
+                reservation = self._find_reservation(instance_config.reservation).get(zone)
+                if reservation is None:
+                    logger.warning(
+                        "Reservation %s no longer exists in zone %s",
+                        instance_config.reservation,
+                        zone,
+                    )
+                    continue
             request = compute_v1.InsertInstanceRequest()
             request.zone = zone
             request.project = self.config.project_id
@@ -335,6 +408,7 @@
                 roce_subnetworks=roce_subnets,
                 allocate_public_ip=allocate_public_ip,
                 placement_policy=placement_policy,
+                reservation=reservation,
             )
             try:
                 # GCP needs some time to return an error in case of no capacity (< 30s).
@@ -475,6 +549,11 @@
     ) -> bool:
         return placement_group.configuration.region == instance_offer.region
 
+    def are_placement_groups_compatible_with_reservations(self, backend_type: BackendType) -> bool:
+        # Cannot use our own placement policies when provisioning in a reservation.
+        # Instead, we use the placement policy defined in reservation settings.
+        return False
+
     def create_gateway(
         self,
         configuration: GatewayComputeConfiguration,
@@ -880,6 +959,26 @@
             usable_subnets=self._list_usable_subnets(),
         )
 
+    @cachedmethod(
+        cache=lambda self: self._find_reservation_cache,
+        lock=lambda self: self._find_reservation_cache_lock,
+    )
+    def _find_reservation(self, configured_name: str) -> dict[str, compute_v1.Reservation]:
+        if match := RESERVATION_PATTERN.fullmatch(configured_name):
+            project_id = match.group("project_id")
+            name = match.group("reservation_name")
+        elif RESOURCE_NAME_PATTERN.fullmatch(configured_name):
+            project_id = self.config.project_id
+            name = configured_name
+        else:
+            # misconfigured or non-GCP
+            return {}
+        return gcp_resources.find_reservation(
+            reservations_client=self.reservations_client,
+            project_id=project_id,
+            name=name,
+        )
+
 
 def _supported_instances_and_zones(
     regions: List[str],
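
A quick illustration of the two reservation-name formats `_find_reservation` accepts; anything matching neither pattern yields an empty mapping (the project and reservation names below are made up):

    import re

    RESERVATION_PATTERN = re.compile(
        r"projects/(?P<project_id>[a-z0-9-]+)/reservations/(?P<reservation_name>[a-z0-9-]+)"
    )
    RESOURCE_NAME_PATTERN = re.compile(r"[a-z0-9-]+")

    # Fully qualified form: the reservation lives in (or is shared from) another project.
    m = RESERVATION_PATTERN.fullmatch("projects/other-project/reservations/my-reservation")
    assert m is not None and m.group("project_id") == "other-project"
    assert m.group("reservation_name") == "my-reservation"

    # Bare form: looked up in the backend's own config.project_id.
    assert RESOURCE_NAME_PATTERN.fullmatch("my-reservation") is not None

    # Matches neither pattern (extra path segments, uppercase, etc.) -> an empty dict is returned.
    assert RESERVATION_PATTERN.fullmatch("zones/us-central1-a/reservations/res") is None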

@@ -922,8 +1021,8 @@ def _has_gpu_quota(quotas: Dict[str, float], resources: Resources) -> bool:
     gpu = resources.gpus[0]
     if _is_tpu(gpu.name):
         return True
-    if gpu.name in ["B200", "H100"]:
-        # B200, H100 and H100_MEGA quotas are not returned by `regions_client.list`
+    if gpu.name in ["B200", "H100", "RTXPRO6000"]:
+        # B200, H100, H100_MEGA, and RTXPRO6000 quotas are not returned by `regions_client.list`
         return True
     quota_name = f"NVIDIA_{gpu.name}_GPUS"
     if gpu.name == "A100" and gpu.memory_mib == 80 * 1024:
@@ -933,6 +1032,52 @@ def _has_gpu_quota(quotas: Dict[str, float], resources: Resources) -> bool:
     return len(resources.gpus) <= quotas.get(quota_name, 0)
 
 
+def _offer_matches_reservation(
+    offer: InstanceOfferWithAvailability, reservation: compute_v1.Reservation
+) -> bool:
+    if (
+        reservation.specific_reservation is None
+        or reservation.specific_reservation.instance_properties is None
+    ):
+        return False
+    properties = reservation.specific_reservation.instance_properties
+    if properties.machine_type != offer.instance.name:
+        return False
+    accelerators = properties.guest_accelerators or []
+    if not accelerators and offer.instance.resources.gpus:
+        return False
+    if len(accelerators) > 1:
+        logger.warning(
+            "Expected 0 or 1 accelerator types per instance,"
+            f" but {properties.machine_type} has {len(accelerators)}."
+            f" Ignoring reservation {reservation.self_link}"
+        )
+        return False
+    if accelerators:
+        if accelerators[0].accelerator_count != len(offer.instance.resources.gpus):
+            return False
+        if (
+            offer.instance.resources.gpus
+            and gcp_resources.find_accelerator_name(
+                offer.instance.resources.gpus[0].name,
+                offer.instance.resources.gpus[0].memory_mib,
+            )
+            != accelerators[0].accelerator_type
+        ):
+            return False
+    return True
+
+
+def _reservation_has_capacity(reservation: compute_v1.Reservation) -> bool:
+    return (
+        reservation.specific_reservation is not None
+        and reservation.specific_reservation.in_use_count is not None
+        and reservation.specific_reservation.assured_count is not None
+        and reservation.specific_reservation.in_use_count
+        < reservation.specific_reservation.assured_count
+    )
+
+
 def _unique_instance_name(instance: InstanceType) -> str:
     if instance.resources.spot:
         name = f"{instance.name}-spot"
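
For reference, the new capacity check compares the reservation's `in_use_count` with its `assured_count`. A small standalone illustration, assuming the `google-cloud-compute` message types (`AllocationSpecificSKUReservation` is not shown in this diff):

    from google.cloud import compute_v1

    reservation = compute_v1.Reservation(
        specific_reservation=compute_v1.AllocationSpecificSKUReservation(
            assured_count=4,
            in_use_count=3,
        )
    )
    # 3 of 4 assured instances are in use, so one slot is still free and
    # _reservation_has_capacity(reservation) would return True.
    sr = reservation.specific_reservation
    assert sr.in_use_count < sr.assured_count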

dstack/_internal/core/backends/gcp/models.py
@@ -89,6 +89,16 @@ class GCPBackendConfig(CoreModel):
             description="The tags (labels) that will be assigned to resources created by `dstack`"
         ),
     ] = None
+    preview_features: Annotated[
+        Optional[List[Literal["g4"]]],
+        Field(
+            description=(
+                "The list of preview GCP features to enable."
+                " There are currently no preview features"
+            ),
+            max_items=1,
+        ),
+    ] = None
 
 
 class GCPBackendConfigWithCreds(GCPBackendConfig):