dstack 0.19.27__py3-none-any.whl → 0.19.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dstack might be problematic.

Files changed (74)
  1. dstack/_internal/cli/commands/__init__.py +11 -8
  2. dstack/_internal/cli/commands/apply.py +6 -3
  3. dstack/_internal/cli/commands/completion.py +3 -1
  4. dstack/_internal/cli/commands/config.py +1 -0
  5. dstack/_internal/cli/commands/init.py +2 -2
  6. dstack/_internal/cli/commands/offer.py +1 -1
  7. dstack/_internal/cli/commands/project.py +1 -0
  8. dstack/_internal/cli/commands/server.py +2 -2
  9. dstack/_internal/cli/main.py +1 -1
  10. dstack/_internal/cli/services/configurators/base.py +2 -4
  11. dstack/_internal/cli/services/configurators/fleet.py +4 -5
  12. dstack/_internal/cli/services/configurators/gateway.py +3 -5
  13. dstack/_internal/cli/services/configurators/run.py +51 -27
  14. dstack/_internal/cli/services/configurators/volume.py +3 -5
  15. dstack/_internal/core/backends/aws/compute.py +51 -36
  16. dstack/_internal/core/backends/azure/compute.py +10 -7
  17. dstack/_internal/core/backends/base/compute.py +96 -14
  18. dstack/_internal/core/backends/base/offers.py +34 -4
  19. dstack/_internal/core/backends/cloudrift/compute.py +5 -7
  20. dstack/_internal/core/backends/cudo/compute.py +4 -2
  21. dstack/_internal/core/backends/datacrunch/compute.py +13 -11
  22. dstack/_internal/core/backends/digitalocean_base/compute.py +4 -5
  23. dstack/_internal/core/backends/gcp/compute.py +12 -7
  24. dstack/_internal/core/backends/hotaisle/compute.py +4 -7
  25. dstack/_internal/core/backends/kubernetes/compute.py +6 -4
  26. dstack/_internal/core/backends/lambdalabs/compute.py +4 -5
  27. dstack/_internal/core/backends/local/compute.py +1 -3
  28. dstack/_internal/core/backends/nebius/compute.py +10 -7
  29. dstack/_internal/core/backends/oci/compute.py +10 -7
  30. dstack/_internal/core/backends/runpod/compute.py +15 -6
  31. dstack/_internal/core/backends/template/compute.py.jinja +3 -1
  32. dstack/_internal/core/backends/tensordock/compute.py +1 -3
  33. dstack/_internal/core/backends/tensordock/models.py +2 -0
  34. dstack/_internal/core/backends/vastai/compute.py +7 -3
  35. dstack/_internal/core/backends/vultr/compute.py +5 -5
  36. dstack/_internal/core/compatibility/runs.py +2 -0
  37. dstack/_internal/core/models/common.py +67 -43
  38. dstack/_internal/core/models/configurations.py +88 -62
  39. dstack/_internal/core/models/fleets.py +41 -24
  40. dstack/_internal/core/models/instances.py +5 -5
  41. dstack/_internal/core/models/profiles.py +66 -47
  42. dstack/_internal/core/models/projects.py +8 -0
  43. dstack/_internal/core/models/repos/remote.py +21 -16
  44. dstack/_internal/core/models/resources.py +69 -65
  45. dstack/_internal/core/models/runs.py +17 -9
  46. dstack/_internal/server/app.py +5 -0
  47. dstack/_internal/server/background/tasks/process_fleets.py +8 -0
  48. dstack/_internal/server/background/tasks/process_instances.py +3 -2
  49. dstack/_internal/server/background/tasks/process_submitted_jobs.py +97 -34
  50. dstack/_internal/server/models.py +6 -5
  51. dstack/_internal/server/schemas/gateways.py +10 -9
  52. dstack/_internal/server/services/backends/__init__.py +1 -1
  53. dstack/_internal/server/services/backends/handlers.py +2 -0
  54. dstack/_internal/server/services/docker.py +8 -7
  55. dstack/_internal/server/services/projects.py +63 -4
  56. dstack/_internal/server/services/runs.py +2 -0
  57. dstack/_internal/server/settings.py +46 -0
  58. dstack/_internal/server/statics/index.html +1 -1
  59. dstack/_internal/server/statics/main-56191fbfe77f49b251de.css +3 -0
  60. dstack/_internal/server/statics/{main-4eecc75fbe64067eb1bc.js → main-c51afa7f243e24d3e446.js} +61115 -49101
  61. dstack/_internal/server/statics/{main-4eecc75fbe64067eb1bc.js.map → main-c51afa7f243e24d3e446.js.map} +1 -1
  62. dstack/_internal/utils/env.py +85 -11
  63. dstack/version.py +1 -1
  64. {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/METADATA +1 -1
  65. {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/RECORD +68 -73
  66. dstack/_internal/core/backends/tensordock/__init__.py +0 -0
  67. dstack/_internal/core/backends/tensordock/api_client.py +0 -104
  68. dstack/_internal/core/backends/tensordock/backend.py +0 -16
  69. dstack/_internal/core/backends/tensordock/configurator.py +0 -74
  70. dstack/_internal/server/statics/main-56191c63d516fd0041c4.css +0 -3
  71. dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +0 -3
  72. {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/WHEEL +0 -0
  73. {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/entry_points.txt +0 -0
  74. {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/core/backends/base/compute.py

@@ -7,7 +7,7 @@ from abc import ABC, abstractmethod
 from collections.abc import Iterable
 from functools import lru_cache
 from pathlib import Path
-from typing import Dict, List, Literal, Optional
+from typing import Callable, Dict, List, Literal, Optional
 
 import git
 import requests
@@ -15,6 +15,7 @@ import yaml
 from cachetools import TTLCache, cachedmethod
 
 from dstack._internal import settings
+from dstack._internal.core.backends.base.offers import filter_offers_by_requirements
 from dstack._internal.core.consts import (
     DSTACK_RUNNER_HTTP_PORT,
     DSTACK_RUNNER_SSH_PORT,
@@ -57,14 +58,8 @@ class Compute(ABC):
     If a compute supports additional features, it must also subclass `ComputeWith*` classes.
     """
 
-    def __init__(self):
-        self._offers_cache_lock = threading.Lock()
-        self._offers_cache = TTLCache(maxsize=10, ttl=180)
-
     @abstractmethod
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
         """
         Returns offers with availability matching `requirements`.
         If the provider is added to gpuhunt, typically gets offers using `base.offers.get_catalog_offers()`
@@ -121,10 +116,97 @@ class Compute(ABC):
         """
         pass
 
-    def _get_offers_cached_key(self, requirements: Optional[Requirements] = None) -> int:
+
+class ComputeWithAllOffersCached(ABC):
+    """
+    Provides common `get_offers()` implementation for backends
+    whose offers do not depend on requirements.
+    It caches all offers with availability and post-filters by requirements.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._offers_cache_lock = threading.Lock()
+        self._offers_cache = TTLCache(maxsize=1, ttl=180)
+
+    @abstractmethod
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
+        """
+        Returns all backend offers with availability.
+        """
+        pass
+
+    def get_offers_modifier(
+        self, requirements: Requirements
+    ) -> Optional[
+        Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]
+    ]:
+        """
+        Returns a modifier function that modifies offers before they are filtered by requirements.
+        Can return `None` to exclude the offer.
+        E.g. can be used to set appropriate disk size based on requirements.
+        """
+        return None
+
+    def get_offers_post_filter(
+        self, requirements: Requirements
+    ) -> Optional[Callable[[InstanceOfferWithAvailability], bool]]:
+        """
+        Returns a filter function to apply to offers based on requirements.
+        This allows backends to implement custom post-filtering logic for specific requirements.
+        """
+        return None
+
+    def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
+        offers = self._get_all_offers_with_availability_cached()
+        modifier = self.get_offers_modifier(requirements)
+        if modifier is not None:
+            modified_offers = []
+            for o in offers:
+                modified_offer = modifier(o)
+                if modified_offer is not None:
+                    modified_offers.append(modified_offer)
+            offers = modified_offers
+        offers = filter_offers_by_requirements(offers, requirements)
+        post_filter = self.get_offers_post_filter(requirements)
+        if post_filter is not None:
+            offers = [o for o in offers if post_filter(o)]
+        return offers
+
+    @cachedmethod(
+        cache=lambda self: self._offers_cache,
+        lock=lambda self: self._offers_cache_lock,
+    )
+    def _get_all_offers_with_availability_cached(self) -> List[InstanceOfferWithAvailability]:
+        return self.get_all_offers_with_availability()
+
+
+class ComputeWithFilteredOffersCached(ABC):
+    """
+    Provides common `get_offers()` implementation for backends
+    whose offers depend on requirements.
+    It caches offers using requirements as key.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._offers_cache_lock = threading.Lock()
+        self._offers_cache = TTLCache(maxsize=10, ttl=180)
+
+    @abstractmethod
+    def get_offers_by_requirements(
+        self, requirements: Requirements
+    ) -> List[InstanceOfferWithAvailability]:
+        """
+        Returns backend offers with availability matching requirements.
+        """
+        pass
+
+    def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
+        return self._get_offers_cached(requirements)
+
+    def _get_offers_cached_key(self, requirements: Requirements) -> int:
         # Requirements is not hashable, so we use a hack to get arguments hash
-        if requirements is None:
-            return hash(None)
         return hash(requirements.json())
 
     @cachedmethod(
@@ -132,10 +214,10 @@ class Compute(ABC):
         key=_get_offers_cached_key,
         lock=lambda self: self._offers_cache_lock,
     )
-    def get_offers_cached(
-        self, requirements: Optional[Requirements] = None
+    def _get_offers_cached(
+        self, requirements: Requirements
     ) -> List[InstanceOfferWithAvailability]:
-        return self.get_offers(requirements)
+        return self.get_offers_by_requirements(requirements)
 
 
 class ComputeWithCreateInstanceSupport(ABC):
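
For orientation, a minimal sketch (not part of the diff) of how a backend adopts the new mixin; `ExampleCompute` and `_fetch_provider_offers()` are invented names, while the mixin methods and imports are the ones introduced above. A backend whose offer list does not depend on requirements implements only `get_all_offers_with_availability()`; the inherited `get_offers()` then serves each call from a single-entry, 180-second TTL cache, applying the optional modifier, the requirements filter, and the optional post-filter:

from typing import List

from dstack._internal.core.backends.base.compute import (
    Compute,
    ComputeWithAllOffersCached,
)
from dstack._internal.core.models.instances import InstanceOfferWithAvailability


class ExampleCompute(ComputeWithAllOffersCached, Compute):
    # The remaining Compute abstract methods are omitted in this sketch.

    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
        # Hypothetical provider call; the mixin caches this result and
        # filters it by requirements on every get_offers() call.
        return self._fetch_provider_offers()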
dstack/_internal/core/backends/base/offers.py

@@ -1,5 +1,5 @@
 from dataclasses import asdict
-from typing import Callable, List, Optional
+from typing import Callable, List, Optional, TypeVar
 
 import gpuhunt
 from pydantic import parse_obj_as
@@ -9,11 +9,13 @@ from dstack._internal.core.models.instances import (
     Disk,
     Gpu,
     InstanceOffer,
+    InstanceOfferWithAvailability,
     InstanceType,
     Resources,
 )
 from dstack._internal.core.models.resources import DEFAULT_DISK, CPUSpec, Memory, Range
 from dstack._internal.core.models.runs import Requirements
+from dstack._internal.utils.common import get_or_error
 
 # Offers not supported by all dstack versions are hidden behind one or more flags.
 # This list enables the flags that are currently supported.
@@ -163,9 +165,13 @@ def requirements_to_query_filter(req: Optional[Requirements]) -> gpuhunt.QueryFilter:
     return q
 
 
-def match_requirements(
-    offers: List[InstanceOffer], requirements: Optional[Requirements]
-) -> List[InstanceOffer]:
+InstanceOfferT = TypeVar("InstanceOfferT", InstanceOffer, InstanceOfferWithAvailability)
+
+
+def filter_offers_by_requirements(
+    offers: List[InstanceOfferT],
+    requirements: Optional[Requirements],
+) -> List[InstanceOfferT]:
     query_filter = requirements_to_query_filter(requirements)
     filtered_offers = []
     for offer in offers:
@@ -190,3 +196,27 @@ def choose_disk_size_mib(
         disk_size_gib = disk_size_range.min
 
     return round(disk_size_gib * 1024)
+
+
+def get_offers_disk_modifier(
+    configurable_disk_size: Range[Memory], requirements: Requirements
+) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
+    """
+    Returns a func that modifies offers disk by setting min value that satisfies both
+    `configurable_disk_size` and `requirements`.
+    """
+
+    def modifier(offer: InstanceOfferWithAvailability) -> Optional[InstanceOfferWithAvailability]:
+        requirements_disk_range = DEFAULT_DISK.size
+        if requirements.resources.disk is not None:
+            requirements_disk_range = requirements.resources.disk.size
+        disk_size_range = requirements_disk_range.intersect(configurable_disk_size)
+        if disk_size_range is None:
+            return None
+        offer_copy = offer.copy(deep=True)
+        offer_copy.instance.resources.disk = Disk(
+            size_mib=get_or_error(disk_size_range.min) * 1024
+        )
+        return offer_copy
+
+    return modifier
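
To make the disk rule above concrete, a worked sketch (the ranges are invented for illustration, and `Range.intersect` and `Memory.parse` are assumed to behave as they are used in dstack's resources module): if the backend supports disks from 50GB up and the run requests 100GB..500GB, the intersection is 100GB..500GB, so the modifier pins the offer's disk to the minimum, 100 GiB (102400 MiB):

from dstack._internal.core.models.resources import Memory, Range

backend_disk_limits = Range[Memory](min=Memory.parse("50GB"), max=None)
requested_disk = Range[Memory](min=Memory.parse("100GB"), max=Memory.parse("500GB"))

# The same computation get_offers_disk_modifier() performs per offer:
intersection = requested_disk.intersect(backend_disk_limits)
assert intersection is not None and intersection.min is not None
print(int(intersection.min) * 1024)  # 102400 MiB written to the offer's Disk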
dstack/_internal/core/backends/cloudrift/compute.py

@@ -1,7 +1,8 @@
 from typing import Dict, List, Optional
 
-from dstack._internal.core.backends.base.backend import Compute
 from dstack._internal.core.backends.base.compute import (
+    Compute,
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     get_shim_commands,
 )
@@ -17,13 +18,14 @@ from dstack._internal.core.models.instances import (
     InstanceOfferWithAvailability,
 )
 from dstack._internal.core.models.placement import PlacementGroup
-from dstack._internal.core.models.runs import JobProvisioningData, Requirements
+from dstack._internal.core.models.runs import JobProvisioningData
 from dstack._internal.utils.logging import get_logger
 
 logger = get_logger(__name__)
 
 
 class CloudRiftCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     Compute,
 ):
@@ -32,15 +34,11 @@ class CloudRiftCompute(
         self.config = config
         self.client = RiftClient(self.config.creds.api_key)
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=BackendType.CLOUDRIFT,
             locations=self.config.regions or None,
-            requirements=requirements,
         )
-
         offers_with_availabilities = self._get_offers_with_availability(offers)
         return offers_with_availabilities
 
dstack/_internal/core/backends/cudo/compute.py

@@ -5,6 +5,7 @@ import requests
 from dstack._internal.core.backends.base.backend import Compute
 from dstack._internal.core.backends.base.compute import (
     ComputeWithCreateInstanceSupport,
+    ComputeWithFilteredOffersCached,
     generate_unique_instance_name,
     get_shim_commands,
 )
@@ -29,6 +30,7 @@ MAX_RESOURCE_NAME_LEN = 30
 
 
 class CudoCompute(
+    ComputeWithFilteredOffersCached,
    ComputeWithCreateInstanceSupport,
    Compute,
 ):
@@ -37,8 +39,8 @@ class CudoCompute(
         self.config = config
         self.api_client = CudoApiClient(config.creds.api_key)
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
+    def get_offers_by_requirements(
+        self, requirements: Requirements
     ) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=BackendType.CUDO,
dstack/_internal/core/backends/datacrunch/compute.py

@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional
+from typing import Callable, Dict, List, Optional
 
 from datacrunch import DataCrunchClient
 from datacrunch.exceptions import APIException
@@ -6,11 +6,12 @@ from datacrunch.instances.instances import Instance
 
 from dstack._internal.core.backends.base.backend import Compute
 from dstack._internal.core.backends.base.compute import (
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     generate_unique_instance_name,
     get_shim_commands,
 )
-from dstack._internal.core.backends.base.offers import get_catalog_offers
+from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
 from dstack._internal.core.backends.datacrunch.models import DataCrunchConfig
 from dstack._internal.core.errors import NoCapacityError
 from dstack._internal.core.models.backends.base import BackendType
@@ -36,6 +37,7 @@ CONFIGURABLE_DISK_SIZE = Range[Memory](min=IMAGE_SIZE, max=None)
 
 
 class DataCrunchCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     Compute,
 ):
@@ -47,18 +49,19 @@ class DataCrunchCompute(
             client_secret=self.config.creds.client_secret,
         )
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=BackendType.DATACRUNCH,
             locations=self.config.regions,
-            requirements=requirements,
-            configurable_disk_size=CONFIGURABLE_DISK_SIZE,
         )
         offers_with_availability = self._get_offers_with_availability(offers)
         return offers_with_availability
 
+    def get_offers_modifier(
+        self, requirements: Requirements
+    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
+        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+
     def _get_offers_with_availability(
         self, offers: List[InstanceOffer]
     ) -> List[InstanceOfferWithAvailability]:
@@ -182,10 +185,9 @@ class DataCrunchCompute(
 
 def _get_vm_image_id(instance_offer: InstanceOfferWithAvailability) -> str:
     # https://api.datacrunch.io/v1/images
-    if (
-        len(instance_offer.instance.resources.gpus) > 0
-        and instance_offer.instance.resources.gpus[0].name == "V100"
-    ):
+    if len(instance_offer.instance.resources.gpus) > 0 and instance_offer.instance.resources.gpus[
+        0
+    ].name in ["V100", "A6000"]:
         # Ubuntu 22.04 + CUDA 12.0 + Docker
         return "2088da25-bb0d-41cc-a191-dccae45d96fd"
     # Ubuntu 24.04 + CUDA 12.8 Open + Docker
dstack/_internal/core/backends/digitalocean_base/compute.py

@@ -5,6 +5,7 @@ from gpuhunt.providers.digitalocean import DigitalOceanProvider
 
 from dstack._internal.core.backends.base.backend import Compute
 from dstack._internal.core.backends.base.compute import (
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     generate_unique_instance_name,
     get_user_data,
@@ -20,7 +21,7 @@ from dstack._internal.core.models.instances import (
     InstanceOfferWithAvailability,
 )
 from dstack._internal.core.models.placement import PlacementGroup
-from dstack._internal.core.models.runs import JobProvisioningData, Requirements
+from dstack._internal.core.models.runs import JobProvisioningData
 from dstack._internal.utils.logging import get_logger
 
 logger = get_logger(__name__)
@@ -37,6 +38,7 @@ DOCKER_INSTALL_COMMANDS = [
 
 
 class BaseDigitalOceanCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     Compute,
 ):
@@ -50,13 +52,10 @@ class BaseDigitalOceanCompute(
             DigitalOceanProvider(api_key=config.creds.api_key, api_url=api_url)
         )
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=self.BACKEND_TYPE,
             locations=self.config.regions,
-            requirements=requirements,
             catalog=self.catalog,
         )
         return [
dstack/_internal/core/backends/gcp/compute.py

@@ -17,6 +17,7 @@ import dstack._internal.core.backends.gcp.resources as gcp_resources
 from dstack import version
 from dstack._internal.core.backends.base.compute import (
     Compute,
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     ComputeWithGatewaySupport,
     ComputeWithMultinodeSupport,
@@ -31,7 +32,10 @@ from dstack._internal.core.backends.base.compute import (
     get_user_data,
     merge_tags,
 )
-from dstack._internal.core.backends.base.offers import get_catalog_offers
+from dstack._internal.core.backends.base.offers import (
+    get_catalog_offers,
+    get_offers_disk_modifier,
+)
 from dstack._internal.core.backends.gcp.features import tcpx as tcpx_features
 from dstack._internal.core.backends.gcp.models import GCPConfig
 from dstack._internal.core.errors import (
@@ -82,6 +86,7 @@ class GCPVolumeDiskBackendData(CoreModel):
 
 
 class GCPCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     ComputeWithMultinodeSupport,
     ComputeWithPlacementGroupSupport,
@@ -107,14 +112,10 @@ class GCPCompute(
         self._extra_subnets_cache_lock = threading.Lock()
         self._extra_subnets_cache = TTLCache(maxsize=30, ttl=60)
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         regions = get_or_error(self.config.regions)
         offers = get_catalog_offers(
             backend=BackendType.GCP,
-            requirements=requirements,
-            configurable_disk_size=CONFIGURABLE_DISK_SIZE,
             extra_filter=_supported_instances_and_zones(regions),
         )
         quotas: Dict[str, Dict[str, float]] = defaultdict(dict)
@@ -142,9 +143,13 @@ class GCPCompute(
                 offer_keys_to_offers[key] = offer_with_availability
                 offers_with_availability.append(offer_with_availability)
                 offers_with_availability[-1].region = region
-
         return offers_with_availability
 
+    def get_offers_modifier(
+        self, requirements: Requirements
+    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
+        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+
     def terminate_instance(
         self, instance_id: str, region: str, backend_data: Optional[str] = None
     ) -> None:
dstack/_internal/core/backends/hotaisle/compute.py

@@ -9,6 +9,7 @@ from gpuhunt.providers.hotaisle import HotAisleProvider
 
 from dstack._internal.core.backends.base.compute import (
     Compute,
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     get_shim_commands,
 )
@@ -23,7 +24,7 @@ from dstack._internal.core.models.instances import (
     InstanceOfferWithAvailability,
 )
 from dstack._internal.core.models.placement import PlacementGroup
-from dstack._internal.core.models.runs import JobProvisioningData, Requirements
+from dstack._internal.core.models.runs import JobProvisioningData
 from dstack._internal.utils.logging import get_logger
 
 logger = get_logger(__name__)
@@ -44,6 +45,7 @@ INSTANCE_TYPE_SPECS = {
 
 
 class HotAisleCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     Compute,
 ):
@@ -56,16 +58,12 @@ class HotAisleCompute(
             HotAisleProvider(api_key=config.creds.api_key, team_handle=config.team_handle)
         )
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=BackendType.HOTAISLE,
             locations=self.config.regions or None,
-            requirements=requirements,
             catalog=self.catalog,
         )
-
         supported_offers = []
         for offer in offers:
             if offer.instance.name in INSTANCE_TYPE_SPECS:
@@ -78,7 +76,6 @@ class HotAisleCompute(
                 logger.warning(
                     f"Skipping unsupported Hot Aisle instance type: {offer.instance.name}"
                 )
-
         return supported_offers
 
     def get_payload_from_offer(self, instance_type) -> dict:
dstack/_internal/core/backends/kubernetes/compute.py

@@ -9,13 +9,14 @@ from kubernetes import client
 
 from dstack._internal.core.backends.base.compute import (
     Compute,
+    ComputeWithFilteredOffersCached,
     ComputeWithGatewaySupport,
     generate_unique_gateway_instance_name,
     generate_unique_instance_name_for_job,
     get_docker_commands,
     get_dstack_gateway_commands,
 )
-from dstack._internal.core.backends.base.offers import match_requirements
+from dstack._internal.core.backends.base.offers import filter_offers_by_requirements
 from dstack._internal.core.backends.kubernetes.models import (
     KubernetesConfig,
     KubernetesNetworkingConfig,
@@ -58,6 +59,7 @@ NVIDIA_GPU_NAMES = NVIDIA_GPU_NAME_TO_GPU_INFO.keys()
 
 
 class KubernetesCompute(
+    ComputeWithFilteredOffersCached,
     ComputeWithGatewaySupport,
     Compute,
 ):
@@ -70,8 +72,8 @@ class KubernetesCompute(
         self.networking_config = networking_config
         self.api = get_api_from_config_data(config.kubeconfig.data)
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
+    def get_offers_by_requirements(
+        self, requirements: Requirements
     ) -> List[InstanceOfferWithAvailability]:
         nodes = self.api.list_node()
         instance_offers = []
@@ -99,7 +101,7 @@ class KubernetesCompute(
                 availability=InstanceAvailability.AVAILABLE,
                 instance_runtime=InstanceRuntime.RUNNER,
             )
-            instance_offers.extend(match_requirements([instance_offer], requirements))
+            instance_offers.extend(filter_offers_by_requirements([instance_offer], requirements))
         return instance_offers
 
     def run_job(
dstack/_internal/core/backends/lambdalabs/compute.py

@@ -7,6 +7,7 @@ from typing import Dict, List, Optional
 
 from dstack._internal.core.backends.base.compute import (
     Compute,
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     generate_unique_instance_name,
     get_shim_commands,
@@ -22,12 +23,13 @@ from dstack._internal.core.models.instances import (
     InstanceOfferWithAvailability,
 )
 from dstack._internal.core.models.placement import PlacementGroup
-from dstack._internal.core.models.runs import JobProvisioningData, Requirements
+from dstack._internal.core.models.runs import JobProvisioningData
 
 MAX_INSTANCE_NAME_LEN = 60
 
 
 class LambdaCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     Compute,
 ):
@@ -36,13 +38,10 @@ class LambdaCompute(
         self.config = config
         self.api_client = LambdaAPIClient(config.creds.api_key)
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=BackendType.LAMBDA,
             locations=self.config.regions or None,
-            requirements=requirements,
         )
         offers_with_availability = self._get_offers_with_availability(offers)
         return offers_with_availability
dstack/_internal/core/backends/local/compute.py

@@ -28,9 +28,7 @@ class LocalCompute(
     ComputeWithVolumeSupport,
     Compute,
 ):
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
         return [
             InstanceOfferWithAvailability(
                 backend=BackendType.LOCAL,
dstack/_internal/core/backends/nebius/compute.py

@@ -3,7 +3,7 @@ import random
 import shlex
 import time
 from functools import cached_property
-from typing import List, Optional
+from typing import Callable, List, Optional
 
 from nebius.aio.operation import Operation as SDKOperation
 from nebius.aio.service_error import RequestError, StatusCode
@@ -12,13 +12,14 @@ from nebius.sdk import SDK
 
 from dstack._internal.core.backends.base.backend import Compute
 from dstack._internal.core.backends.base.compute import (
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     ComputeWithMultinodeSupport,
     ComputeWithPlacementGroupSupport,
     generate_unique_instance_name,
     get_user_data,
 )
-from dstack._internal.core.backends.base.offers import get_catalog_offers
+from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
 from dstack._internal.core.backends.nebius import resources
 from dstack._internal.core.backends.nebius.fabrics import get_suitable_infiniband_fabrics
 from dstack._internal.core.backends.nebius.models import NebiusConfig, NebiusServiceAccountCreds
@@ -76,6 +77,7 @@ SUPPORTED_PLATFORMS = [
 
 
 class NebiusCompute(
+    ComputeWithAllOffersCached,
     ComputeWithCreateInstanceSupport,
     ComputeWithMultinodeSupport,
     ComputeWithPlacementGroupSupport,
@@ -106,15 +108,11 @@ class NebiusCompute(
         ).metadata.id
         return self._subnet_id_cache[region]
 
-    def get_offers(
-        self, requirements: Optional[Requirements] = None
-    ) -> List[InstanceOfferWithAvailability]:
+    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
         offers = get_catalog_offers(
             backend=BackendType.NEBIUS,
             locations=list(self._region_to_project_id),
-            requirements=requirements,
             extra_filter=_supported_instances,
-            configurable_disk_size=CONFIGURABLE_DISK_SIZE,
         )
         return [
             InstanceOfferWithAvailability(
@@ -124,6 +122,11 @@ class NebiusCompute(
             for offer in offers
         ]
 
+    def get_offers_modifier(
+        self, requirements: Requirements
+    ) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
+        return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
+
     def create_instance(
         self,
         instance_offer: InstanceOfferWithAvailability,