dstack 0.19.30__py3-none-any.whl → 0.19.31__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (39)
  1. dstack/_internal/cli/commands/__init__.py +8 -0
  2. dstack/_internal/cli/commands/project.py +27 -20
  3. dstack/_internal/cli/commands/server.py +5 -0
  4. dstack/_internal/cli/main.py +1 -3
  5. dstack/_internal/core/backends/aws/compute.py +2 -0
  6. dstack/_internal/core/backends/azure/compute.py +2 -0
  7. dstack/_internal/core/backends/base/compute.py +32 -9
  8. dstack/_internal/core/backends/base/offers.py +1 -0
  9. dstack/_internal/core/backends/cloudrift/compute.py +2 -0
  10. dstack/_internal/core/backends/cudo/compute.py +2 -0
  11. dstack/_internal/core/backends/datacrunch/compute.py +2 -0
  12. dstack/_internal/core/backends/digitalocean_base/compute.py +2 -0
  13. dstack/_internal/core/backends/features.py +5 -0
  14. dstack/_internal/core/backends/gcp/compute.py +74 -34
  15. dstack/_internal/core/backends/gcp/configurator.py +1 -1
  16. dstack/_internal/core/backends/gcp/models.py +14 -1
  17. dstack/_internal/core/backends/gcp/resources.py +35 -12
  18. dstack/_internal/core/backends/hotaisle/compute.py +2 -0
  19. dstack/_internal/core/backends/kubernetes/compute.py +466 -213
  20. dstack/_internal/core/backends/kubernetes/models.py +13 -16
  21. dstack/_internal/core/backends/kubernetes/utils.py +145 -8
  22. dstack/_internal/core/backends/lambdalabs/compute.py +2 -0
  23. dstack/_internal/core/backends/local/compute.py +2 -0
  24. dstack/_internal/core/backends/nebius/compute.py +2 -0
  25. dstack/_internal/core/backends/oci/compute.py +2 -0
  26. dstack/_internal/core/backends/template/compute.py.jinja +2 -0
  27. dstack/_internal/core/backends/tensordock/compute.py +2 -0
  28. dstack/_internal/core/backends/vultr/compute.py +2 -0
  29. dstack/_internal/server/background/tasks/common.py +2 -0
  30. dstack/_internal/server/background/tasks/process_instances.py +2 -2
  31. dstack/_internal/server/services/offers.py +7 -1
  32. dstack/_internal/server/testing/common.py +2 -0
  33. dstack/_internal/server/utils/provisioning.py +3 -10
  34. dstack/version.py +1 -1
  35. {dstack-0.19.30.dist-info → dstack-0.19.31.dist-info}/METADATA +11 -9
  36. {dstack-0.19.30.dist-info → dstack-0.19.31.dist-info}/RECORD +39 -39
  37. {dstack-0.19.30.dist-info → dstack-0.19.31.dist-info}/WHEEL +0 -0
  38. {dstack-0.19.30.dist-info → dstack-0.19.31.dist-info}/entry_points.txt +0 -0
  39. {dstack-0.19.30.dist-info → dstack-0.19.31.dist-info}/licenses/LICENSE.md +0 -0
@@ -19,6 +19,7 @@ DSTACK_INSTANCE_TAG = "dstack-runner-instance"
19
19
  DSTACK_GATEWAY_TAG = "dstack-gateway-instance"
20
20
 
21
21
  supported_accelerators = [
22
+ {"accelerator_name": "nvidia-b200", "gpu_name": "B200", "memory_mb": 1024 * 180},
22
23
  {"accelerator_name": "nvidia-a100-80gb", "gpu_name": "A100", "memory_mb": 1024 * 80},
23
24
  {"accelerator_name": "nvidia-tesla-a100", "gpu_name": "A100", "memory_mb": 1024 * 40},
24
25
  {"accelerator_name": "nvidia-l4", "gpu_name": "L4", "memory_mb": 1024 * 24},
@@ -58,8 +59,6 @@ def check_vpc(
58
59
  )
59
60
  for region in regions:
60
61
  get_vpc_subnet_or_error(
61
- subnetworks_client=subnetworks_client,
62
- vpc_project_id=vpc_project_id,
63
62
  vpc_name=vpc_name,
64
63
  region=region,
65
64
  usable_subnets=usable_subnets,
@@ -121,6 +120,7 @@ def create_instance_struct(
121
120
  network: str = "global/networks/default",
122
121
  subnetwork: Optional[str] = None,
123
122
  extra_subnetworks: Optional[List[Tuple[str, str]]] = None,
123
+ roce_subnetworks: Optional[List[Tuple[str, str]]] = None,
124
124
  allocate_public_ip: bool = True,
125
125
  placement_policy: Optional[str] = None,
126
126
  ) -> compute_v1.Instance:
@@ -132,6 +132,7 @@ def create_instance_struct(
132
132
  subnetwork=subnetwork,
133
133
  allocate_public_ip=allocate_public_ip,
134
134
  extra_subnetworks=extra_subnetworks,
135
+ roce_subnetworks=roce_subnetworks,
135
136
  )
136
137
 
137
138
  disk = compute_v1.AttachedDisk()
@@ -194,6 +195,7 @@ def _get_network_interfaces(
194
195
  subnetwork: Optional[str],
195
196
  allocate_public_ip: bool,
196
197
  extra_subnetworks: Optional[List[Tuple[str, str]]],
198
+ roce_subnetworks: Optional[List[Tuple[str, str]]],
197
199
  ) -> List[compute_v1.NetworkInterface]:
198
200
  network_interface = compute_v1.NetworkInterface()
199
201
  network_interface.network = network
@@ -221,6 +223,14 @@ def _get_network_interfaces(
221
223
  nic_type=compute_v1.NetworkInterface.NicType.GVNIC.name,
222
224
  )
223
225
  )
226
+ for network, subnetwork in roce_subnetworks or []:
227
+ network_interfaces.append(
228
+ compute_v1.NetworkInterface(
229
+ network=network,
230
+ subnetwork=subnetwork,
231
+ nic_type=compute_v1.NetworkInterface.NicType.MRDMA.name,
232
+ )
233
+ )
224
234
  return network_interfaces
225
235
 
226
236
 
@@ -233,29 +243,41 @@ def list_project_usable_subnets(
233
243
 
234
244
 
235
245
  def get_vpc_subnet_or_error(
236
- subnetworks_client: compute_v1.SubnetworksClient,
237
- vpc_project_id: str,
238
246
  vpc_name: str,
239
247
  region: str,
240
- usable_subnets: Optional[List[compute_v1.UsableSubnetwork]] = None,
248
+ usable_subnets: list[compute_v1.UsableSubnetwork],
241
249
  ) -> str:
242
250
  """
243
251
  Returns resource name of any usable subnet in a given VPC
244
252
  (e.g. "projects/example-project/regions/europe-west4/subnetworks/example-subnet")
245
253
  """
246
- if usable_subnets is None:
247
- usable_subnets = list_project_usable_subnets(subnetworks_client, vpc_project_id)
254
+ vpc_subnets = get_vpc_subnets(vpc_name, region, usable_subnets)
255
+ if vpc_subnets:
256
+ return vpc_subnets[0]
257
+ raise ComputeError(
258
+ f"No usable subnetwork found in region {region} for VPC {vpc_name}."
259
+ f" Ensure that VPC {vpc_name} exists and has usable subnetworks."
260
+ )
261
+
262
+
263
+ def get_vpc_subnets(
264
+ vpc_name: str,
265
+ region: str,
266
+ usable_subnets: list[compute_v1.UsableSubnetwork],
267
+ ) -> list[str]:
268
+ """
269
+ Returns resource names of all usable subnets in a given VPC
270
+ (e.g. ["projects/example-project/regions/europe-west4/subnetworks/example-subnet"])
271
+ """
272
+ result = []
248
273
  for subnet in usable_subnets:
249
274
  network_name = subnet.network.split("/")[-1]
250
275
  subnet_url = subnet.subnetwork
251
276
  subnet_resource_name = remove_prefix(subnet_url, "https://www.googleapis.com/compute/v1/")
252
277
  subnet_region = subnet_resource_name.split("/")[3]
253
278
  if network_name == vpc_name and subnet_region == region:
254
- return subnet_resource_name
255
- raise ComputeError(
256
- f"No usable subnetwork found in region {region} for VPC {vpc_name} in project {vpc_project_id}."
257
- f" Ensure that VPC {vpc_name} exists and has usable subnetworks."
258
- )
279
+ result.append(subnet_resource_name)
280
+ return result
259
281
 
260
282
 
261
283
  def create_runner_firewall_rules(
@@ -476,5 +498,6 @@ def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
476
498
  "n4-",
477
499
  "h3-",
478
500
  "v6e",
501
+ "a4-",
479
502
  ]
480
503
  )
@@ -11,6 +11,7 @@ from dstack._internal.core.backends.base.compute import (
11
11
  Compute,
12
12
  ComputeWithAllOffersCached,
13
13
  ComputeWithCreateInstanceSupport,
14
+ ComputeWithPrivilegedSupport,
14
15
  get_shim_commands,
15
16
  )
16
17
  from dstack._internal.core.backends.base.offers import get_catalog_offers
@@ -47,6 +48,7 @@ INSTANCE_TYPE_SPECS = {
47
48
  class HotAisleCompute(
48
49
  ComputeWithAllOffersCached,
49
50
  ComputeWithCreateInstanceSupport,
51
+ ComputeWithPrivilegedSupport,
50
52
  Compute,
51
53
  ):
52
54
  def __init__(self, config: HotAisleConfig):