dstack 0.19.25rc1__py3-none-any.whl → 0.19.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (161)
  1. dstack/_internal/cli/commands/__init__.py +2 -2
  2. dstack/_internal/cli/commands/apply.py +3 -61
  3. dstack/_internal/cli/commands/attach.py +1 -1
  4. dstack/_internal/cli/commands/completion.py +1 -1
  5. dstack/_internal/cli/commands/delete.py +2 -2
  6. dstack/_internal/cli/commands/fleet.py +1 -1
  7. dstack/_internal/cli/commands/gateway.py +2 -2
  8. dstack/_internal/cli/commands/init.py +56 -24
  9. dstack/_internal/cli/commands/logs.py +1 -1
  10. dstack/_internal/cli/commands/metrics.py +1 -1
  11. dstack/_internal/cli/commands/offer.py +45 -7
  12. dstack/_internal/cli/commands/project.py +2 -2
  13. dstack/_internal/cli/commands/secrets.py +2 -2
  14. dstack/_internal/cli/commands/server.py +1 -1
  15. dstack/_internal/cli/commands/stop.py +1 -1
  16. dstack/_internal/cli/commands/volume.py +1 -1
  17. dstack/_internal/cli/main.py +2 -2
  18. dstack/_internal/cli/services/completion.py +2 -2
  19. dstack/_internal/cli/services/configurators/__init__.py +6 -2
  20. dstack/_internal/cli/services/configurators/base.py +6 -7
  21. dstack/_internal/cli/services/configurators/fleet.py +1 -3
  22. dstack/_internal/cli/services/configurators/gateway.py +2 -4
  23. dstack/_internal/cli/services/configurators/run.py +293 -58
  24. dstack/_internal/cli/services/configurators/volume.py +2 -4
  25. dstack/_internal/cli/services/profile.py +1 -1
  26. dstack/_internal/cli/services/repos.py +35 -48
  27. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  28. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  29. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  30. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  31. dstack/_internal/core/backends/aws/compute.py +6 -1
  32. dstack/_internal/core/backends/aws/configurator.py +11 -7
  33. dstack/_internal/core/backends/azure/configurator.py +11 -7
  34. dstack/_internal/core/backends/base/compute.py +33 -5
  35. dstack/_internal/core/backends/base/configurator.py +25 -13
  36. dstack/_internal/core/backends/base/offers.py +2 -0
  37. dstack/_internal/core/backends/cloudrift/configurator.py +13 -7
  38. dstack/_internal/core/backends/configurators.py +15 -0
  39. dstack/_internal/core/backends/cudo/configurator.py +11 -7
  40. dstack/_internal/core/backends/datacrunch/compute.py +5 -1
  41. dstack/_internal/core/backends/datacrunch/configurator.py +13 -7
  42. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  43. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  44. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  45. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  46. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  47. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  48. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  49. dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
  50. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  51. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  52. dstack/_internal/core/backends/gcp/compute.py +32 -8
  53. dstack/_internal/core/backends/gcp/configurator.py +11 -7
  54. dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
  55. dstack/_internal/core/backends/hotaisle/compute.py +1 -6
  56. dstack/_internal/core/backends/hotaisle/configurator.py +13 -7
  57. dstack/_internal/core/backends/kubernetes/configurator.py +13 -7
  58. dstack/_internal/core/backends/lambdalabs/configurator.py +11 -7
  59. dstack/_internal/core/backends/models.py +7 -0
  60. dstack/_internal/core/backends/nebius/compute.py +1 -8
  61. dstack/_internal/core/backends/nebius/configurator.py +11 -7
  62. dstack/_internal/core/backends/nebius/resources.py +21 -11
  63. dstack/_internal/core/backends/oci/compute.py +4 -5
  64. dstack/_internal/core/backends/oci/configurator.py +11 -7
  65. dstack/_internal/core/backends/runpod/configurator.py +11 -7
  66. dstack/_internal/core/backends/template/configurator.py.jinja +11 -7
  67. dstack/_internal/core/backends/tensordock/configurator.py +13 -7
  68. dstack/_internal/core/backends/vastai/configurator.py +11 -7
  69. dstack/_internal/core/backends/vultr/compute.py +1 -5
  70. dstack/_internal/core/backends/vultr/configurator.py +11 -4
  71. dstack/_internal/core/compatibility/fleets.py +5 -0
  72. dstack/_internal/core/compatibility/gpus.py +13 -0
  73. dstack/_internal/core/compatibility/runs.py +9 -1
  74. dstack/_internal/core/models/backends/base.py +5 -1
  75. dstack/_internal/core/models/common.py +3 -3
  76. dstack/_internal/core/models/configurations.py +191 -32
  77. dstack/_internal/core/models/files.py +1 -1
  78. dstack/_internal/core/models/fleets.py +80 -3
  79. dstack/_internal/core/models/profiles.py +41 -11
  80. dstack/_internal/core/models/resources.py +46 -42
  81. dstack/_internal/core/models/runs.py +28 -5
  82. dstack/_internal/core/services/configs/__init__.py +6 -3
  83. dstack/_internal/core/services/profiles.py +2 -2
  84. dstack/_internal/core/services/repos.py +86 -79
  85. dstack/_internal/core/services/ssh/ports.py +1 -1
  86. dstack/_internal/proxy/lib/deps.py +6 -2
  87. dstack/_internal/server/app.py +22 -17
  88. dstack/_internal/server/background/tasks/process_fleets.py +109 -13
  89. dstack/_internal/server/background/tasks/process_gateways.py +4 -1
  90. dstack/_internal/server/background/tasks/process_instances.py +22 -73
  91. dstack/_internal/server/background/tasks/process_probes.py +1 -1
  92. dstack/_internal/server/background/tasks/process_running_jobs.py +12 -4
  93. dstack/_internal/server/background/tasks/process_runs.py +3 -1
  94. dstack/_internal/server/background/tasks/process_submitted_jobs.py +67 -44
  95. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  96. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  97. dstack/_internal/server/db.py +8 -4
  98. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  99. dstack/_internal/server/models.py +6 -2
  100. dstack/_internal/server/routers/gpus.py +1 -6
  101. dstack/_internal/server/schemas/runner.py +11 -0
  102. dstack/_internal/server/services/backends/__init__.py +14 -8
  103. dstack/_internal/server/services/backends/handlers.py +6 -1
  104. dstack/_internal/server/services/docker.py +5 -5
  105. dstack/_internal/server/services/fleets.py +37 -38
  106. dstack/_internal/server/services/gateways/__init__.py +2 -0
  107. dstack/_internal/server/services/gateways/client.py +5 -2
  108. dstack/_internal/server/services/gateways/connection.py +1 -1
  109. dstack/_internal/server/services/gpus.py +50 -49
  110. dstack/_internal/server/services/instances.py +44 -4
  111. dstack/_internal/server/services/jobs/__init__.py +15 -4
  112. dstack/_internal/server/services/jobs/configurators/base.py +53 -17
  113. dstack/_internal/server/services/jobs/configurators/dev.py +9 -4
  114. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +6 -8
  115. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +7 -9
  116. dstack/_internal/server/services/jobs/configurators/service.py +1 -3
  117. dstack/_internal/server/services/jobs/configurators/task.py +3 -3
  118. dstack/_internal/server/services/locking.py +5 -5
  119. dstack/_internal/server/services/logging.py +10 -2
  120. dstack/_internal/server/services/logs/__init__.py +8 -6
  121. dstack/_internal/server/services/logs/aws.py +330 -327
  122. dstack/_internal/server/services/logs/filelog.py +7 -6
  123. dstack/_internal/server/services/logs/gcp.py +141 -139
  124. dstack/_internal/server/services/plugins.py +1 -1
  125. dstack/_internal/server/services/projects.py +2 -5
  126. dstack/_internal/server/services/proxy/repo.py +5 -1
  127. dstack/_internal/server/services/requirements/__init__.py +0 -0
  128. dstack/_internal/server/services/requirements/combine.py +259 -0
  129. dstack/_internal/server/services/runner/client.py +7 -0
  130. dstack/_internal/server/services/runs.py +17 -1
  131. dstack/_internal/server/services/services/__init__.py +8 -2
  132. dstack/_internal/server/services/services/autoscalers.py +2 -0
  133. dstack/_internal/server/services/ssh.py +2 -1
  134. dstack/_internal/server/services/storage/__init__.py +5 -6
  135. dstack/_internal/server/services/storage/gcs.py +49 -49
  136. dstack/_internal/server/services/storage/s3.py +52 -52
  137. dstack/_internal/server/statics/index.html +1 -1
  138. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-4eecc75fbe64067eb1bc.js} +1146 -899
  139. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-4eecc75fbe64067eb1bc.js.map} +1 -1
  140. dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-56191c63d516fd0041c4.css} +1 -1
  141. dstack/_internal/server/testing/common.py +7 -4
  142. dstack/_internal/server/utils/logging.py +3 -3
  143. dstack/_internal/server/utils/provisioning.py +3 -3
  144. dstack/_internal/utils/json_schema.py +3 -1
  145. dstack/_internal/utils/path.py +8 -1
  146. dstack/_internal/utils/ssh.py +7 -0
  147. dstack/_internal/utils/typing.py +14 -0
  148. dstack/api/_public/repos.py +62 -8
  149. dstack/api/_public/runs.py +19 -8
  150. dstack/api/server/__init__.py +17 -19
  151. dstack/api/server/_gpus.py +2 -1
  152. dstack/api/server/_group.py +4 -3
  153. dstack/api/server/_repos.py +20 -3
  154. dstack/plugins/builtin/rest_plugin/_plugin.py +1 -0
  155. dstack/version.py +1 -1
  156. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/METADATA +2 -2
  157. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/RECORD +160 -142
  158. dstack/api/huggingface/__init__.py +0 -73
  159. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/WHEEL +0 -0
  160. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/entry_points.txt +0 -0
  161. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,173 @@
1
+ from typing import List, Optional
2
+
3
+ import gpuhunt
4
+ from gpuhunt.providers.digitalocean import DigitalOceanProvider
5
+
6
+ from dstack._internal.core.backends.base.backend import Compute
7
+ from dstack._internal.core.backends.base.compute import (
8
+ ComputeWithCreateInstanceSupport,
9
+ generate_unique_instance_name,
10
+ get_user_data,
11
+ )
12
+ from dstack._internal.core.backends.base.offers import get_catalog_offers
13
+ from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient
14
+ from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig
15
+ from dstack._internal.core.errors import BackendError
16
+ from dstack._internal.core.models.backends.base import BackendType
17
+ from dstack._internal.core.models.instances import (
18
+ InstanceAvailability,
19
+ InstanceConfiguration,
20
+ InstanceOfferWithAvailability,
21
+ )
22
+ from dstack._internal.core.models.placement import PlacementGroup
23
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
24
+ from dstack._internal.utils.logging import get_logger
25
+
26
+ logger = get_logger(__name__)
27
+
28
+ MAX_INSTANCE_NAME_LEN = 60
29
+ DOCKER_INSTALL_COMMANDS = [
30
+ "export DEBIAN_FRONTEND=noninteractive",
31
+ "mkdir -p /etc/apt/keyrings",
32
+ "curl --max-time 60 -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg",
33
+ 'echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null',
34
+ "apt-get update",
35
+ "apt-get --assume-yes install docker-ce docker-ce-cli containerd.io docker-compose-plugin",
36
+ ]
37
+
38
+
39
+ class BaseDigitalOceanCompute(
40
+ ComputeWithCreateInstanceSupport,
41
+ Compute,
42
+ ):
43
+ def __init__(self, config: BaseDigitalOceanConfig, api_url: str, type: BackendType):
44
+ super().__init__()
45
+ self.config = config
46
+ self.api_client = DigitalOceanAPIClient(config.creds.api_key, api_url)
47
+ self.catalog = gpuhunt.Catalog(balance_resources=False, auto_reload=False)
48
+ self.BACKEND_TYPE = type
49
+ self.catalog.add_provider(
50
+ DigitalOceanProvider(api_key=config.creds.api_key, api_url=api_url)
51
+ )
52
+
53
+ def get_offers(
54
+ self, requirements: Optional[Requirements] = None
55
+ ) -> List[InstanceOfferWithAvailability]:
56
+ offers = get_catalog_offers(
57
+ backend=self.BACKEND_TYPE,
58
+ locations=self.config.regions,
59
+ requirements=requirements,
60
+ catalog=self.catalog,
61
+ )
62
+ return [
63
+ InstanceOfferWithAvailability(
64
+ **offer.dict(),
65
+ availability=InstanceAvailability.AVAILABLE,
66
+ )
67
+ for offer in offers
68
+ ]
69
+
70
+ def create_instance(
71
+ self,
72
+ instance_offer: InstanceOfferWithAvailability,
73
+ instance_config: InstanceConfiguration,
74
+ placement_group: Optional[PlacementGroup],
75
+ ) -> JobProvisioningData:
76
+ instance_name = generate_unique_instance_name(
77
+ instance_config, max_length=MAX_INSTANCE_NAME_LEN
78
+ )
79
+
80
+ project_ssh_key = instance_config.ssh_keys[0]
81
+ ssh_key_id = self.api_client.get_or_create_ssh_key(
82
+ name=f"dstack-{instance_config.project_name}",
83
+ public_key=project_ssh_key.public,
84
+ )
85
+ size_slug = instance_offer.instance.name
86
+
87
+ if not instance_offer.instance.resources.gpus:
88
+ backend_specific_commands = DOCKER_INSTALL_COMMANDS
89
+ else:
90
+ backend_specific_commands = None
91
+
92
+ project_id = None
93
+ if self.config.project_name:
94
+ project_id = self.api_client.get_project_id(self.config.project_name)
95
+ if project_id is None:
96
+ raise BackendError(f"Project {self.config.project_name} does not exist")
97
+ droplet_config = {
98
+ "name": instance_name,
99
+ "region": instance_offer.region,
100
+ "size": size_slug,
101
+ "image": self._get_image_for_instance(instance_offer),
102
+ "ssh_keys": [ssh_key_id],
103
+ "backups": False,
104
+ "ipv6": False,
105
+ "monitoring": False,
106
+ "tags": [],
107
+ "user_data": get_user_data(
108
+ authorized_keys=instance_config.get_public_keys(),
109
+ backend_specific_commands=backend_specific_commands,
110
+ ),
111
+ **({"project_id": project_id} if project_id is not None else {}),
112
+ }
113
+
114
+ droplet = self.api_client.create_droplet(droplet_config)
115
+
116
+ return JobProvisioningData(
117
+ backend=instance_offer.backend,
118
+ instance_type=instance_offer.instance,
119
+ instance_id=str(droplet["id"]),
120
+ hostname=None,
121
+ internal_ip=None,
122
+ region=instance_offer.region,
123
+ price=instance_offer.price,
124
+ username="root",
125
+ ssh_port=22,
126
+ dockerized=True,
127
+ ssh_proxy=None,
128
+ backend_data=None,
129
+ )
130
+
131
+ def update_provisioning_data(
132
+ self,
133
+ provisioning_data: JobProvisioningData,
134
+ project_ssh_public_key: str,
135
+ project_ssh_private_key: str,
136
+ ):
137
+ droplet = self.api_client.get_droplet(provisioning_data.instance_id)
138
+ if droplet["status"] == "active":
139
+ for network in droplet["networks"]["v4"]:
140
+ if network["type"] == "public":
141
+ provisioning_data.hostname = network["ip_address"]
142
+ break
143
+
144
+ def terminate_instance(
145
+ self, instance_id: str, region: str, backend_data: Optional[str] = None
146
+ ):
147
+ self.api_client.delete_droplet(instance_id)
148
+
149
+ def _get_image_for_instance(self, instance_offer: InstanceOfferWithAvailability) -> str:
150
+ if not instance_offer.instance.resources.gpus:
151
+ # No GPUs, use CPU image
152
+ return "ubuntu-24-04-x64"
153
+
154
+ gpu_count = len(instance_offer.instance.resources.gpus)
155
+ gpu_vendor = instance_offer.instance.resources.gpus[0].vendor
156
+
157
+ if gpu_vendor == gpuhunt.AcceleratorVendor.AMD:
158
+ # AMD GPU
159
+ return "digitaloceanai-rocmjupyter"
160
+ else:
161
+ # NVIDIA GPUs - DO only supports 1 and 8 GPU configurations.
162
+ # DO says for single GPU plans using GPUs other than H100s use "gpu-h100x1-base". DO does not provide guidance for x8 GPUs so assuming the same applies.
163
+ # See (https://docs.digitalocean.com/products/droplets/getting-started/recommended-gpu-setup/#aiml-ready-image)
164
+ if gpu_count == 8:
165
+ return "gpu-h100x8-base"
166
+ elif gpu_count == 1:
167
+ return "gpu-h100x1-base"
168
+ else:
169
+ # For Unsupported GPU count - use single GPU image and log warning
170
+ logger.warning(
171
+ f"Unsupported NVIDIA GPU count: {gpu_count}, using single GPU image"
172
+ )
173
+ return "gpu-h100x1-base"
@@ -0,0 +1,57 @@
1
+ import json
2
+ from typing import Optional
3
+
4
+ from dstack._internal.core.backends.base.configurator import (
5
+ BackendRecord,
6
+ Configurator,
7
+ )
8
+ from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
9
+ from dstack._internal.core.backends.digitalocean_base.models import (
10
+ AnyBaseDigitalOceanCreds,
11
+ BaseDigitalOceanBackendConfig,
12
+ BaseDigitalOceanBackendConfigWithCreds,
13
+ BaseDigitalOceanConfig,
14
+ BaseDigitalOceanCreds,
15
+ BaseDigitalOceanStoredConfig,
16
+ )
17
+
18
+
19
+ class BaseDigitalOceanConfigurator(Configurator):
20
+ def validate_config(
21
+ self, config: BaseDigitalOceanBackendConfigWithCreds, default_creds_enabled: bool
22
+ ):
23
+ self._validate_creds(config.creds, config.project_name)
24
+
25
+ def create_backend(
26
+ self, project_name: str, config: BaseDigitalOceanBackendConfigWithCreds
27
+ ) -> BackendRecord:
28
+ return BackendRecord(
29
+ config=BaseDigitalOceanStoredConfig(
30
+ **BaseDigitalOceanBackendConfig.__response__.parse_obj(config).dict()
31
+ ).json(),
32
+ auth=BaseDigitalOceanCreds.parse_obj(config.creds).json(),
33
+ )
34
+
35
+ def get_backend_config_with_creds(
36
+ self, record: BackendRecord
37
+ ) -> BaseDigitalOceanBackendConfigWithCreds:
38
+ config = self._get_config(record)
39
+ return BaseDigitalOceanBackendConfigWithCreds.__response__.parse_obj(config)
40
+
41
+ def get_backend_config_without_creds(
42
+ self, record: BackendRecord
43
+ ) -> BaseDigitalOceanBackendConfig:
44
+ config = self._get_config(record)
45
+ return BaseDigitalOceanBackendConfig.__response__.parse_obj(config)
46
+
47
+ def get_backend(self, record: BackendRecord) -> BaseDigitalOceanBackend:
48
+ raise NotImplementedError("Subclasses must implement get_backend")
49
+
50
+ def _get_config(self, record: BackendRecord) -> BaseDigitalOceanConfig:
51
+ return BaseDigitalOceanConfig.__response__(
52
+ **json.loads(record.config),
53
+ creds=BaseDigitalOceanCreds.parse_raw(record.auth),
54
+ )
55
+
56
+ def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None):
57
+ pass
@@ -0,0 +1,43 @@
1
+ from typing import Annotated, List, Literal, Optional, Union
2
+
3
+ from pydantic import Field
4
+
5
+ from dstack._internal.core.models.common import CoreModel
6
+
7
+
8
+ class BaseDigitalOceanAPIKeyCreds(CoreModel):
9
+ type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
10
+ api_key: Annotated[str, Field(description="The API key")]
11
+
12
+
13
+ AnyBaseDigitalOceanCreds = BaseDigitalOceanAPIKeyCreds
14
+ BaseDigitalOceanCreds = AnyBaseDigitalOceanCreds
15
+
16
+
17
+ class BaseDigitalOceanBackendConfig(CoreModel):
18
+ type: Annotated[
19
+ Literal["amddevcloud", "digitalocean"],
20
+ Field(description="The type of backend"),
21
+ ]
22
+ project_name: Annotated[Optional[str], Field(description="The name of the project")] = None
23
+ regions: Annotated[
24
+ Optional[List[str]],
25
+ Field(description="The list of regions. Omit to use all regions"),
26
+ ] = None
27
+
28
+
29
+ class BaseDigitalOceanBackendConfigWithCreds(BaseDigitalOceanBackendConfig):
30
+ creds: Annotated[AnyBaseDigitalOceanCreds, Field(description="The credentials")]
31
+
32
+
33
+ AnyBaseDigitalOceanBackendConfig = Union[
34
+ BaseDigitalOceanBackendConfig, BaseDigitalOceanBackendConfigWithCreds
35
+ ]
36
+
37
+
38
+ class BaseDigitalOceanStoredConfig(BaseDigitalOceanBackendConfig):
39
+ pass
40
+
41
+
42
+ class BaseDigitalOceanConfig(BaseDigitalOceanStoredConfig):
43
+ creds: AnyBaseDigitalOceanCreds
@@ -2,6 +2,7 @@ import concurrent.futures
2
2
  import json
3
3
  import threading
4
4
  from collections import defaultdict
5
+ from dataclasses import dataclass
5
6
  from typing import Callable, Dict, List, Literal, Optional, Tuple
6
7
 
7
8
  import google.api_core.exceptions
@@ -285,16 +286,18 @@ class GCPCompute(
285
286
  )
286
287
  raise NoCapacityError()
287
288
 
289
+ image = _get_image(
290
+ instance_type_name=instance_offer.instance.name,
291
+ cuda=len(instance_offer.instance.resources.gpus) > 0,
292
+ )
293
+
288
294
  for zone in zones:
289
295
  request = compute_v1.InsertInstanceRequest()
290
296
  request.zone = zone
291
297
  request.project = self.config.project_id
292
298
  request.instance_resource = gcp_resources.create_instance_struct(
293
299
  disk_size=disk_size,
294
- image_id=_get_image_id(
295
- instance_type_name=instance_offer.instance.name,
296
- cuda=len(instance_offer.instance.resources.gpus) > 0,
297
- ),
300
+ image_id=image.id,
298
301
  machine_type=instance_offer.instance.name,
299
302
  accelerators=gcp_resources.get_accelerators(
300
303
  project_id=self.config.project_id,
@@ -305,6 +308,7 @@ class GCPCompute(
305
308
  user_data=_get_user_data(
306
309
  authorized_keys=authorized_keys,
307
310
  instance_type_name=instance_offer.instance.name,
311
+ is_ufw_installed=image.is_ufw_installed,
308
312
  ),
309
313
  authorized_keys=authorized_keys,
310
314
  labels=labels,
@@ -889,24 +893,41 @@ def _get_vpc_subnet(
889
893
  )
890
894
 
891
895
 
892
- def _get_image_id(instance_type_name: str, cuda: bool) -> str:
896
+ @dataclass
897
+ class GCPImage:
898
+ id: str
899
+ is_ufw_installed: bool
900
+
901
+
902
+ def _get_image(instance_type_name: str, cuda: bool) -> GCPImage:
893
903
  if instance_type_name == "a3-megagpu-8g":
894
904
  image_name = "dstack-a3mega-5"
905
+ is_ufw_installed = False
895
906
  elif instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]:
896
- return "projects/cos-cloud/global/images/cos-105-17412-535-78"
907
+ return GCPImage(
908
+ id="projects/cos-cloud/global/images/cos-105-17412-535-78",
909
+ is_ufw_installed=False,
910
+ )
897
911
  elif cuda:
898
912
  image_name = f"dstack-cuda-{version.base_image}"
913
+ is_ufw_installed = True
899
914
  else:
900
915
  image_name = f"dstack-{version.base_image}"
916
+ is_ufw_installed = True
901
917
  image_name = image_name.replace(".", "-")
902
- return f"projects/dstack/global/images/{image_name}"
918
+ return GCPImage(
919
+ id=f"projects/dstack/global/images/{image_name}",
920
+ is_ufw_installed=is_ufw_installed,
921
+ )
903
922
 
904
923
 
905
924
  def _get_gateway_image_id() -> str:
906
925
  return "projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20230714"
907
926
 
908
927
 
909
- def _get_user_data(authorized_keys: List[str], instance_type_name: str) -> str:
928
+ def _get_user_data(
929
+ authorized_keys: List[str], instance_type_name: str, is_ufw_installed: bool
930
+ ) -> str:
910
931
  base_path = None
911
932
  bin_path = None
912
933
  backend_shim_env = None
@@ -929,6 +950,9 @@ def _get_user_data(authorized_keys: List[str], instance_type_name: str) -> str:
929
950
  base_path=base_path,
930
951
  bin_path=bin_path,
931
952
  backend_shim_env=backend_shim_env,
953
+ # Instance-level firewall is optional on GCP. The main protection comes from GCP firewalls.
954
+ # So only set up instance-level firewall as an additional measure if ufw is available.
955
+ skip_firewall_setup=not is_ufw_installed,
932
956
  )
933
957
 
934
958
 
@@ -11,7 +11,6 @@ from dstack._internal.core.backends.base.configurator import (
11
11
  from dstack._internal.core.backends.gcp import auth, resources
12
12
  from dstack._internal.core.backends.gcp.backend import GCPBackend
13
13
  from dstack._internal.core.backends.gcp.models import (
14
- AnyGCPBackendConfig,
15
14
  GCPBackendConfig,
16
15
  GCPBackendConfigWithCreds,
17
16
  GCPConfig,
@@ -109,7 +108,12 @@ DEFAULT_REGIONS = REGIONS
109
108
  MAIN_REGION = "us-east1"
110
109
 
111
110
 
112
- class GCPConfigurator(Configurator):
111
+ class GCPConfigurator(
112
+ Configurator[
113
+ GCPBackendConfig,
114
+ GCPBackendConfigWithCreds,
115
+ ]
116
+ ):
113
117
  TYPE = BackendType.GCP
114
118
  BACKEND_CLASS = GCPBackend
115
119
 
@@ -147,12 +151,12 @@ class GCPConfigurator(Configurator):
147
151
  auth=GCPCreds.parse_obj(config.creds).json(),
148
152
  )
149
153
 
150
- def get_backend_config(
151
- self, record: BackendRecord, include_creds: bool
152
- ) -> AnyGCPBackendConfig:
154
+ def get_backend_config_with_creds(self, record: BackendRecord) -> GCPBackendConfigWithCreds:
155
+ config = self._get_config(record)
156
+ return GCPBackendConfigWithCreds.__response__.parse_obj(config)
157
+
158
+ def get_backend_config_without_creds(self, record: BackendRecord) -> GCPBackendConfig:
153
159
  config = self._get_config(record)
154
- if include_creds:
155
- return GCPBackendConfigWithCreds.__response__.parse_obj(config)
156
160
  return GCPBackendConfig.__response__.parse_obj(config)
157
161
 
158
162
  def get_backend(self, record: BackendRecord) -> GCPBackend:
@@ -16,46 +16,38 @@ class HotAisleAPIClient:
16
16
  self.team_handle = team_handle
17
17
 
18
18
  def validate_api_key(self) -> bool:
19
+ url = f"{API_URL}/user/"
19
20
  try:
20
- self._validate_user_and_team()
21
- return True
21
+ response = self._make_request("GET", url)
22
+ response.raise_for_status()
22
23
  except requests.HTTPError as e:
23
- if e.response.status_code == 401:
24
- raise_invalid_credentials_error(
25
- fields=[["creds", "api_key"]], details="Invalid API key"
26
- )
27
- elif e.response.status_code == 403:
28
- raise_invalid_credentials_error(
29
- fields=[["creds", "api_key"]],
30
- details="Authenticated user does note have required permissions",
31
- )
32
- raise e
33
- except ValueError as e:
34
- error_message = str(e)
35
- if "No Hot Aisle teams found" in error_message:
36
- raise_invalid_credentials_error(
37
- fields=[["creds", "api_key"]],
38
- details="Valid API key but no teams found for this user",
39
- )
40
- elif "not found" in error_message:
41
- raise_invalid_credentials_error(
42
- fields=[["team_handle"]], details=f"Team handle '{self.team_handle}' not found"
43
- )
44
- raise e
45
-
46
- def _validate_user_and_team(self) -> None:
47
- url = f"{API_URL}/user/"
48
- response = self._make_request("GET", url)
49
- response.raise_for_status()
50
- user_data = response.json()
24
+ if e.response is not None:
25
+ if e.response.status_code == 401:
26
+ raise_invalid_credentials_error(
27
+ fields=[["creds", "api_key"]], details="Invalid API key"
28
+ )
29
+ if e.response.status_code == 403:
30
+ raise_invalid_credentials_error(
31
+ fields=[["creds", "api_key"]],
32
+ details="Authenticated user does not have required permissions",
33
+ )
34
+ raise
51
35
 
52
- teams = user_data.get("teams", [])
36
+ user_data = response.json()
37
+ teams = user_data["teams"]
53
38
  if not teams:
54
- raise ValueError("No Hot Aisle teams found for this user")
39
+ raise_invalid_credentials_error(
40
+ fields=[["creds", "api_key"]],
41
+ details="Valid API key but no teams found for this user",
42
+ )
55
43
 
56
44
  available_teams = [team["handle"] for team in teams]
57
45
  if self.team_handle not in available_teams:
58
- raise ValueError(f"Hot Aisle team '{self.team_handle}' not found.")
46
+ raise_invalid_credentials_error(
47
+ fields=[["team_handle"]],
48
+ details=f"Team handle '{self.team_handle}' not found",
49
+ )
50
+ return True
59
51
 
60
52
  def upload_ssh_key(self, public_key: str) -> bool:
61
53
  url = f"{API_URL}/user/ssh_keys/"
@@ -28,8 +28,6 @@ from dstack._internal.utils.logging import get_logger
28
28
 
29
29
  logger = get_logger(__name__)
30
30
 
31
- MAX_INSTANCE_NAME_LEN = 60
32
-
33
31
 
34
32
  INSTANCE_TYPE_SPECS = {
35
33
  "1x MI300X 8x Xeon Platinum 8462Y+": {
@@ -130,9 +128,7 @@ class HotAisleCompute(
130
128
  ssh_port=22,
131
129
  dockerized=True,
132
130
  ssh_proxy=None,
133
- backend_data=HotAisleInstanceBackendData(
134
- ip_address=vm_data["ip_address"], vm_id=vm_data["name"]
135
- ).json(),
131
+ backend_data=HotAisleInstanceBackendData(ip_address=vm_data["ip_address"]).json(),
136
132
  )
137
133
 
138
134
  def update_provisioning_data(
@@ -217,7 +213,6 @@ def _run_ssh_command(hostname: str, ssh_private_key: str, command: str):
217
213
 
218
214
  class HotAisleInstanceBackendData(CoreModel):
219
215
  ip_address: str
220
- vm_id: Optional[str] = None
221
216
 
222
217
  @classmethod
223
218
  def load(cls, raw: Optional[str]) -> "HotAisleInstanceBackendData":
@@ -7,7 +7,6 @@ from dstack._internal.core.backends.base.configurator import (
7
7
  from dstack._internal.core.backends.hotaisle.api_client import HotAisleAPIClient
8
8
  from dstack._internal.core.backends.hotaisle.backend import HotAisleBackend
9
9
  from dstack._internal.core.backends.hotaisle.models import (
10
- AnyHotAisleBackendConfig,
11
10
  AnyHotAisleCreds,
12
11
  HotAisleBackendConfig,
13
12
  HotAisleBackendConfigWithCreds,
@@ -20,7 +19,12 @@ from dstack._internal.core.models.backends.base import (
20
19
  )
21
20
 
22
21
 
23
- class HotAisleConfigurator(Configurator):
22
+ class HotAisleConfigurator(
23
+ Configurator[
24
+ HotAisleBackendConfig,
25
+ HotAisleBackendConfigWithCreds,
26
+ ]
27
+ ):
24
28
  TYPE = BackendType.HOTAISLE
25
29
  BACKEND_CLASS = HotAisleBackend
26
30
 
@@ -37,12 +41,14 @@ class HotAisleConfigurator(Configurator):
37
41
  auth=HotAisleCreds.parse_obj(config.creds).json(),
38
42
  )
39
43
 
40
- def get_backend_config(
41
- self, record: BackendRecord, include_creds: bool
42
- ) -> AnyHotAisleBackendConfig:
44
+ def get_backend_config_with_creds(
45
+ self, record: BackendRecord
46
+ ) -> HotAisleBackendConfigWithCreds:
47
+ config = self._get_config(record)
48
+ return HotAisleBackendConfigWithCreds.__response__.parse_obj(config)
49
+
50
+ def get_backend_config_without_creds(self, record: BackendRecord) -> HotAisleBackendConfig:
43
51
  config = self._get_config(record)
44
- if include_creds:
45
- return HotAisleBackendConfigWithCreds.__response__.parse_obj(config)
46
52
  return HotAisleBackendConfig.__response__.parse_obj(config)
47
53
 
48
54
  def get_backend(self, record: BackendRecord) -> HotAisleBackend:
@@ -6,7 +6,6 @@ from dstack._internal.core.backends.base.configurator import (
6
6
  from dstack._internal.core.backends.kubernetes import utils as kubernetes_utils
7
7
  from dstack._internal.core.backends.kubernetes.backend import KubernetesBackend
8
8
  from dstack._internal.core.backends.kubernetes.models import (
9
- AnyKubernetesBackendConfig,
10
9
  KubernetesBackendConfig,
11
10
  KubernetesBackendConfigWithCreds,
12
11
  KubernetesConfig,
@@ -18,7 +17,12 @@ from dstack._internal.utils.logging import get_logger
18
17
  logger = get_logger(__name__)
19
18
 
20
19
 
21
- class KubernetesConfigurator(Configurator):
20
+ class KubernetesConfigurator(
21
+ Configurator[
22
+ KubernetesBackendConfig,
23
+ KubernetesBackendConfigWithCreds,
24
+ ]
25
+ ):
22
26
  TYPE = BackendType.KUBERNETES
23
27
  BACKEND_CLASS = KubernetesBackend
24
28
 
@@ -40,12 +44,14 @@ class KubernetesConfigurator(Configurator):
40
44
  auth="",
41
45
  )
42
46
 
43
- def get_backend_config(
44
- self, record: BackendRecord, include_creds: bool
45
- ) -> AnyKubernetesBackendConfig:
47
+ def get_backend_config_with_creds(
48
+ self, record: BackendRecord
49
+ ) -> KubernetesBackendConfigWithCreds:
50
+ config = self._get_config(record)
51
+ return KubernetesBackendConfigWithCreds.__response__.parse_obj(config)
52
+
53
+ def get_backend_config_without_creds(self, record: BackendRecord) -> KubernetesBackendConfig:
46
54
  config = self._get_config(record)
47
- if include_creds:
48
- return KubernetesBackendConfigWithCreds.__response__.parse_obj(config)
49
55
  return KubernetesBackendConfig.__response__.parse_obj(config)
50
56
 
51
57
  def get_backend(self, record: BackendRecord) -> KubernetesBackend:
@@ -8,7 +8,6 @@ from dstack._internal.core.backends.base.configurator import (
8
8
  from dstack._internal.core.backends.lambdalabs import api_client
9
9
  from dstack._internal.core.backends.lambdalabs.backend import LambdaBackend
10
10
  from dstack._internal.core.backends.lambdalabs.models import (
11
- AnyLambdaBackendConfig,
12
11
  LambdaBackendConfig,
13
12
  LambdaBackendConfigWithCreds,
14
13
  LambdaConfig,
@@ -20,7 +19,12 @@ from dstack._internal.core.models.backends.base import (
20
19
  )
21
20
 
22
21
 
23
- class LambdaConfigurator(Configurator):
22
+ class LambdaConfigurator(
23
+ Configurator[
24
+ LambdaBackendConfig,
25
+ LambdaBackendConfigWithCreds,
26
+ ]
27
+ ):
24
28
  TYPE = BackendType.LAMBDA
25
29
  BACKEND_CLASS = LambdaBackend
26
30
 
@@ -37,12 +41,12 @@ class LambdaConfigurator(Configurator):
37
41
  auth=LambdaCreds.parse_obj(config.creds).json(),
38
42
  )
39
43
 
40
- def get_backend_config(
41
- self, record: BackendRecord, include_creds: bool
42
- ) -> AnyLambdaBackendConfig:
44
+ def get_backend_config_with_creds(self, record: BackendRecord) -> LambdaBackendConfigWithCreds:
45
+ config = self._get_config(record)
46
+ return LambdaBackendConfigWithCreds.__response__.parse_obj(config)
47
+
48
+ def get_backend_config_without_creds(self, record: BackendRecord) -> LambdaBackendConfig:
43
49
  config = self._get_config(record)
44
- if include_creds:
45
- return LambdaBackendConfigWithCreds.__response__.parse_obj(config)
46
50
  return LambdaBackendConfig.__response__.parse_obj(config)
47
51
 
48
52
  def get_backend(self, record: BackendRecord) -> LambdaBackend:
@@ -20,6 +20,10 @@ from dstack._internal.core.backends.datacrunch.models import (
20
20
  DataCrunchBackendConfig,
21
21
  DataCrunchBackendConfigWithCreds,
22
22
  )
23
+ from dstack._internal.core.backends.digitalocean_base.models import (
24
+ BaseDigitalOceanBackendConfig,
25
+ BaseDigitalOceanBackendConfigWithCreds,
26
+ )
23
27
  from dstack._internal.core.backends.dstack.models import (
24
28
  DstackBackendConfig,
25
29
  DstackBaseBackendConfig,
@@ -77,6 +81,7 @@ AnyBackendConfigWithoutCreds = Union[
77
81
  CloudRiftBackendConfig,
78
82
  CudoBackendConfig,
79
83
  DataCrunchBackendConfig,
84
+ BaseDigitalOceanBackendConfig,
80
85
  GCPBackendConfig,
81
86
  HotAisleBackendConfig,
82
87
  KubernetesBackendConfig,
@@ -100,6 +105,7 @@ AnyBackendConfigWithCreds = Union[
100
105
  CloudRiftBackendConfigWithCreds,
101
106
  CudoBackendConfigWithCreds,
102
107
  DataCrunchBackendConfigWithCreds,
108
+ BaseDigitalOceanBackendConfigWithCreds,
103
109
  GCPBackendConfigWithCreds,
104
110
  HotAisleBackendConfigWithCreds,
105
111
  KubernetesBackendConfigWithCreds,
@@ -122,6 +128,7 @@ AnyBackendFileConfigWithCreds = Union[
122
128
  CloudRiftBackendConfigWithCreds,
123
129
  CudoBackendConfigWithCreds,
124
130
  DataCrunchBackendConfigWithCreds,
131
+ BaseDigitalOceanBackendConfigWithCreds,
125
132
  GCPBackendFileConfigWithCreds,
126
133
  HotAisleBackendFileConfigWithCreds,
127
134
  KubernetesBackendFileConfigWithCreds,