dstack 0.19.26__py3-none-any.whl → 0.19.28__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (93)
  1. dstack/_internal/cli/commands/__init__.py +11 -8
  2. dstack/_internal/cli/commands/apply.py +6 -3
  3. dstack/_internal/cli/commands/completion.py +3 -1
  4. dstack/_internal/cli/commands/config.py +1 -0
  5. dstack/_internal/cli/commands/init.py +4 -4
  6. dstack/_internal/cli/commands/offer.py +1 -1
  7. dstack/_internal/cli/commands/project.py +1 -0
  8. dstack/_internal/cli/commands/server.py +2 -2
  9. dstack/_internal/cli/main.py +1 -1
  10. dstack/_internal/cli/services/configurators/base.py +2 -4
  11. dstack/_internal/cli/services/configurators/fleet.py +4 -5
  12. dstack/_internal/cli/services/configurators/gateway.py +3 -5
  13. dstack/_internal/cli/services/configurators/run.py +165 -43
  14. dstack/_internal/cli/services/configurators/volume.py +3 -5
  15. dstack/_internal/cli/services/repos.py +1 -18
  16. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  17. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  18. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  19. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  20. dstack/_internal/core/backends/aws/compute.py +6 -1
  21. dstack/_internal/core/backends/base/compute.py +33 -5
  22. dstack/_internal/core/backends/base/offers.py +2 -0
  23. dstack/_internal/core/backends/configurators.py +15 -0
  24. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  25. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  26. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  27. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  28. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  29. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  30. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  31. dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
  32. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  33. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  34. dstack/_internal/core/backends/gcp/compute.py +32 -8
  35. dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
  36. dstack/_internal/core/backends/hotaisle/compute.py +1 -6
  37. dstack/_internal/core/backends/models.py +7 -0
  38. dstack/_internal/core/backends/nebius/compute.py +0 -7
  39. dstack/_internal/core/backends/oci/compute.py +4 -5
  40. dstack/_internal/core/backends/vultr/compute.py +1 -5
  41. dstack/_internal/core/compatibility/fleets.py +5 -0
  42. dstack/_internal/core/compatibility/runs.py +10 -1
  43. dstack/_internal/core/models/backends/base.py +5 -1
  44. dstack/_internal/core/models/common.py +67 -43
  45. dstack/_internal/core/models/configurations.py +109 -69
  46. dstack/_internal/core/models/files.py +1 -1
  47. dstack/_internal/core/models/fleets.py +115 -25
  48. dstack/_internal/core/models/instances.py +5 -5
  49. dstack/_internal/core/models/profiles.py +66 -47
  50. dstack/_internal/core/models/repos/remote.py +21 -16
  51. dstack/_internal/core/models/resources.py +69 -65
  52. dstack/_internal/core/models/runs.py +41 -14
  53. dstack/_internal/core/services/repos.py +85 -80
  54. dstack/_internal/server/app.py +5 -0
  55. dstack/_internal/server/background/tasks/process_fleets.py +117 -13
  56. dstack/_internal/server/background/tasks/process_instances.py +12 -71
  57. dstack/_internal/server/background/tasks/process_running_jobs.py +2 -0
  58. dstack/_internal/server/background/tasks/process_runs.py +2 -0
  59. dstack/_internal/server/background/tasks/process_submitted_jobs.py +48 -16
  60. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  61. dstack/_internal/server/models.py +11 -7
  62. dstack/_internal/server/schemas/gateways.py +10 -9
  63. dstack/_internal/server/schemas/runner.py +1 -0
  64. dstack/_internal/server/services/backends/handlers.py +2 -0
  65. dstack/_internal/server/services/docker.py +8 -7
  66. dstack/_internal/server/services/fleets.py +23 -25
  67. dstack/_internal/server/services/instances.py +3 -3
  68. dstack/_internal/server/services/jobs/configurators/base.py +46 -6
  69. dstack/_internal/server/services/jobs/configurators/dev.py +4 -4
  70. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -5
  71. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +4 -6
  72. dstack/_internal/server/services/jobs/configurators/service.py +0 -3
  73. dstack/_internal/server/services/jobs/configurators/task.py +0 -3
  74. dstack/_internal/server/services/projects.py +52 -1
  75. dstack/_internal/server/services/runs.py +16 -0
  76. dstack/_internal/server/settings.py +46 -0
  77. dstack/_internal/server/statics/index.html +1 -1
  78. dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-5e0d56245c4bd241ec27.css} +1 -1
  79. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-a2a16772fbf11a14d191.js} +1215 -998
  80. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-a2a16772fbf11a14d191.js.map} +1 -1
  81. dstack/_internal/server/testing/common.py +6 -3
  82. dstack/_internal/utils/env.py +85 -11
  83. dstack/_internal/utils/path.py +8 -1
  84. dstack/_internal/utils/ssh.py +7 -0
  85. dstack/api/_public/repos.py +41 -6
  86. dstack/api/_public/runs.py +14 -1
  87. dstack/version.py +1 -1
  88. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/METADATA +2 -2
  89. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/RECORD +92 -78
  90. dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +0 -3
  91. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/WHEEL +0 -0
  92. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/entry_points.txt +0 -0
  93. {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,6 +1,5 @@
1
1
  import argparse
2
2
  import time
3
- from typing import List
4
3
 
5
4
  from rich.table import Table
6
5
 
@@ -26,7 +25,7 @@ from dstack.api._public import Client
26
25
 
27
26
 
28
27
  class VolumeConfigurator(BaseApplyConfigurator[VolumeConfiguration]):
29
- TYPE: ApplyConfigurationType = ApplyConfigurationType.VOLUME
28
+ TYPE = ApplyConfigurationType.VOLUME
30
29
 
31
30
  def apply_configuration(
32
31
  self,
@@ -34,9 +33,8 @@ class VolumeConfigurator(BaseApplyConfigurator[VolumeConfiguration]):
34
33
  configuration_path: str,
35
34
  command_args: argparse.Namespace,
36
35
  configurator_args: argparse.Namespace,
37
- unknown_args: List[str],
38
36
  ):
39
- self.apply_args(conf, configurator_args, unknown_args)
37
+ self.apply_args(conf, configurator_args)
40
38
  spec = VolumeSpec(
41
39
  configuration=conf,
42
40
  configuration_path=configuration_path,
@@ -167,7 +165,7 @@ class VolumeConfigurator(BaseApplyConfigurator[VolumeConfiguration]):
167
165
  help="The volume name",
168
166
  )
169
167
 
170
- def apply_args(self, conf: VolumeConfiguration, args: argparse.Namespace, unknown: List[str]):
168
+ def apply_args(self, conf: VolumeConfiguration, args: argparse.Namespace):
171
169
  if args.name:
172
170
  conf.name = args.name
173
171
 
@@ -1,5 +1,5 @@
1
1
  import argparse
2
- from typing import Literal, Optional, Union, overload
2
+ from typing import Literal, Union, overload
3
3
 
4
4
  import git
5
5
 
@@ -8,7 +8,6 @@ from dstack._internal.core.errors import CLIError
8
8
  from dstack._internal.core.models.repos.local import LocalRepo
9
9
  from dstack._internal.core.models.repos.remote import GitRepoURL, RemoteRepo, RepoError
10
10
  from dstack._internal.core.models.repos.virtual import VirtualRepo
11
- from dstack._internal.core.services.repos import get_default_branch
12
11
  from dstack._internal.utils.path import PathLike
13
12
  from dstack.api._public import Client
14
13
 
@@ -43,22 +42,6 @@ def init_default_virtual_repo(api: Client) -> VirtualRepo:
43
42
  return repo
44
43
 
45
44
 
46
- def get_repo_from_url(
47
- repo_url: str, repo_branch: Optional[str] = None, repo_hash: Optional[str] = None
48
- ) -> RemoteRepo:
49
- if repo_branch is None and repo_hash is None:
50
- repo_branch = get_default_branch(repo_url)
51
- if repo_branch is None:
52
- raise CLIError(
53
- "Failed to automatically detect remote repo branch. Specify branch or hash."
54
- )
55
- return RemoteRepo.from_url(
56
- repo_url=repo_url,
57
- repo_branch=repo_branch,
58
- repo_hash=repo_hash,
59
- )
60
-
61
-
62
45
  @overload
63
46
  def get_repo_from_dir(repo_dir: PathLike, local: Literal[False] = False) -> RemoteRepo: ...
64
47
 
@@ -0,0 +1 @@
1
+ # This package contains the implementation for the AMDDevCloud backend.
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.amddevcloud.compute import AMDDevCloudCompute
2
+ from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
3
+ from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class AMDDevCloudBackend(BaseDigitalOceanBackend):
8
+ TYPE = BackendType.AMDDEVCLOUD
9
+ COMPUTE_CLASS = AMDDevCloudCompute
10
+
11
+ def __init__(self, config: BaseDigitalOceanConfig, api_url: str):
12
+ self.config = config
13
+ self._compute = AMDDevCloudCompute(self.config, api_url=api_url, type=self.TYPE)
14
+
15
+ def compute(self) -> AMDDevCloudCompute:
16
+ return self._compute
@@ -0,0 +1,5 @@
1
+ from dstack._internal.core.backends.digitalocean_base.compute import BaseDigitalOceanCompute
2
+
3
+
4
+ class AMDDevCloudCompute(BaseDigitalOceanCompute):
5
+ pass
@@ -0,0 +1,29 @@
1
+ from typing import Optional
2
+
3
+ from dstack._internal.core.backends.amddevcloud.backend import AMDDevCloudBackend
4
+ from dstack._internal.core.backends.base.configurator import BackendRecord
5
+ from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient
6
+ from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
7
+ from dstack._internal.core.backends.digitalocean_base.configurator import (
8
+ BaseDigitalOceanConfigurator,
9
+ )
10
+ from dstack._internal.core.backends.digitalocean_base.models import AnyBaseDigitalOceanCreds
11
+ from dstack._internal.core.models.backends.base import (
12
+ BackendType,
13
+ )
14
+
15
+
16
+ class AMDDevCloudConfigurator(BaseDigitalOceanConfigurator):
17
+ TYPE = BackendType.AMDDEVCLOUD
18
+ BACKEND_CLASS = AMDDevCloudBackend
19
+ API_URL = "https://api-amd.digitalocean.com"
20
+
21
+ def get_backend(self, record: BackendRecord) -> BaseDigitalOceanBackend:
22
+ config = self._get_config(record)
23
+ return AMDDevCloudBackend(config=config, api_url=self.API_URL)
24
+
25
+ def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None):
26
+ api_client = DigitalOceanAPIClient(creds.api_key, self.API_URL)
27
+ api_client.validate_api_key()
28
+ if project_name:
29
+ api_client.validate_project_name(project_name)
@@ -292,7 +292,12 @@ class AWSCompute(
292
292
  image_id=image_id,
293
293
  instance_type=instance_offer.instance.name,
294
294
  iam_instance_profile=self.config.iam_instance_profile,
295
- user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
295
+ user_data=get_user_data(
296
+ authorized_keys=instance_config.get_public_keys(),
297
+ # Custom OS images may lack ufw, so don't attempt to set up the firewall.
298
+ # Rely on security groups and the image's built-in firewall rules instead.
299
+ skip_firewall_setup=self.config.os_images is not None,
300
+ ),
296
301
  tags=aws_resources.make_tags(tags),
297
302
  security_group_id=security_group_id,
298
303
  spot=instance_offer.instance.resources.spot,
@@ -4,6 +4,7 @@ import re
4
4
  import string
5
5
  import threading
6
6
  from abc import ABC, abstractmethod
7
+ from collections.abc import Iterable
7
8
  from functools import lru_cache
8
9
  from pathlib import Path
9
10
  from typing import Dict, List, Literal, Optional
@@ -19,7 +20,7 @@ from dstack._internal.core.consts import (
19
20
  DSTACK_RUNNER_SSH_PORT,
20
21
  DSTACK_SHIM_HTTP_PORT,
21
22
  )
22
- from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
23
+ from dstack._internal.core.models.configurations import LEGACY_REPO_DIR
23
24
  from dstack._internal.core.models.gateways import (
24
25
  GatewayComputeConfiguration,
25
26
  GatewayProvisioningData,
@@ -45,6 +46,7 @@ logger = get_logger(__name__)
45
46
 
46
47
  DSTACK_SHIM_BINARY_NAME = "dstack-shim"
47
48
  DSTACK_RUNNER_BINARY_NAME = "dstack-runner"
49
+ DEFAULT_PRIVATE_SUBNETS = ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")
48
50
 
49
51
  GoArchType = Literal["amd64", "arm64"]
50
52
 
@@ -507,12 +509,16 @@ def get_user_data(
507
509
  base_path: Optional[PathLike] = None,
508
510
  bin_path: Optional[PathLike] = None,
509
511
  backend_shim_env: Optional[Dict[str, str]] = None,
512
+ skip_firewall_setup: bool = False,
513
+ firewall_allow_from_subnets: Iterable[str] = DEFAULT_PRIVATE_SUBNETS,
510
514
  ) -> str:
511
515
  shim_commands = get_shim_commands(
512
516
  authorized_keys=authorized_keys,
513
517
  base_path=base_path,
514
518
  bin_path=bin_path,
515
519
  backend_shim_env=backend_shim_env,
520
+ skip_firewall_setup=skip_firewall_setup,
521
+ firewall_allow_from_subnets=firewall_allow_from_subnets,
516
522
  )
517
523
  commands = (backend_specific_commands or []) + shim_commands
518
524
  return get_cloud_config(
@@ -554,8 +560,13 @@ def get_shim_commands(
554
560
  bin_path: Optional[PathLike] = None,
555
561
  backend_shim_env: Optional[Dict[str, str]] = None,
556
562
  arch: Optional[str] = None,
563
+ skip_firewall_setup: bool = False,
564
+ firewall_allow_from_subnets: Iterable[str] = DEFAULT_PRIVATE_SUBNETS,
557
565
  ) -> List[str]:
558
- commands = get_setup_cloud_instance_commands()
566
+ commands = get_setup_cloud_instance_commands(
567
+ skip_firewall_setup=skip_firewall_setup,
568
+ firewall_allow_from_subnets=firewall_allow_from_subnets,
569
+ )
559
570
  commands += get_shim_pre_start_commands(
560
571
  base_path=base_path,
561
572
  bin_path=bin_path,
@@ -638,8 +649,11 @@ def get_dstack_shim_download_url(arch: Optional[str] = None) -> str:
638
649
  return url_template.format(version=version, arch=arch)
639
650
 
640
651
 
641
- def get_setup_cloud_instance_commands() -> list[str]:
642
- return [
652
+ def get_setup_cloud_instance_commands(
653
+ skip_firewall_setup: bool,
654
+ firewall_allow_from_subnets: Iterable[str],
655
+ ) -> list[str]:
656
+ commands = [
643
657
  # Workaround for https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
644
658
  # Attempts to patch /etc/docker/daemon.json while keeping any custom settings it may have.
645
659
  (
@@ -653,6 +667,19 @@ def get_setup_cloud_instance_commands() -> list[str]:
653
667
  "'"
654
668
  ),
655
669
  ]
670
+ if not skip_firewall_setup:
671
+ commands += [
672
+ "ufw --force reset", # Some OS images have default rules like `allow 80`. Delete them
673
+ "ufw default deny incoming",
674
+ "ufw default allow outgoing",
675
+ "ufw allow ssh",
676
+ ]
677
+ for subnet in firewall_allow_from_subnets:
678
+ commands.append(f"ufw allow from {subnet}")
679
+ commands += [
680
+ "ufw --force enable",
681
+ ]
682
+ return commands
656
683
 
657
684
 
658
685
  def get_shim_pre_start_commands(
@@ -773,7 +800,8 @@ def get_docker_commands(
773
800
  f" --ssh-port {DSTACK_RUNNER_SSH_PORT}"
774
801
  " --temp-dir /tmp/runner"
775
802
  " --home-dir /root"
776
- f" --working-dir {DEFAULT_REPO_DIR}"
803
+ # TODO: Not used, left for compatibility with old runners. Remove eventually.
804
+ f" --working-dir {LEGACY_REPO_DIR}"
777
805
  ),
778
806
  ]
779
807
 
@@ -34,6 +34,8 @@ def get_catalog_offers(
34
34
  provider = backend.value
35
35
  if backend == BackendType.LAMBDA:
36
36
  provider = "lambdalabs"
37
+ if backend == BackendType.AMDDEVCLOUD:
38
+ provider = "digitalocean"
37
39
  q = requirements_to_query_filter(requirements)
38
40
  q.provider = [provider]
39
41
  offers = []
@@ -5,6 +5,12 @@ from dstack._internal.core.models.backends.base import BackendType
5
5
 
6
6
  _CONFIGURATOR_CLASSES: List[Type[Configurator]] = []
7
7
 
8
+ try:
9
+ from dstack._internal.core.backends.amddevcloud.configurator import AMDDevCloudConfigurator
10
+
11
+ _CONFIGURATOR_CLASSES.append(AMDDevCloudConfigurator)
12
+ except ImportError:
13
+ pass
8
14
 
9
15
  try:
10
16
  from dstack._internal.core.backends.aws.configurator import AWSConfigurator
@@ -47,6 +53,15 @@ try:
47
53
  except ImportError:
48
54
  pass
49
55
 
56
+ try:
57
+ from dstack._internal.core.backends.digitalocean.configurator import (
58
+ DigitalOceanConfigurator,
59
+ )
60
+
61
+ _CONFIGURATOR_CLASSES.append(DigitalOceanConfigurator)
62
+ except ImportError:
63
+ pass
64
+
50
65
  try:
51
66
  from dstack._internal.core.backends.gcp.configurator import GCPConfigurator
52
67
 
@@ -0,0 +1 @@
1
+ # DigitalOcean backend for dstack
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.digitalocean.compute import DigitalOceanCompute
2
+ from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
3
+ from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class DigitalOceanBackend(BaseDigitalOceanBackend):
8
+ TYPE = BackendType.DIGITALOCEAN
9
+ COMPUTE_CLASS = DigitalOceanCompute
10
+
11
+ def __init__(self, config: BaseDigitalOceanConfig, api_url: str):
12
+ self.config = config
13
+ self._compute = DigitalOceanCompute(self.config, api_url=api_url, type=self.TYPE)
14
+
15
+ def compute(self) -> DigitalOceanCompute:
16
+ return self._compute
@@ -0,0 +1,5 @@
1
+ from ..digitalocean_base.compute import BaseDigitalOceanCompute
2
+
3
+
4
+ class DigitalOceanCompute(BaseDigitalOceanCompute):
5
+ pass
@@ -0,0 +1,31 @@
1
+ from typing import Optional
2
+
3
+ from dstack._internal.core.backends.base.configurator import BackendRecord
4
+ from dstack._internal.core.backends.digitalocean.backend import DigitalOceanBackend
5
+ from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient
6
+ from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
7
+ from dstack._internal.core.backends.digitalocean_base.configurator import (
8
+ BaseDigitalOceanConfigurator,
9
+ )
10
+ from dstack._internal.core.backends.digitalocean_base.models import (
11
+ AnyBaseDigitalOceanCreds,
12
+ )
13
+ from dstack._internal.core.models.backends.base import (
14
+ BackendType,
15
+ )
16
+
17
+
18
+ class DigitalOceanConfigurator(BaseDigitalOceanConfigurator):
19
+ TYPE = BackendType.DIGITALOCEAN
20
+ BACKEND_CLASS = DigitalOceanBackend
21
+ API_URL = "https://api.digitalocean.com"
22
+
23
+ def get_backend(self, record: BackendRecord) -> BaseDigitalOceanBackend:
24
+ config = self._get_config(record)
25
+ return DigitalOceanBackend(config=config, api_url=self.API_URL)
26
+
27
+ def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None):
28
+ api_client = DigitalOceanAPIClient(creds.api_key, self.API_URL)
29
+ api_client.validate_api_key()
30
+ if project_name:
31
+ api_client.validate_project_name(project_name)
@@ -0,0 +1 @@
1
+ # This package contains the base classes for DigitalOcean and AMDDevCloud backends.
@@ -0,0 +1,104 @@
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ import requests
4
+
5
+ from dstack._internal.core.backends.base.configurator import raise_invalid_credentials_error
6
+ from dstack._internal.core.errors import NoCapacityError
7
+ from dstack._internal.utils.logging import get_logger
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
+ class DigitalOceanAPIClient:
13
+ def __init__(self, api_key: str, api_url: str):
14
+ self.api_key = api_key
15
+ self.base_url = api_url
16
+
17
+ def validate_api_key(self) -> bool:
18
+ try:
19
+ response = self._make_request("GET", "/v2/account")
20
+ response.raise_for_status()
21
+ return True
22
+ except requests.HTTPError as e:
23
+ status = e.response.status_code
24
+ if status == 401:
25
+ raise_invalid_credentials_error(
26
+ fields=[["creds", "api_key"]], details="Invaild API key"
27
+ )
28
+ raise e
29
+
30
+ def validate_project_name(self, project_name: str) -> bool:
31
+ if self.get_project_id(project_name) is None:
32
+ raise_invalid_credentials_error(
33
+ fields=[["project_name"]],
34
+ details=f"Project with name '{project_name}' does not exist",
35
+ )
36
+ return True
37
+
38
+ def list_ssh_keys(self) -> List[Dict[str, Any]]:
39
+ response = self._make_request("GET", "/v2/account/keys")
40
+ response.raise_for_status()
41
+ return response.json()["ssh_keys"]
42
+
43
+ def list_projects(self) -> List[Dict[str, Any]]:
44
+ response = self._make_request("GET", "/v2/projects")
45
+ response.raise_for_status()
46
+ return response.json()["projects"]
47
+
48
+ def get_project_id(self, project_name: str) -> Optional[str]:
49
+ projects = self.list_projects()
50
+ for project in projects:
51
+ if project["name"] == project_name:
52
+ return project["id"]
53
+ return None
54
+
55
+ def create_ssh_key(self, name: str, public_key: str) -> Dict[str, Any]:
56
+ payload = {"name": name, "public_key": public_key}
57
+ response = self._make_request("POST", "/v2/account/keys", json=payload)
58
+ response.raise_for_status()
59
+ return response.json()["ssh_key"]
60
+
61
+ def get_or_create_ssh_key(self, name: str, public_key: str) -> int:
62
+ ssh_keys = self.list_ssh_keys()
63
+ for ssh_key in ssh_keys:
64
+ if ssh_key["public_key"].strip() == public_key.strip():
65
+ return ssh_key["id"]
66
+
67
+ ssh_key = self.create_ssh_key(name, public_key)
68
+ return ssh_key["id"]
69
+
70
+ def create_droplet(self, droplet_config: Dict[str, Any]) -> Dict[str, Any]:
71
+ response = self._make_request("POST", "/v2/droplets", json=droplet_config)
72
+ if response.status_code == 422:
73
+ raise NoCapacityError(response.json()["message"])
74
+ response.raise_for_status()
75
+ return response.json()["droplet"]
76
+
77
+ def get_droplet(self, droplet_id: str) -> Dict[str, Any]:
78
+ response = self._make_request("GET", f"/v2/droplets/{droplet_id}")
79
+ response.raise_for_status()
80
+ return response.json()["droplet"]
81
+
82
+ def delete_droplet(self, droplet_id: str) -> None:
83
+ response = self._make_request("DELETE", f"/v2/droplets/{droplet_id}")
84
+ if response.status_code == 404:
85
+ logger.debug("DigitalOcean droplet %s not found", droplet_id)
86
+ return
87
+ response.raise_for_status()
88
+
89
+ def _make_request(
90
+ self, method: str, endpoint: str, json: Optional[Dict[str, Any]] = None, timeout: int = 30
91
+ ) -> requests.Response:
92
+ url = f"{self.base_url}{endpoint}"
93
+ headers = {
94
+ "Authorization": f"Bearer {self.api_key}",
95
+ }
96
+
97
+ response = requests.request(
98
+ method=method,
99
+ url=url,
100
+ headers=headers,
101
+ json=json,
102
+ timeout=timeout,
103
+ )
104
+ return response
@@ -0,0 +1,5 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+
3
+
4
+ class BaseDigitalOceanBackend(Backend):
5
+ pass
@@ -0,0 +1,173 @@
1
+ from typing import List, Optional
2
+
3
+ import gpuhunt
4
+ from gpuhunt.providers.digitalocean import DigitalOceanProvider
5
+
6
+ from dstack._internal.core.backends.base.backend import Compute
7
+ from dstack._internal.core.backends.base.compute import (
8
+ ComputeWithCreateInstanceSupport,
9
+ generate_unique_instance_name,
10
+ get_user_data,
11
+ )
12
+ from dstack._internal.core.backends.base.offers import get_catalog_offers
13
+ from dstack._internal.core.backends.digitalocean_base.api_client import DigitalOceanAPIClient
14
+ from dstack._internal.core.backends.digitalocean_base.models import BaseDigitalOceanConfig
15
+ from dstack._internal.core.errors import BackendError
16
+ from dstack._internal.core.models.backends.base import BackendType
17
+ from dstack._internal.core.models.instances import (
18
+ InstanceAvailability,
19
+ InstanceConfiguration,
20
+ InstanceOfferWithAvailability,
21
+ )
22
+ from dstack._internal.core.models.placement import PlacementGroup
23
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
24
+ from dstack._internal.utils.logging import get_logger
25
+
26
+ logger = get_logger(__name__)
27
+
28
+ MAX_INSTANCE_NAME_LEN = 60
29
+ DOCKER_INSTALL_COMMANDS = [
30
+ "export DEBIAN_FRONTEND=noninteractive",
31
+ "mkdir -p /etc/apt/keyrings",
32
+ "curl --max-time 60 -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg",
33
+ 'echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null',
34
+ "apt-get update",
35
+ "apt-get --assume-yes install docker-ce docker-ce-cli containerd.io docker-compose-plugin",
36
+ ]
37
+
38
+
39
+ class BaseDigitalOceanCompute(
40
+ ComputeWithCreateInstanceSupport,
41
+ Compute,
42
+ ):
43
+ def __init__(self, config: BaseDigitalOceanConfig, api_url: str, type: BackendType):
44
+ super().__init__()
45
+ self.config = config
46
+ self.api_client = DigitalOceanAPIClient(config.creds.api_key, api_url)
47
+ self.catalog = gpuhunt.Catalog(balance_resources=False, auto_reload=False)
48
+ self.BACKEND_TYPE = type
49
+ self.catalog.add_provider(
50
+ DigitalOceanProvider(api_key=config.creds.api_key, api_url=api_url)
51
+ )
52
+
53
+ def get_offers(
54
+ self, requirements: Optional[Requirements] = None
55
+ ) -> List[InstanceOfferWithAvailability]:
56
+ offers = get_catalog_offers(
57
+ backend=self.BACKEND_TYPE,
58
+ locations=self.config.regions,
59
+ requirements=requirements,
60
+ catalog=self.catalog,
61
+ )
62
+ return [
63
+ InstanceOfferWithAvailability(
64
+ **offer.dict(),
65
+ availability=InstanceAvailability.AVAILABLE,
66
+ )
67
+ for offer in offers
68
+ ]
69
+
70
+ def create_instance(
71
+ self,
72
+ instance_offer: InstanceOfferWithAvailability,
73
+ instance_config: InstanceConfiguration,
74
+ placement_group: Optional[PlacementGroup],
75
+ ) -> JobProvisioningData:
76
+ instance_name = generate_unique_instance_name(
77
+ instance_config, max_length=MAX_INSTANCE_NAME_LEN
78
+ )
79
+
80
+ project_ssh_key = instance_config.ssh_keys[0]
81
+ ssh_key_id = self.api_client.get_or_create_ssh_key(
82
+ name=f"dstack-{instance_config.project_name}",
83
+ public_key=project_ssh_key.public,
84
+ )
85
+ size_slug = instance_offer.instance.name
86
+
87
+ if not instance_offer.instance.resources.gpus:
88
+ backend_specific_commands = DOCKER_INSTALL_COMMANDS
89
+ else:
90
+ backend_specific_commands = None
91
+
92
+ project_id = None
93
+ if self.config.project_name:
94
+ project_id = self.api_client.get_project_id(self.config.project_name)
95
+ if project_id is None:
96
+ raise BackendError(f"Project {self.config.project_name} does not exist")
97
+ droplet_config = {
98
+ "name": instance_name,
99
+ "region": instance_offer.region,
100
+ "size": size_slug,
101
+ "image": self._get_image_for_instance(instance_offer),
102
+ "ssh_keys": [ssh_key_id],
103
+ "backups": False,
104
+ "ipv6": False,
105
+ "monitoring": False,
106
+ "tags": [],
107
+ "user_data": get_user_data(
108
+ authorized_keys=instance_config.get_public_keys(),
109
+ backend_specific_commands=backend_specific_commands,
110
+ ),
111
+ **({"project_id": project_id} if project_id is not None else {}),
112
+ }
113
+
114
+ droplet = self.api_client.create_droplet(droplet_config)
115
+
116
+ return JobProvisioningData(
117
+ backend=instance_offer.backend,
118
+ instance_type=instance_offer.instance,
119
+ instance_id=str(droplet["id"]),
120
+ hostname=None,
121
+ internal_ip=None,
122
+ region=instance_offer.region,
123
+ price=instance_offer.price,
124
+ username="root",
125
+ ssh_port=22,
126
+ dockerized=True,
127
+ ssh_proxy=None,
128
+ backend_data=None,
129
+ )
130
+
131
+ def update_provisioning_data(
132
+ self,
133
+ provisioning_data: JobProvisioningData,
134
+ project_ssh_public_key: str,
135
+ project_ssh_private_key: str,
136
+ ):
137
+ droplet = self.api_client.get_droplet(provisioning_data.instance_id)
138
+ if droplet["status"] == "active":
139
+ for network in droplet["networks"]["v4"]:
140
+ if network["type"] == "public":
141
+ provisioning_data.hostname = network["ip_address"]
142
+ break
143
+
144
+ def terminate_instance(
145
+ self, instance_id: str, region: str, backend_data: Optional[str] = None
146
+ ):
147
+ self.api_client.delete_droplet(instance_id)
148
+
149
+ def _get_image_for_instance(self, instance_offer: InstanceOfferWithAvailability) -> str:
150
+ if not instance_offer.instance.resources.gpus:
151
+ # No GPUs, use CPU image
152
+ return "ubuntu-24-04-x64"
153
+
154
+ gpu_count = len(instance_offer.instance.resources.gpus)
155
+ gpu_vendor = instance_offer.instance.resources.gpus[0].vendor
156
+
157
+ if gpu_vendor == gpuhunt.AcceleratorVendor.AMD:
158
+ # AMD GPU
159
+ return "digitaloceanai-rocmjupyter"
160
+ else:
161
+ # NVIDIA GPUs - DO only supports 1 and 8 GPU configurations.
162
+ # DO says for single GPU plans using GPUs other than H100s use "gpu-h100x1-base". DO does not provide guidance for x8 GPUs so assuming the same applies.
163
+ # See (https://docs.digitalocean.com/products/droplets/getting-started/recommended-gpu-setup/#aiml-ready-image)
164
+ if gpu_count == 8:
165
+ return "gpu-h100x8-base"
166
+ elif gpu_count == 1:
167
+ return "gpu-h100x1-base"
168
+ else:
169
+ # For Unsupported GPU count - use single GPU image and log warning
170
+ logger.warning(
171
+ f"Unsupported NVIDIA GPU count: {gpu_count}, using single GPU image"
172
+ )
173
+ return "gpu-h100x1-base"