dstack 0.19.15rc1__py3-none-any.whl → 0.19.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic; see the package registry's advisory page for more details.

Files changed (93)
  1. dstack/_internal/cli/commands/secrets.py +92 -0
  2. dstack/_internal/cli/main.py +2 -0
  3. dstack/_internal/cli/services/completion.py +5 -0
  4. dstack/_internal/cli/services/configurators/run.py +59 -17
  5. dstack/_internal/cli/utils/secrets.py +25 -0
  6. dstack/_internal/core/backends/__init__.py +10 -4
  7. dstack/_internal/core/backends/cloudrift/__init__.py +0 -0
  8. dstack/_internal/core/backends/cloudrift/api_client.py +208 -0
  9. dstack/_internal/core/backends/cloudrift/backend.py +16 -0
  10. dstack/_internal/core/backends/cloudrift/compute.py +138 -0
  11. dstack/_internal/core/backends/cloudrift/configurator.py +66 -0
  12. dstack/_internal/core/backends/cloudrift/models.py +40 -0
  13. dstack/_internal/core/backends/configurators.py +9 -0
  14. dstack/_internal/core/backends/models.py +7 -0
  15. dstack/_internal/core/compatibility/logs.py +15 -0
  16. dstack/_internal/core/compatibility/runs.py +31 -2
  17. dstack/_internal/core/models/backends/base.py +2 -0
  18. dstack/_internal/core/models/configurations.py +33 -2
  19. dstack/_internal/core/models/files.py +67 -0
  20. dstack/_internal/core/models/logs.py +2 -1
  21. dstack/_internal/core/models/runs.py +24 -1
  22. dstack/_internal/core/models/secrets.py +9 -2
  23. dstack/_internal/server/app.py +2 -0
  24. dstack/_internal/server/background/tasks/process_fleets.py +1 -1
  25. dstack/_internal/server/background/tasks/process_gateways.py +1 -1
  26. dstack/_internal/server/background/tasks/process_instances.py +1 -1
  27. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -1
  28. dstack/_internal/server/background/tasks/process_running_jobs.py +110 -13
  29. dstack/_internal/server/background/tasks/process_runs.py +36 -5
  30. dstack/_internal/server/background/tasks/process_submitted_jobs.py +10 -4
  31. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  32. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  33. dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
  34. dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
  35. dstack/_internal/server/models.py +33 -0
  36. dstack/_internal/server/routers/files.py +67 -0
  37. dstack/_internal/server/routers/gateways.py +6 -3
  38. dstack/_internal/server/routers/projects.py +63 -0
  39. dstack/_internal/server/routers/prometheus.py +5 -5
  40. dstack/_internal/server/routers/secrets.py +57 -15
  41. dstack/_internal/server/schemas/files.py +5 -0
  42. dstack/_internal/server/schemas/logs.py +10 -1
  43. dstack/_internal/server/schemas/projects.py +12 -0
  44. dstack/_internal/server/schemas/runner.py +2 -0
  45. dstack/_internal/server/schemas/secrets.py +7 -11
  46. dstack/_internal/server/security/permissions.py +75 -2
  47. dstack/_internal/server/services/backends/__init__.py +1 -1
  48. dstack/_internal/server/services/files.py +91 -0
  49. dstack/_internal/server/services/fleets.py +1 -1
  50. dstack/_internal/server/services/gateways/__init__.py +1 -1
  51. dstack/_internal/server/services/jobs/__init__.py +19 -8
  52. dstack/_internal/server/services/jobs/configurators/base.py +27 -3
  53. dstack/_internal/server/services/jobs/configurators/dev.py +3 -3
  54. dstack/_internal/server/services/logs/aws.py +38 -38
  55. dstack/_internal/server/services/logs/filelog.py +48 -14
  56. dstack/_internal/server/services/logs/gcp.py +17 -16
  57. dstack/_internal/server/services/projects.py +164 -5
  58. dstack/_internal/server/services/prometheus/__init__.py +0 -0
  59. dstack/_internal/server/services/prometheus/client_metrics.py +52 -0
  60. dstack/_internal/server/services/proxy/repo.py +3 -0
  61. dstack/_internal/server/services/runner/client.py +8 -0
  62. dstack/_internal/server/services/runs.py +55 -10
  63. dstack/_internal/server/services/secrets.py +204 -0
  64. dstack/_internal/server/services/services/__init__.py +2 -1
  65. dstack/_internal/server/services/storage/base.py +21 -0
  66. dstack/_internal/server/services/storage/gcs.py +28 -6
  67. dstack/_internal/server/services/storage/s3.py +27 -9
  68. dstack/_internal/server/services/users.py +1 -3
  69. dstack/_internal/server/services/volumes.py +1 -1
  70. dstack/_internal/server/settings.py +2 -2
  71. dstack/_internal/server/statics/index.html +1 -1
  72. dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js → main-d151637af20f70b2e796.js} +104 -48
  73. dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js.map → main-d151637af20f70b2e796.js.map} +1 -1
  74. dstack/_internal/server/statics/{main-f39c418b05fe14772dd8.css → main-d48635d8fe670d53961c.css} +1 -1
  75. dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
  76. dstack/_internal/server/testing/common.py +43 -5
  77. dstack/_internal/settings.py +5 -0
  78. dstack/_internal/utils/files.py +69 -0
  79. dstack/_internal/utils/nested_list.py +47 -0
  80. dstack/_internal/utils/path.py +12 -4
  81. dstack/api/_public/runs.py +73 -12
  82. dstack/api/server/__init__.py +6 -0
  83. dstack/api/server/_files.py +18 -0
  84. dstack/api/server/_logs.py +5 -1
  85. dstack/api/server/_projects.py +24 -0
  86. dstack/api/server/_secrets.py +15 -15
  87. dstack/version.py +1 -1
  88. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/METADATA +3 -4
  89. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/RECORD +93 -71
  90. /dstack/_internal/server/services/{prometheus.py → prometheus/custom_metrics.py} +0 -0
  91. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/WHEEL +0 -0
  92. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/entry_points.txt +0 -0
  93. {dstack-0.19.15rc1.dist-info → dstack-0.19.17.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,92 @@
1
+ import argparse
2
+
3
+ from dstack._internal.cli.commands import APIBaseCommand
4
+ from dstack._internal.cli.services.completion import SecretNameCompleter
5
+ from dstack._internal.cli.utils.common import (
6
+ confirm_ask,
7
+ console,
8
+ )
9
+ from dstack._internal.cli.utils.secrets import print_secrets_table
10
+
11
+
12
+ class SecretCommand(APIBaseCommand):
13
+ NAME = "secret"
14
+ DESCRIPTION = "Manage secrets"
15
+
16
+ def _register(self):
17
+ super()._register()
18
+ self._parser.set_defaults(subfunc=self._list)
19
+ subparsers = self._parser.add_subparsers(dest="action")
20
+
21
+ list_parser = subparsers.add_parser(
22
+ "list", help="List secrets", formatter_class=self._parser.formatter_class
23
+ )
24
+ list_parser.set_defaults(subfunc=self._list)
25
+
26
+ get_parser = subparsers.add_parser(
27
+ "get", help="Get secret value", formatter_class=self._parser.formatter_class
28
+ )
29
+ get_parser.add_argument(
30
+ "name",
31
+ help="The name of the secret",
32
+ ).completer = SecretNameCompleter()
33
+ get_parser.set_defaults(subfunc=self._get)
34
+
35
+ set_parser = subparsers.add_parser(
36
+ "set", help="Set secret", formatter_class=self._parser.formatter_class
37
+ )
38
+ set_parser.add_argument(
39
+ "name",
40
+ help="The name of the secret",
41
+ )
42
+ set_parser.add_argument(
43
+ "value",
44
+ help="The value of the secret",
45
+ )
46
+ set_parser.set_defaults(subfunc=self._set)
47
+
48
+ delete_parser = subparsers.add_parser(
49
+ "delete",
50
+ help="Delete secrets",
51
+ formatter_class=self._parser.formatter_class,
52
+ )
53
+ delete_parser.add_argument(
54
+ "name",
55
+ help="The name of the secret",
56
+ ).completer = SecretNameCompleter()
57
+ delete_parser.add_argument(
58
+ "-y", "--yes", help="Don't ask for confirmation", action="store_true"
59
+ )
60
+ delete_parser.set_defaults(subfunc=self._delete)
61
+
62
+ def _command(self, args: argparse.Namespace):
63
+ super()._command(args)
64
+ args.subfunc(args)
65
+
66
+ def _list(self, args: argparse.Namespace):
67
+ secrets = self.api.client.secrets.list(self.api.project)
68
+ print_secrets_table(secrets)
69
+
70
+ def _get(self, args: argparse.Namespace):
71
+ secret = self.api.client.secrets.get(self.api.project, name=args.name)
72
+ print_secrets_table([secret])
73
+
74
+ def _set(self, args: argparse.Namespace):
75
+ self.api.client.secrets.create_or_update(
76
+ self.api.project,
77
+ name=args.name,
78
+ value=args.value,
79
+ )
80
+ console.print("[grey58]OK[/]")
81
+
82
+ def _delete(self, args: argparse.Namespace):
83
+ if not args.yes and not confirm_ask(f"Delete the secret [code]{args.name}[/]?"):
84
+ console.print("\nExiting...")
85
+ return
86
+
87
+ with console.status("Deleting secret..."):
88
+ self.api.client.secrets.delete(
89
+ project_name=self.api.project,
90
+ names=[args.name],
91
+ )
92
+ console.print("[grey58]OK[/]")
@@ -17,6 +17,7 @@ from dstack._internal.cli.commands.metrics import MetricsCommand
17
17
  from dstack._internal.cli.commands.offer import OfferCommand
18
18
  from dstack._internal.cli.commands.project import ProjectCommand
19
19
  from dstack._internal.cli.commands.ps import PsCommand
20
+ from dstack._internal.cli.commands.secrets import SecretCommand
20
21
  from dstack._internal.cli.commands.server import ServerCommand
21
22
  from dstack._internal.cli.commands.stats import StatsCommand
22
23
  from dstack._internal.cli.commands.stop import StopCommand
@@ -72,6 +73,7 @@ def main():
72
73
  MetricsCommand.register(subparsers)
73
74
  ProjectCommand.register(subparsers)
74
75
  PsCommand.register(subparsers)
76
+ SecretCommand.register(subparsers)
75
77
  ServerCommand.register(subparsers)
76
78
  StatsCommand.register(subparsers)
77
79
  StopCommand.register(subparsers)
@@ -75,6 +75,11 @@ class GatewayNameCompleter(BaseAPINameCompleter):
75
75
  return [r.name for r in api.client.gateways.list(api.project)]
76
76
 
77
77
 
78
+ class SecretNameCompleter(BaseAPINameCompleter):
79
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
80
+ return [r.name for r in api.client.secrets.list(api.project)]
81
+
82
+
78
83
  class ProjectNameCompleter(BaseCompleter):
79
84
  """
80
85
  Completer for local project names.
@@ -41,12 +41,13 @@ from dstack._internal.core.models.configurations import (
41
41
  )
42
42
  from dstack._internal.core.models.repos.base import Repo
43
43
  from dstack._internal.core.models.resources import CPUSpec
44
- from dstack._internal.core.models.runs import JobStatus, JobSubmission, RunStatus
44
+ from dstack._internal.core.models.runs import JobStatus, JobSubmission, RunSpec, RunStatus
45
45
  from dstack._internal.core.services.configs import ConfigManager
46
46
  from dstack._internal.core.services.diff import diff_models
47
47
  from dstack._internal.utils.common import local_time
48
48
  from dstack._internal.utils.interpolator import InterpolatorError, VariablesInterpolator
49
49
  from dstack._internal.utils.logging import get_logger
50
+ from dstack._internal.utils.nested_list import NestedList, NestedListItem
50
51
  from dstack.api._public.repos import get_ssh_keypair
51
52
  from dstack.api._public.runs import Run
52
53
  from dstack.api.utils import load_profile
@@ -102,25 +103,20 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
102
103
  confirm_message = f"Submit the run [code]{conf.name}[/]?"
103
104
  stop_run_name = None
104
105
  if run_plan.current_resource is not None:
105
- changed_fields = []
106
- if run_plan.action == ApplyAction.UPDATE:
107
- diff = diff_models(
108
- run_plan.get_effective_run_spec().configuration,
109
- run_plan.current_resource.run_spec.configuration,
110
- )
111
- changed_fields = list(diff.keys())
112
- if run_plan.action == ApplyAction.UPDATE and len(changed_fields) > 0:
106
+ diff = render_run_spec_diff(
107
+ run_plan.get_effective_run_spec(),
108
+ run_plan.current_resource.run_spec,
109
+ )
110
+ if run_plan.action == ApplyAction.UPDATE and diff is not None:
113
111
  console.print(
114
112
  f"Active run [code]{conf.name}[/] already exists."
115
- " Detected configuration changes that can be updated in-place:"
116
- f" {changed_fields}"
113
+ f" Detected changes that [code]can[/] be updated in-place:\n{diff}"
117
114
  )
118
115
  confirm_message = "Update the run?"
119
- elif run_plan.action == ApplyAction.UPDATE and len(changed_fields) == 0:
116
+ elif run_plan.action == ApplyAction.UPDATE and diff is None:
120
117
  stop_run_name = run_plan.current_resource.run_spec.run_name
121
118
  console.print(
122
- f"Active run [code]{conf.name}[/] already exists."
123
- " Detected no configuration changes."
119
+ f"Active run [code]{conf.name}[/] already exists. Detected no changes."
124
120
  )
125
121
  if command_args.yes and not command_args.force:
126
122
  console.print("Use --force to apply anyway.")
@@ -129,7 +125,8 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
129
125
  elif not run_plan.current_resource.status.is_finished():
130
126
  stop_run_name = run_plan.current_resource.run_spec.run_name
131
127
  console.print(
132
- f"Active run [code]{conf.name}[/] already exists and cannot be updated in-place."
128
+ f"Active run [code]{conf.name}[/] already exists."
129
+ f" Detected changes that [error]cannot[/] be updated in-place:\n{diff}"
133
130
  )
134
131
  confirm_message = "Stop and override the run?"
135
132
 
@@ -398,9 +395,10 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
398
395
  else:
399
396
  has_amd_gpu = vendor == gpuhunt.AcceleratorVendor.AMD
400
397
  has_tt_gpu = vendor == gpuhunt.AcceleratorVendor.TENSTORRENT
401
- if has_amd_gpu and conf.image is None:
398
+ # When docker=True, the system uses Docker-in-Docker image, so no custom image is required
399
+ if has_amd_gpu and conf.image is None and conf.docker is not True:
402
400
  raise ConfigurationError("`image` is required if `resources.gpu.vendor` is `amd`")
403
- if has_tt_gpu and conf.image is None:
401
+ if has_tt_gpu and conf.image is None and conf.docker is not True:
404
402
  raise ConfigurationError(
405
403
  "`image` is required if `resources.gpu.vendor` is `tenstorrent`"
406
404
  )
@@ -610,3 +608,47 @@ def _run_resubmitted(run: Run, current_job_submission: Optional[JobSubmission])
610
608
  not run.status.is_finished()
611
609
  and run._run.latest_job_submission.submitted_at > current_job_submission.submitted_at
612
610
  )
611
+
612
+
613
+ def render_run_spec_diff(old_spec: RunSpec, new_spec: RunSpec) -> Optional[str]:
614
+ changed_spec_fields = list(diff_models(old_spec, new_spec))
615
+ if not changed_spec_fields:
616
+ return None
617
+ friendly_spec_field_names = {
618
+ "repo_id": "Repo ID",
619
+ "repo_code_hash": "Repo files",
620
+ "repo_data": "Repo state (branch, commit, or other)",
621
+ "ssh_key_pub": "Public SSH key",
622
+ }
623
+ nested_list = NestedList()
624
+ for spec_field in changed_spec_fields:
625
+ if spec_field == "merged_profile":
626
+ continue
627
+ elif spec_field == "configuration":
628
+ if type(old_spec.configuration) is not type(new_spec.configuration):
629
+ item = NestedListItem("Configuration type")
630
+ else:
631
+ item = NestedListItem(
632
+ "Configuration properties:",
633
+ children=[
634
+ NestedListItem(field)
635
+ for field in diff_models(old_spec.configuration, new_spec.configuration)
636
+ ],
637
+ )
638
+ elif spec_field == "profile":
639
+ if type(old_spec.profile) is not type(new_spec.profile):
640
+ item = NestedListItem("Profile")
641
+ else:
642
+ item = NestedListItem(
643
+ "Profile properties:",
644
+ children=[
645
+ NestedListItem(field)
646
+ for field in diff_models(old_spec.profile, new_spec.profile)
647
+ ],
648
+ )
649
+ elif spec_field in friendly_spec_field_names:
650
+ item = NestedListItem(friendly_spec_field_names[spec_field])
651
+ else:
652
+ item = NestedListItem(spec_field.replace("_", " ").capitalize())
653
+ nested_list.children.append(item)
654
+ return nested_list.render()
@@ -0,0 +1,25 @@
1
+ from typing import List
2
+
3
+ from rich.table import Table
4
+
5
+ from dstack._internal.cli.utils.common import add_row_from_dict, console
6
+ from dstack._internal.core.models.secrets import Secret
7
+
8
+
9
+ def print_secrets_table(secrets: List[Secret]) -> None:
10
+ console.print(get_secrets_table(secrets))
11
+ console.print()
12
+
13
+
14
+ def get_secrets_table(secrets: List[Secret]) -> Table:
15
+ table = Table(box=None)
16
+ table.add_column("NAME", no_wrap=True)
17
+ table.add_column("VALUE")
18
+
19
+ for secret in secrets:
20
+ row = {
21
+ "NAME": secret.name,
22
+ "VALUE": secret.value or "*" * 6,
23
+ }
24
+ add_row_from_dict(table, row)
25
+ return table
@@ -9,18 +9,25 @@ from dstack._internal.core.backends.base.compute import (
9
9
  )
10
10
  from dstack._internal.core.backends.base.configurator import Configurator
11
11
  from dstack._internal.core.backends.configurators import list_available_configurator_classes
12
+ from dstack._internal.core.backends.local.compute import LocalCompute
12
13
  from dstack._internal.core.models.backends.base import BackendType
14
+ from dstack._internal.settings import LOCAL_BACKEND_ENABLED
13
15
 
14
16
 
15
17
  def _get_backends_with_compute_feature(
16
18
  configurator_classes: list[type[Configurator]],
17
19
  compute_feature_class: type,
18
20
  ) -> list[BackendType]:
21
+ backend_types_and_computes = [
22
+ (configurator_class.TYPE, configurator_class.BACKEND_CLASS.COMPUTE_CLASS)
23
+ for configurator_class in configurator_classes
24
+ ]
25
+ if LOCAL_BACKEND_ENABLED:
26
+ backend_types_and_computes.append((BackendType.LOCAL, LocalCompute))
19
27
  backend_types = []
20
- for configurator_class in configurator_classes:
21
- compute_class = configurator_class.BACKEND_CLASS.COMPUTE_CLASS
28
+ for backend_type, compute_class in backend_types_and_computes:
22
29
  if issubclass(compute_class, compute_feature_class):
23
- backend_types.append(configurator_class.TYPE)
30
+ backend_types.append(backend_type)
24
31
  return backend_types
25
32
 
26
33
 
@@ -28,7 +35,6 @@ _configurator_classes = list_available_configurator_classes()
28
35
 
29
36
 
30
37
  # The following backend lists do not include unavailable backends (i.e. backends missing deps).
31
- # TODO: Add LocalBackend to lists if it's enabled
32
38
  BACKENDS_WITH_CREATE_INSTANCE_SUPPORT = _get_backends_with_compute_feature(
33
39
  configurator_classes=_configurator_classes,
34
40
  compute_feature_class=ComputeWithCreateInstanceSupport,
File without changes
@@ -0,0 +1,208 @@
1
+ import os
2
+ import re
3
+ from typing import Any, Dict, List, Mapping, Optional, Union
4
+
5
+ import requests
6
+ from packaging import version
7
+ from requests import Response
8
+
9
+ from dstack._internal.core.errors import BackendError, BackendInvalidCredentialsError
10
+ from dstack._internal.utils.logging import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
+ CLOUDRIFT_SERVER_ADDRESS = "https://api.cloudrift.ai"
16
+ CLOUDRIFT_API_VERSION = "2025-05-29"
17
+
18
+
19
+ class RiftClient:
20
+ def __init__(self, api_key: Optional[str] = None):
21
+ self.public_api_root = os.path.join(CLOUDRIFT_SERVER_ADDRESS, "api/v1")
22
+ self.api_key = api_key
23
+
24
+ def validate_api_key(self) -> bool:
25
+ """
26
+ Validates the API key by making a request to the server.
27
+ Returns True if the API key is valid, False otherwise.
28
+ """
29
+ try:
30
+ response = self._make_request("auth/me")
31
+ if isinstance(response, dict):
32
+ return "email" in response
33
+ return False
34
+ except BackendInvalidCredentialsError:
35
+ return False
36
+ except Exception as e:
37
+ logger.error(f"Error validating API key: {e}")
38
+ return False
39
+
40
+ def get_instance_types(self) -> List[Dict]:
41
+ request_data = {"selector": {"ByServiceAndLocation": {"services": ["vm"]}}}
42
+ response_data = self._make_request("instance-types/list", request_data)
43
+ if isinstance(response_data, dict):
44
+ return response_data.get("instance_types", [])
45
+ return []
46
+
47
+ def list_recipes(self) -> List[Dict]:
48
+ request_data = {}
49
+ response_data = self._make_request("recipes/list", request_data)
50
+ if isinstance(response_data, dict):
51
+ return response_data.get("groups", [])
52
+ return []
53
+
54
+ def get_vm_recipies(self) -> List[Dict]:
55
+ """
56
+ Retrieves a list of VM recipes from the CloudRift API.
57
+ Returns a list of dictionaries containing recipe information.
58
+ """
59
+ recipe_group = self.list_recipes()
60
+ vm_recipes = []
61
+ for group in recipe_group:
62
+ tags = group.get("tags", [])
63
+ has_vm = "vm" in map(str.lower, tags)
64
+ if group.get("name", "").lower() != "linux" or not has_vm:
65
+ continue
66
+
67
+ recipes = group.get("recipes", [])
68
+ for recipe in recipes:
69
+ details = recipe.get("details", {})
70
+ if details.get("VirtualMachine", False):
71
+ vm_recipes.append(recipe)
72
+
73
+ return vm_recipes
74
+
75
+ def get_vm_image_url(self) -> Optional[str]:
76
+ recipes = self.get_vm_recipies()
77
+ ubuntu_images = []
78
+ for recipe in recipes:
79
+ has_nvidia_driver = "nvidia-driver" in recipe.get("tags", [])
80
+ if not has_nvidia_driver:
81
+ continue
82
+
83
+ recipe_name = recipe.get("name", "")
84
+ if "Ubuntu" not in recipe_name:
85
+ continue
86
+
87
+ url = recipe["details"].get("VirtualMachine", {}).get("image_url", None)
88
+ version_match = re.search(r".* (\d+\.\d+)", recipe_name)
89
+ if url and version_match and version_match.group(1):
90
+ ubuntu_version = version.parse(version_match.group(1))
91
+ ubuntu_images.append((ubuntu_version, url))
92
+
93
+ ubuntu_images.sort(key=lambda x: x[0]) # Sort by version
94
+ if ubuntu_images:
95
+ return ubuntu_images[-1][1]
96
+
97
+ return None
98
+
99
+ def deploy_instance(
100
+ self, instance_type: str, region: str, ssh_keys: List[str], cmd: str
101
+ ) -> List[str]:
102
+ image_url = self.get_vm_image_url()
103
+ if not image_url:
104
+ raise BackendError("No suitable VM image found.")
105
+
106
+ request_data = {
107
+ "config": {
108
+ "VirtualMachine": {
109
+ "cloudinit_commands": cmd,
110
+ "image_url": image_url,
111
+ "ssh_key": {"PublicKeys": ssh_keys},
112
+ }
113
+ },
114
+ "selector": {
115
+ "ByInstanceTypeAndLocation": {
116
+ "datacenters": [region],
117
+ "instance_type": instance_type,
118
+ }
119
+ },
120
+ "with_public_ip": True,
121
+ }
122
+ logger.debug("Deploying instance with request data: %s", request_data)
123
+
124
+ response_data = self._make_request("instances/rent", request_data)
125
+ if isinstance(response_data, dict):
126
+ return response_data.get("instance_ids", [])
127
+ return []
128
+
129
+ def list_instances(self, instance_ids: Optional[List[str]] = None) -> List[Dict]:
130
+ request_data = {
131
+ "selector": {
132
+ "ByStatus": ["Initializing", "Active", "Deactivating"],
133
+ }
134
+ }
135
+ logger.debug("Listing instances with request data: %s", request_data)
136
+ response_data = self._make_request("instances/list", request_data)
137
+ if isinstance(response_data, dict):
138
+ return response_data.get("instances", [])
139
+
140
+ return []
141
+
142
+ def get_instance_by_id(self, instance_id: str) -> Optional[Dict]:
143
+ request_data = {"selector": {"ById": [instance_id]}}
144
+ logger.debug("Getting instance with request data: %s", request_data)
145
+ response_data = self._make_request("instances/list", request_data)
146
+ if isinstance(response_data, dict):
147
+ instances = response_data.get("instances", [])
148
+ if isinstance(instances, list) and len(instances) > 0:
149
+ return instances[0]
150
+
151
+ return None
152
+
153
+ def terminate_instance(self, instance_id: str) -> bool:
154
+ request_data = {"selector": {"ById": [instance_id]}}
155
+ logger.debug("Terminating instance with request data: %s", request_data)
156
+ response_data = self._make_request("instances/terminate", request_data)
157
+ if isinstance(response_data, dict):
158
+ info = response_data.get("terminated", [])
159
+ return len(info) > 0
160
+
161
+ return False
162
+
163
+ def _make_request(
164
+ self,
165
+ endpoint: str,
166
+ data: Optional[Mapping[str, Any]] = None,
167
+ method: str = "POST",
168
+ **kwargs,
169
+ ) -> Union[Mapping[str, Any], str, Response]:
170
+ headers = {}
171
+ if self.api_key is not None:
172
+ headers["X-API-Key"] = self.api_key
173
+
174
+ version = CLOUDRIFT_API_VERSION
175
+ full_url = f"{self.public_api_root}/{endpoint}"
176
+
177
+ try:
178
+ response = requests.request(
179
+ method,
180
+ full_url,
181
+ headers=headers,
182
+ json={"version": version, "data": data},
183
+ timeout=15,
184
+ **kwargs,
185
+ )
186
+
187
+ if not response.ok:
188
+ response.raise_for_status()
189
+ try:
190
+ response_json = response.json()
191
+ if isinstance(response_json, str):
192
+ return response_json
193
+ if version is not None and version < response_json["version"]:
194
+ logger.warning(
195
+ "The API version %s is lower than the server version %s. ",
196
+ version,
197
+ response_json["version"],
198
+ )
199
+ return response_json["data"]
200
+ except requests.exceptions.JSONDecodeError:
201
+ return response
202
+ except requests.HTTPError as e:
203
+ if e.response is not None and e.response.status_code in (
204
+ requests.codes.forbidden,
205
+ requests.codes.unauthorized,
206
+ ):
207
+ raise BackendInvalidCredentialsError(e.response.text)
208
+ raise
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.cloudrift.compute import CloudRiftCompute
3
+ from dstack._internal.core.backends.cloudrift.models import CloudRiftConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class CloudRiftBackend(Backend):
8
+ TYPE = BackendType.CLOUDRIFT
9
+ COMPUTE_CLASS = CloudRiftCompute
10
+
11
+ def __init__(self, config: CloudRiftConfig):
12
+ self.config = config
13
+ self._compute = CloudRiftCompute(self.config)
14
+
15
+ def compute(self) -> CloudRiftCompute:
16
+ return self._compute
@@ -0,0 +1,138 @@
1
+ from typing import Dict, List, Optional
2
+
3
+ from dstack._internal.core.backends.base.backend import Compute
4
+ from dstack._internal.core.backends.base.compute import (
5
+ ComputeWithCreateInstanceSupport,
6
+ get_shim_commands,
7
+ )
8
+ from dstack._internal.core.backends.base.offers import get_catalog_offers
9
+ from dstack._internal.core.backends.cloudrift.api_client import RiftClient
10
+ from dstack._internal.core.backends.cloudrift.models import CloudRiftConfig
11
+ from dstack._internal.core.errors import ComputeError
12
+ from dstack._internal.core.models.backends.base import BackendType
13
+ from dstack._internal.core.models.instances import (
14
+ InstanceAvailability,
15
+ InstanceConfiguration,
16
+ InstanceOffer,
17
+ InstanceOfferWithAvailability,
18
+ )
19
+ from dstack._internal.core.models.placement import PlacementGroup
20
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
21
+ from dstack._internal.utils.logging import get_logger
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
+ class CloudRiftCompute(
27
+ ComputeWithCreateInstanceSupport,
28
+ Compute,
29
+ ):
30
+ def __init__(self, config: CloudRiftConfig):
31
+ super().__init__()
32
+ self.config = config
33
+ self.client = RiftClient(self.config.creds.api_key)
34
+
35
+ def get_offers(
36
+ self, requirements: Optional[Requirements] = None
37
+ ) -> List[InstanceOfferWithAvailability]:
38
+ offers = get_catalog_offers(
39
+ backend=BackendType.CLOUDRIFT,
40
+ locations=self.config.regions or None,
41
+ requirements=requirements,
42
+ )
43
+
44
+ offers_with_availabilities = self._get_offers_with_availability(offers)
45
+ return offers_with_availabilities
46
+
47
+ def _get_offers_with_availability(
48
+ self, offers: List[InstanceOffer]
49
+ ) -> List[InstanceOfferWithAvailability]:
50
+ instance_types_with_availabilities: List[Dict] = self.client.get_instance_types()
51
+
52
+ region_availabilities = {}
53
+ for instance_type in instance_types_with_availabilities:
54
+ for variant in instance_type["variants"]:
55
+ for dc, count in variant["available_nodes_per_dc"].items():
56
+ if count > 0:
57
+ key = (variant["name"], dc)
58
+ region_availabilities[key] = InstanceAvailability.AVAILABLE
59
+
60
+ availability_offers = []
61
+ for offer in offers:
62
+ key = (offer.instance.name, offer.region)
63
+ availability = region_availabilities.get(key, InstanceAvailability.NOT_AVAILABLE)
64
+ availability_offers.append(
65
+ InstanceOfferWithAvailability(**offer.dict(), availability=availability)
66
+ )
67
+
68
+ return availability_offers
69
+
70
+ def create_instance(
71
+ self,
72
+ instance_offer: InstanceOfferWithAvailability,
73
+ instance_config: InstanceConfiguration,
74
+ placement_group: Optional[PlacementGroup],
75
+ ) -> JobProvisioningData:
76
+ commands = get_shim_commands(authorized_keys=instance_config.get_public_keys())
77
+ startup_script = " ".join([" && ".join(commands)])
78
+ logger.debug(
79
+ f"Creating instance for offer {instance_offer.instance.name} in region {instance_offer.region} with commands: {startup_script}"
80
+ )
81
+
82
+ instance_ids = self.client.deploy_instance(
83
+ instance_type=instance_offer.instance.name,
84
+ region=instance_offer.region,
85
+ ssh_keys=instance_config.get_public_keys(),
86
+ cmd=startup_script,
87
+ )
88
+
89
+ if len(instance_ids) == 0:
90
+ raise ComputeError(
91
+ f"Failed to create instance for offer {instance_offer.instance.name} in region {instance_offer.region}."
92
+ )
93
+
94
+ return JobProvisioningData(
95
+ backend=instance_offer.backend,
96
+ instance_type=instance_offer.instance,
97
+ instance_id=instance_ids[0],
98
+ hostname=None,
99
+ internal_ip=None,
100
+ region=instance_offer.region,
101
+ price=instance_offer.price,
102
+ username="riftuser",
103
+ ssh_port=22,
104
+ dockerized=True,
105
+ ssh_proxy=None,
106
+ backend_data=None,
107
+ )
108
+
109
+ def update_provisioning_data(
110
+ self,
111
+ provisioning_data: JobProvisioningData,
112
+ project_ssh_public_key: str,
113
+ project_ssh_private_key: str,
114
+ ):
115
+ instance_info = self.client.get_instance_by_id(provisioning_data.instance_id)
116
+
117
+ if not instance_info:
118
+ return
119
+
120
+ instance_mode = instance_info.get("node_mode", "")
121
+
122
+ if not instance_mode or instance_mode != "VirtualMachine":
123
+ return
124
+
125
+ vms = instance_info.get("virtual_machines", [])
126
+ if len(vms) == 0:
127
+ return
128
+
129
+ vm_ready = vms[0].get("ready", False)
130
+ if vm_ready:
131
+ provisioning_data.hostname = instance_info.get("host_address", None)
132
+
133
+ def terminate_instance(
134
+ self, instance_id: str, region: str, backend_data: Optional[str] = None
135
+ ):
136
+ terminated = self.client.terminate_instance(instance_id=instance_id)
137
+ if not terminated:
138
+ raise ComputeError(f"Failed to terminate instance {instance_id} in region {region}.")