dstack 0.19.15__py3-none-any.whl → 0.19.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (57)
  1. dstack/_internal/core/backends/cloudrift/__init__.py +0 -0
  2. dstack/_internal/core/backends/cloudrift/api_client.py +208 -0
  3. dstack/_internal/core/backends/cloudrift/backend.py +16 -0
  4. dstack/_internal/core/backends/cloudrift/compute.py +138 -0
  5. dstack/_internal/core/backends/cloudrift/configurator.py +66 -0
  6. dstack/_internal/core/backends/cloudrift/models.py +40 -0
  7. dstack/_internal/core/backends/configurators.py +9 -0
  8. dstack/_internal/core/backends/models.py +7 -0
  9. dstack/_internal/core/compatibility/logs.py +15 -0
  10. dstack/_internal/core/compatibility/runs.py +2 -0
  11. dstack/_internal/core/models/backends/base.py +2 -0
  12. dstack/_internal/core/models/configurations.py +22 -2
  13. dstack/_internal/core/models/logs.py +2 -1
  14. dstack/_internal/core/models/runs.py +10 -1
  15. dstack/_internal/server/background/tasks/process_fleets.py +1 -1
  16. dstack/_internal/server/background/tasks/process_gateways.py +1 -1
  17. dstack/_internal/server/background/tasks/process_instances.py +1 -1
  18. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -1
  19. dstack/_internal/server/background/tasks/process_running_jobs.py +1 -1
  20. dstack/_internal/server/background/tasks/process_runs.py +21 -2
  21. dstack/_internal/server/background/tasks/process_submitted_jobs.py +10 -4
  22. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  23. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  24. dstack/_internal/server/routers/gateways.py +6 -3
  25. dstack/_internal/server/routers/projects.py +63 -0
  26. dstack/_internal/server/routers/prometheus.py +5 -5
  27. dstack/_internal/server/schemas/logs.py +10 -1
  28. dstack/_internal/server/schemas/projects.py +12 -0
  29. dstack/_internal/server/security/permissions.py +75 -2
  30. dstack/_internal/server/services/fleets.py +1 -1
  31. dstack/_internal/server/services/gateways/__init__.py +1 -1
  32. dstack/_internal/server/services/jobs/configurators/base.py +7 -1
  33. dstack/_internal/server/services/logs/aws.py +38 -38
  34. dstack/_internal/server/services/logs/filelog.py +48 -14
  35. dstack/_internal/server/services/logs/gcp.py +17 -16
  36. dstack/_internal/server/services/projects.py +164 -5
  37. dstack/_internal/server/services/prometheus/__init__.py +0 -0
  38. dstack/_internal/server/services/prometheus/client_metrics.py +52 -0
  39. dstack/_internal/server/services/runs.py +3 -3
  40. dstack/_internal/server/services/services/__init__.py +2 -1
  41. dstack/_internal/server/services/users.py +1 -3
  42. dstack/_internal/server/services/volumes.py +1 -1
  43. dstack/_internal/server/statics/index.html +1 -1
  44. dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js → main-a4eafa74304e587d037c.js} +51 -43
  45. dstack/_internal/server/statics/{main-0ac1e1583684417ae4d1.js.map → main-a4eafa74304e587d037c.js.map} +1 -1
  46. dstack/_internal/server/statics/{main-f39c418b05fe14772dd8.css → main-f53d6d0d42f8d61df1de.css} +1 -1
  47. dstack/_internal/settings.py +1 -0
  48. dstack/api/_public/runs.py +6 -5
  49. dstack/api/server/_logs.py +5 -1
  50. dstack/api/server/_projects.py +24 -0
  51. dstack/version.py +1 -1
  52. {dstack-0.19.15.dist-info → dstack-0.19.16.dist-info}/METADATA +1 -1
  53. {dstack-0.19.15.dist-info → dstack-0.19.16.dist-info}/RECORD +57 -48
  54. /dstack/_internal/server/services/{prometheus.py → prometheus/custom_metrics.py} +0 -0
  55. {dstack-0.19.15.dist-info → dstack-0.19.16.dist-info}/WHEEL +0 -0
  56. {dstack-0.19.15.dist-info → dstack-0.19.16.dist-info}/entry_points.txt +0 -0
  57. {dstack-0.19.15.dist-info → dstack-0.19.16.dist-info}/licenses/LICENSE.md +0 -0
File without changes: dstack/_internal/core/backends/cloudrift/__init__.py (+0 -0)
@@ -0,0 +1,208 @@
1
+ import os
2
+ import re
3
+ from typing import Any, Dict, List, Mapping, Optional, Union
4
+
5
+ import requests
6
+ from packaging import version
7
+ from requests import Response
8
+
9
+ from dstack._internal.core.errors import BackendError, BackendInvalidCredentialsError
10
+ from dstack._internal.utils.logging import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
+ CLOUDRIFT_SERVER_ADDRESS = "https://api.cloudrift.ai"
16
+ CLOUDRIFT_API_VERSION = "2025-05-29"
17
+
18
+
19
+ class RiftClient:
20
+ def __init__(self, api_key: Optional[str] = None):
21
+ self.public_api_root = os.path.join(CLOUDRIFT_SERVER_ADDRESS, "api/v1")
22
+ self.api_key = api_key
23
+
24
+ def validate_api_key(self) -> bool:
25
+ """
26
+ Validates the API key by making a request to the server.
27
+ Returns True if the API key is valid, False otherwise.
28
+ """
29
+ try:
30
+ response = self._make_request("auth/me")
31
+ if isinstance(response, dict):
32
+ return "email" in response
33
+ return False
34
+ except BackendInvalidCredentialsError:
35
+ return False
36
+ except Exception as e:
37
+ logger.error(f"Error validating API key: {e}")
38
+ return False
39
+
40
+ def get_instance_types(self) -> List[Dict]:
41
+ request_data = {"selector": {"ByServiceAndLocation": {"services": ["vm"]}}}
42
+ response_data = self._make_request("instance-types/list", request_data)
43
+ if isinstance(response_data, dict):
44
+ return response_data.get("instance_types", [])
45
+ return []
46
+
47
+ def list_recipes(self) -> List[Dict]:
48
+ request_data = {}
49
+ response_data = self._make_request("recipes/list", request_data)
50
+ if isinstance(response_data, dict):
51
+ return response_data.get("groups", [])
52
+ return []
53
+
54
+ def get_vm_recipies(self) -> List[Dict]:
55
+ """
56
+ Retrieves a list of VM recipes from the CloudRift API.
57
+ Returns a list of dictionaries containing recipe information.
58
+ """
59
+ recipe_group = self.list_recipes()
60
+ vm_recipes = []
61
+ for group in recipe_group:
62
+ tags = group.get("tags", [])
63
+ has_vm = "vm" in map(str.lower, tags)
64
+ if group.get("name", "").lower() != "linux" or not has_vm:
65
+ continue
66
+
67
+ recipes = group.get("recipes", [])
68
+ for recipe in recipes:
69
+ details = recipe.get("details", {})
70
+ if details.get("VirtualMachine", False):
71
+ vm_recipes.append(recipe)
72
+
73
+ return vm_recipes
74
+
75
+ def get_vm_image_url(self) -> Optional[str]:
76
+ recipes = self.get_vm_recipies()
77
+ ubuntu_images = []
78
+ for recipe in recipes:
79
+ has_nvidia_driver = "nvidia-driver" in recipe.get("tags", [])
80
+ if not has_nvidia_driver:
81
+ continue
82
+
83
+ recipe_name = recipe.get("name", "")
84
+ if "Ubuntu" not in recipe_name:
85
+ continue
86
+
87
+ url = recipe["details"].get("VirtualMachine", {}).get("image_url", None)
88
+ version_match = re.search(r".* (\d+\.\d+)", recipe_name)
89
+ if url and version_match and version_match.group(1):
90
+ ubuntu_version = version.parse(version_match.group(1))
91
+ ubuntu_images.append((ubuntu_version, url))
92
+
93
+ ubuntu_images.sort(key=lambda x: x[0]) # Sort by version
94
+ if ubuntu_images:
95
+ return ubuntu_images[-1][1]
96
+
97
+ return None
98
+
99
+ def deploy_instance(
100
+ self, instance_type: str, region: str, ssh_keys: List[str], cmd: str
101
+ ) -> List[str]:
102
+ image_url = self.get_vm_image_url()
103
+ if not image_url:
104
+ raise BackendError("No suitable VM image found.")
105
+
106
+ request_data = {
107
+ "config": {
108
+ "VirtualMachine": {
109
+ "cloudinit_commands": cmd,
110
+ "image_url": image_url,
111
+ "ssh_key": {"PublicKeys": ssh_keys},
112
+ }
113
+ },
114
+ "selector": {
115
+ "ByInstanceTypeAndLocation": {
116
+ "datacenters": [region],
117
+ "instance_type": instance_type,
118
+ }
119
+ },
120
+ "with_public_ip": True,
121
+ }
122
+ logger.debug("Deploying instance with request data: %s", request_data)
123
+
124
+ response_data = self._make_request("instances/rent", request_data)
125
+ if isinstance(response_data, dict):
126
+ return response_data.get("instance_ids", [])
127
+ return []
128
+
129
+ def list_instances(self, instance_ids: Optional[List[str]] = None) -> List[Dict]:
130
+ request_data = {
131
+ "selector": {
132
+ "ByStatus": ["Initializing", "Active", "Deactivating"],
133
+ }
134
+ }
135
+ logger.debug("Listing instances with request data: %s", request_data)
136
+ response_data = self._make_request("instances/list", request_data)
137
+ if isinstance(response_data, dict):
138
+ return response_data.get("instances", [])
139
+
140
+ return []
141
+
142
+ def get_instance_by_id(self, instance_id: str) -> Optional[Dict]:
143
+ request_data = {"selector": {"ById": [instance_id]}}
144
+ logger.debug("Getting instance with request data: %s", request_data)
145
+ response_data = self._make_request("instances/list", request_data)
146
+ if isinstance(response_data, dict):
147
+ instances = response_data.get("instances", [])
148
+ if isinstance(instances, list) and len(instances) > 0:
149
+ return instances[0]
150
+
151
+ return None
152
+
153
+ def terminate_instance(self, instance_id: str) -> bool:
154
+ request_data = {"selector": {"ById": [instance_id]}}
155
+ logger.debug("Terminating instance with request data: %s", request_data)
156
+ response_data = self._make_request("instances/terminate", request_data)
157
+ if isinstance(response_data, dict):
158
+ info = response_data.get("terminated", [])
159
+ return len(info) > 0
160
+
161
+ return False
162
+
163
+ def _make_request(
164
+ self,
165
+ endpoint: str,
166
+ data: Optional[Mapping[str, Any]] = None,
167
+ method: str = "POST",
168
+ **kwargs,
169
+ ) -> Union[Mapping[str, Any], str, Response]:
170
+ headers = {}
171
+ if self.api_key is not None:
172
+ headers["X-API-Key"] = self.api_key
173
+
174
+ version = CLOUDRIFT_API_VERSION
175
+ full_url = f"{self.public_api_root}/{endpoint}"
176
+
177
+ try:
178
+ response = requests.request(
179
+ method,
180
+ full_url,
181
+ headers=headers,
182
+ json={"version": version, "data": data},
183
+ timeout=15,
184
+ **kwargs,
185
+ )
186
+
187
+ if not response.ok:
188
+ response.raise_for_status()
189
+ try:
190
+ response_json = response.json()
191
+ if isinstance(response_json, str):
192
+ return response_json
193
+ if version is not None and version < response_json["version"]:
194
+ logger.warning(
195
+ "The API version %s is lower than the server version %s. ",
196
+ version,
197
+ response_json["version"],
198
+ )
199
+ return response_json["data"]
200
+ except requests.exceptions.JSONDecodeError:
201
+ return response
202
+ except requests.HTTPError as e:
203
+ if e.response is not None and e.response.status_code in (
204
+ requests.codes.forbidden,
205
+ requests.codes.unauthorized,
206
+ ):
207
+ raise BackendInvalidCredentialsError(e.response.text)
208
+ raise
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.cloudrift.compute import CloudRiftCompute
3
+ from dstack._internal.core.backends.cloudrift.models import CloudRiftConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class CloudRiftBackend(Backend):
8
+ TYPE = BackendType.CLOUDRIFT
9
+ COMPUTE_CLASS = CloudRiftCompute
10
+
11
+ def __init__(self, config: CloudRiftConfig):
12
+ self.config = config
13
+ self._compute = CloudRiftCompute(self.config)
14
+
15
+ def compute(self) -> CloudRiftCompute:
16
+ return self._compute
@@ -0,0 +1,138 @@
1
+ from typing import Dict, List, Optional
2
+
3
+ from dstack._internal.core.backends.base.backend import Compute
4
+ from dstack._internal.core.backends.base.compute import (
5
+ ComputeWithCreateInstanceSupport,
6
+ get_shim_commands,
7
+ )
8
+ from dstack._internal.core.backends.base.offers import get_catalog_offers
9
+ from dstack._internal.core.backends.cloudrift.api_client import RiftClient
10
+ from dstack._internal.core.backends.cloudrift.models import CloudRiftConfig
11
+ from dstack._internal.core.errors import ComputeError
12
+ from dstack._internal.core.models.backends.base import BackendType
13
+ from dstack._internal.core.models.instances import (
14
+ InstanceAvailability,
15
+ InstanceConfiguration,
16
+ InstanceOffer,
17
+ InstanceOfferWithAvailability,
18
+ )
19
+ from dstack._internal.core.models.placement import PlacementGroup
20
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
21
+ from dstack._internal.utils.logging import get_logger
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
+ class CloudRiftCompute(
27
+ ComputeWithCreateInstanceSupport,
28
+ Compute,
29
+ ):
30
+ def __init__(self, config: CloudRiftConfig):
31
+ super().__init__()
32
+ self.config = config
33
+ self.client = RiftClient(self.config.creds.api_key)
34
+
35
+ def get_offers(
36
+ self, requirements: Optional[Requirements] = None
37
+ ) -> List[InstanceOfferWithAvailability]:
38
+ offers = get_catalog_offers(
39
+ backend=BackendType.CLOUDRIFT,
40
+ locations=self.config.regions or None,
41
+ requirements=requirements,
42
+ )
43
+
44
+ offers_with_availabilities = self._get_offers_with_availability(offers)
45
+ return offers_with_availabilities
46
+
47
+ def _get_offers_with_availability(
48
+ self, offers: List[InstanceOffer]
49
+ ) -> List[InstanceOfferWithAvailability]:
50
+ instance_types_with_availabilities: List[Dict] = self.client.get_instance_types()
51
+
52
+ region_availabilities = {}
53
+ for instance_type in instance_types_with_availabilities:
54
+ for variant in instance_type["variants"]:
55
+ for dc, count in variant["available_nodes_per_dc"].items():
56
+ if count > 0:
57
+ key = (variant["name"], dc)
58
+ region_availabilities[key] = InstanceAvailability.AVAILABLE
59
+
60
+ availability_offers = []
61
+ for offer in offers:
62
+ key = (offer.instance.name, offer.region)
63
+ availability = region_availabilities.get(key, InstanceAvailability.NOT_AVAILABLE)
64
+ availability_offers.append(
65
+ InstanceOfferWithAvailability(**offer.dict(), availability=availability)
66
+ )
67
+
68
+ return availability_offers
69
+
70
+ def create_instance(
71
+ self,
72
+ instance_offer: InstanceOfferWithAvailability,
73
+ instance_config: InstanceConfiguration,
74
+ placement_group: Optional[PlacementGroup],
75
+ ) -> JobProvisioningData:
76
+ commands = get_shim_commands(authorized_keys=instance_config.get_public_keys())
77
+ startup_script = " ".join([" && ".join(commands)])
78
+ logger.debug(
79
+ f"Creating instance for offer {instance_offer.instance.name} in region {instance_offer.region} with commands: {startup_script}"
80
+ )
81
+
82
+ instance_ids = self.client.deploy_instance(
83
+ instance_type=instance_offer.instance.name,
84
+ region=instance_offer.region,
85
+ ssh_keys=instance_config.get_public_keys(),
86
+ cmd=startup_script,
87
+ )
88
+
89
+ if len(instance_ids) == 0:
90
+ raise ComputeError(
91
+ f"Failed to create instance for offer {instance_offer.instance.name} in region {instance_offer.region}."
92
+ )
93
+
94
+ return JobProvisioningData(
95
+ backend=instance_offer.backend,
96
+ instance_type=instance_offer.instance,
97
+ instance_id=instance_ids[0],
98
+ hostname=None,
99
+ internal_ip=None,
100
+ region=instance_offer.region,
101
+ price=instance_offer.price,
102
+ username="riftuser",
103
+ ssh_port=22,
104
+ dockerized=True,
105
+ ssh_proxy=None,
106
+ backend_data=None,
107
+ )
108
+
109
+ def update_provisioning_data(
110
+ self,
111
+ provisioning_data: JobProvisioningData,
112
+ project_ssh_public_key: str,
113
+ project_ssh_private_key: str,
114
+ ):
115
+ instance_info = self.client.get_instance_by_id(provisioning_data.instance_id)
116
+
117
+ if not instance_info:
118
+ return
119
+
120
+ instance_mode = instance_info.get("node_mode", "")
121
+
122
+ if not instance_mode or instance_mode != "VirtualMachine":
123
+ return
124
+
125
+ vms = instance_info.get("virtual_machines", [])
126
+ if len(vms) == 0:
127
+ return
128
+
129
+ vm_ready = vms[0].get("ready", False)
130
+ if vm_ready:
131
+ provisioning_data.hostname = instance_info.get("host_address", None)
132
+
133
+ def terminate_instance(
134
+ self, instance_id: str, region: str, backend_data: Optional[str] = None
135
+ ):
136
+ terminated = self.client.terminate_instance(instance_id=instance_id)
137
+ if not terminated:
138
+ raise ComputeError(f"Failed to terminate instance {instance_id} in region {region}.")
@@ -0,0 +1,66 @@
1
+ import json
2
+
3
+ from dstack._internal.core.backends.base.configurator import (
4
+ BackendRecord,
5
+ Configurator,
6
+ raise_invalid_credentials_error,
7
+ )
8
+ from dstack._internal.core.backends.cloudrift.api_client import RiftClient
9
+ from dstack._internal.core.backends.cloudrift.backend import CloudRiftBackend
10
+ from dstack._internal.core.backends.cloudrift.models import (
11
+ AnyCloudRiftBackendConfig,
12
+ AnyCloudRiftCreds,
13
+ CloudRiftBackendConfig,
14
+ CloudRiftBackendConfigWithCreds,
15
+ CloudRiftConfig,
16
+ CloudRiftCreds,
17
+ CloudRiftStoredConfig,
18
+ )
19
+ from dstack._internal.core.models.backends.base import (
20
+ BackendType,
21
+ )
22
+
23
+
24
+ class CloudRiftConfigurator(Configurator):
25
+ TYPE = BackendType.CLOUDRIFT
26
+ BACKEND_CLASS = CloudRiftBackend
27
+
28
+ def validate_config(
29
+ self, config: CloudRiftBackendConfigWithCreds, default_creds_enabled: bool
30
+ ):
31
+ self._validate_creds(config.creds)
32
+
33
+ def create_backend(
34
+ self, project_name: str, config: CloudRiftBackendConfigWithCreds
35
+ ) -> BackendRecord:
36
+ return BackendRecord(
37
+ config=CloudRiftStoredConfig(
38
+ **CloudRiftBackendConfig.__response__.parse_obj(config).dict()
39
+ ).json(),
40
+ auth=CloudRiftCreds.parse_obj(config.creds).json(),
41
+ )
42
+
43
+ def get_backend_config(
44
+ self, record: BackendRecord, include_creds: bool
45
+ ) -> AnyCloudRiftBackendConfig:
46
+ config = self._get_config(record)
47
+ if include_creds:
48
+ return CloudRiftBackendConfigWithCreds.__response__.parse_obj(config)
49
+ return CloudRiftBackendConfig.__response__.parse_obj(config)
50
+
51
+ def get_backend(self, record: BackendRecord) -> CloudRiftBackend:
52
+ config = self._get_config(record)
53
+ return CloudRiftBackend(config=config)
54
+
55
+ def _get_config(self, record: BackendRecord) -> CloudRiftConfig:
56
+ return CloudRiftConfig.__response__(
57
+ **json.loads(record.config),
58
+ creds=CloudRiftCreds.parse_raw(record.auth),
59
+ )
60
+
61
+ def _validate_creds(self, creds: AnyCloudRiftCreds):
62
+ if not isinstance(creds, CloudRiftCreds):
63
+ raise_invalid_credentials_error(fields=[["creds"]])
64
+ client = RiftClient(creds.api_key)
65
+ if not client.validate_api_key():
66
+ raise_invalid_credentials_error(fields=[["creds", "api_key"]])
@@ -0,0 +1,40 @@
1
+ from typing import Annotated, List, Literal, Optional, Union
2
+
3
+ from pydantic import Field
4
+
5
+ from dstack._internal.core.models.common import CoreModel
6
+
7
+
8
+ class CloudRiftAPIKeyCreds(CoreModel):
9
+ type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
10
+ api_key: Annotated[str, Field(description="The API key")]
11
+
12
+
13
+ AnyCloudRiftCreds = CloudRiftAPIKeyCreds
14
+ CloudRiftCreds = AnyCloudRiftCreds
15
+
16
+
17
+ class CloudRiftBackendConfig(CoreModel):
18
+ type: Annotated[
19
+ Literal["cloudrift"],
20
+ Field(description="The type of backend"),
21
+ ] = "cloudrift"
22
+ regions: Annotated[
23
+ Optional[List[str]],
24
+ Field(description="The list of CloudRift regions. Omit to use all regions"),
25
+ ] = None
26
+
27
+
28
+ class CloudRiftBackendConfigWithCreds(CloudRiftBackendConfig):
29
+ creds: Annotated[AnyCloudRiftCreds, Field(description="The credentials")]
30
+
31
+
32
+ AnyCloudRiftBackendConfig = Union[CloudRiftBackendConfig, CloudRiftBackendConfigWithCreds]
33
+
34
+
35
+ class CloudRiftStoredConfig(CloudRiftBackendConfig):
36
+ pass
37
+
38
+
39
+ class CloudRiftConfig(CloudRiftStoredConfig):
40
+ creds: AnyCloudRiftCreds
@@ -20,6 +20,15 @@ try:
20
20
  except ImportError:
21
21
  pass
22
22
 
23
+ try:
24
+ from dstack._internal.core.backends.cloudrift.configurator import (
25
+ CloudRiftConfigurator,
26
+ )
27
+
28
+ _CONFIGURATOR_CLASSES.append(CloudRiftConfigurator)
29
+ except ImportError:
30
+ pass
31
+
23
32
  try:
24
33
  from dstack._internal.core.backends.cudo.configurator import (
25
34
  CudoConfigurator,
@@ -8,6 +8,10 @@ from dstack._internal.core.backends.azure.models import (
8
8
  AzureBackendConfig,
9
9
  AzureBackendConfigWithCreds,
10
10
  )
11
+ from dstack._internal.core.backends.cloudrift.models import (
12
+ CloudRiftBackendConfig,
13
+ CloudRiftBackendConfigWithCreds,
14
+ )
11
15
  from dstack._internal.core.backends.cudo.models import (
12
16
  CudoBackendConfig,
13
17
  CudoBackendConfigWithCreds,
@@ -65,6 +69,7 @@ from dstack._internal.core.models.common import CoreModel
65
69
  AnyBackendConfigWithoutCreds = Union[
66
70
  AWSBackendConfig,
67
71
  AzureBackendConfig,
72
+ CloudRiftBackendConfig,
68
73
  CudoBackendConfig,
69
74
  DataCrunchBackendConfig,
70
75
  GCPBackendConfig,
@@ -86,6 +91,7 @@ AnyBackendConfigWithoutCreds = Union[
86
91
  AnyBackendConfigWithCreds = Union[
87
92
  AWSBackendConfigWithCreds,
88
93
  AzureBackendConfigWithCreds,
94
+ CloudRiftBackendConfigWithCreds,
89
95
  CudoBackendConfigWithCreds,
90
96
  DataCrunchBackendConfigWithCreds,
91
97
  GCPBackendConfigWithCreds,
@@ -106,6 +112,7 @@ AnyBackendConfigWithCreds = Union[
106
112
  AnyBackendFileConfigWithCreds = Union[
107
113
  AWSBackendConfigWithCreds,
108
114
  AzureBackendConfigWithCreds,
115
+ CloudRiftBackendConfigWithCreds,
109
116
  CudoBackendConfigWithCreds,
110
117
  DataCrunchBackendConfigWithCreds,
111
118
  GCPBackendFileConfigWithCreds,
@@ -0,0 +1,15 @@
1
+ from typing import Dict, Optional
2
+
3
+ from dstack._internal.server.schemas.logs import PollLogsRequest
4
+
5
+
6
+ def get_poll_logs_excludes(request: PollLogsRequest) -> Optional[Dict]:
7
+ """
8
+ Returns exclude mapping to exclude certain fields from the request.
9
+ Use this method to exclude new fields when they are not set to keep
10
+ clients backward-compatibility with older servers.
11
+ """
12
+ excludes = {}
13
+ if request.next_token is None:
14
+ excludes["next_token"] = True
15
+ return excludes if excludes else None
@@ -97,6 +97,8 @@ def get_run_spec_excludes(run_spec: RunSpec) -> Optional[Dict]:
97
97
  configuration_excludes["rate_limits"] = True
98
98
  if configuration.shell is None:
99
99
  configuration_excludes["shell"] = True
100
+ if configuration.docker is None:
101
+ configuration_excludes["docker"] = True
100
102
  if configuration.priority is None:
101
103
  configuration_excludes["priority"] = True
102
104
  if configuration.startup_order is None:
@@ -6,6 +6,7 @@ class BackendType(str, enum.Enum):
6
6
  Attributes:
7
7
  AWS (BackendType): Amazon Web Services
8
8
  AZURE (BackendType): Microsoft Azure
9
+ CLOUDRIFT (BackendType): CloudRift
9
10
  CUDO (BackendType): Cudo
10
11
  DSTACK (BackendType): dstack Sky
11
12
  GCP (BackendType): Google Cloud Platform
@@ -22,6 +23,7 @@ class BackendType(str, enum.Enum):
22
23
 
23
24
  AWS = "aws"
24
25
  AZURE = "azure"
26
+ CLOUDRIFT = "cloudrift"
25
27
  CUDO = "cudo"
26
28
  DATACRUNCH = "datacrunch"
27
29
  DSTACK = "dstack"
@@ -194,12 +194,14 @@ class BaseRunConfiguration(CoreModel):
194
194
  ] = None
195
195
  python: Annotated[
196
196
  Optional[PythonVersion],
197
- Field(description="The major version of Python. Mutually exclusive with `image`"),
197
+ Field(
198
+ description="The major version of Python. Mutually exclusive with `image` and `docker`"
199
+ ),
198
200
  ] = None
199
201
  nvcc: Annotated[
200
202
  Optional[bool],
201
203
  Field(
202
- description="Use image with NVIDIA CUDA Compiler (NVCC) included. Mutually exclusive with `image`"
204
+ description="Use image with NVIDIA CUDA Compiler (NVCC) included. Mutually exclusive with `image` and `docker`"
203
205
  ),
204
206
  ] = None
205
207
  single_branch: Annotated[
@@ -244,6 +246,12 @@ class BaseRunConfiguration(CoreModel):
244
246
  volumes: Annotated[
245
247
  List[Union[MountPoint, str]], Field(description="The volumes mount points")
246
248
  ] = []
249
+ docker: Annotated[
250
+ Optional[bool],
251
+ Field(
252
+ description="Use Docker inside the container. Mutually exclusive with `image`, `python`, and `nvcc`. Overrides `privileged`"
253
+ ),
254
+ ] = None
247
255
  # deprecated since 0.18.31; task, service -- no effect; dev-environment -- executed right before `init`
248
256
  setup: CommandsList = []
249
257
 
@@ -259,6 +267,18 @@ class BaseRunConfiguration(CoreModel):
259
267
  return PythonVersion(v)
260
268
  return v
261
269
 
270
+ @validator("docker", pre=True, always=True)
271
+ def _docker(cls, v, values) -> Optional[bool]:
272
+ if v is True and values.get("image"):
273
+ raise KeyError("`image` and `docker` are mutually exclusive fields")
274
+ if v is True and values.get("python"):
275
+ raise KeyError("`python` and `docker` are mutually exclusive fields")
276
+ if v is True and values.get("nvcc"):
277
+ raise KeyError("`nvcc` and `docker` are mutually exclusive fields")
278
+ # Ideally, we'd like to also prohibit privileged=False when docker=True,
279
+ # but it's not possible to do so without breaking backwards compatibility.
280
+ return v
281
+
262
282
  @validator("volumes", each_item=True)
263
283
  def convert_volumes(cls, v) -> MountPoint:
264
284
  if isinstance(v, str):
@@ -1,6 +1,6 @@
1
1
  from datetime import datetime
2
2
  from enum import Enum
3
- from typing import List
3
+ from typing import List, Optional
4
4
 
5
5
  from dstack._internal.core.models.common import CoreModel
6
6
 
@@ -23,3 +23,4 @@ class LogEvent(CoreModel):
23
23
 
24
24
  class JobSubmissionLogs(CoreModel):
25
25
  logs: List[LogEvent]
26
+ next_token: Optional[str]
@@ -301,7 +301,7 @@ class JobSubmission(CoreModel):
301
301
  job_provisioning_data: Optional[JobProvisioningData]
302
302
  job_runtime_data: Optional[JobRuntimeData]
303
303
  # TODO: make status_message and error a computed field after migrating to pydanticV2
304
- status_message: Optional[str]
304
+ status_message: Optional[str] = None
305
305
  error: Optional[str] = None
306
306
 
307
307
  @property
@@ -548,11 +548,17 @@ class Run(CoreModel):
548
548
  retry_on_events = (
549
549
  jobs[0].job_spec.retry.on_events if jobs and jobs[0].job_spec.retry else []
550
550
  )
551
+ job_status = (
552
+ jobs[0].job_submissions[-1].status
553
+ if len(jobs) == 1 and jobs[0].job_submissions
554
+ else None
555
+ )
551
556
  termination_reason = Run.get_last_termination_reason(jobs[0]) if jobs else None
552
557
  except KeyError:
553
558
  return values
554
559
  values["status_message"] = Run._get_status_message(
555
560
  status=status,
561
+ job_status=job_status,
556
562
  retry_on_events=retry_on_events,
557
563
  termination_reason=termination_reason,
558
564
  )
@@ -568,9 +574,12 @@ class Run(CoreModel):
568
574
  @staticmethod
569
575
  def _get_status_message(
570
576
  status: RunStatus,
577
+ job_status: Optional[JobStatus],
571
578
  retry_on_events: List[RetryEvent],
572
579
  termination_reason: Optional[JobTerminationReason],
573
580
  ) -> str:
581
+ if job_status == JobStatus.PULLING:
582
+ return "pulling"
574
583
  # Currently, `retrying` is shown only for `no-capacity` events
575
584
  if (
576
585
  status in [RunStatus.SUBMITTED, RunStatus.PENDING]
@@ -29,7 +29,7 @@ async def process_fleets():
29
29
  )
30
30
  .order_by(FleetModel.last_processed_at.asc())
31
31
  .limit(1)
32
- .with_for_update(skip_locked=True)
32
+ .with_for_update(skip_locked=True, key_share=True)
33
33
  )
34
34
  fleet_model = res.scalar()
35
35
  if fleet_model is None: