dstack-0.19.6rc1-py3-none-any.whl → dstack-0.19.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (69)
  1. dstack/_internal/cli/services/args.py +2 -2
  2. dstack/_internal/cli/services/configurators/fleet.py +3 -2
  3. dstack/_internal/cli/services/configurators/run.py +50 -4
  4. dstack/_internal/cli/utils/fleet.py +3 -1
  5. dstack/_internal/cli/utils/run.py +25 -28
  6. dstack/_internal/core/backends/aws/compute.py +13 -1
  7. dstack/_internal/core/backends/azure/compute.py +42 -13
  8. dstack/_internal/core/backends/azure/configurator.py +21 -0
  9. dstack/_internal/core/backends/azure/models.py +9 -0
  10. dstack/_internal/core/backends/base/compute.py +101 -27
  11. dstack/_internal/core/backends/base/offers.py +13 -3
  12. dstack/_internal/core/backends/cudo/compute.py +2 -0
  13. dstack/_internal/core/backends/datacrunch/compute.py +2 -0
  14. dstack/_internal/core/backends/gcp/auth.py +1 -1
  15. dstack/_internal/core/backends/gcp/compute.py +51 -35
  16. dstack/_internal/core/backends/gcp/resources.py +6 -1
  17. dstack/_internal/core/backends/lambdalabs/compute.py +20 -8
  18. dstack/_internal/core/backends/local/compute.py +2 -0
  19. dstack/_internal/core/backends/nebius/compute.py +95 -1
  20. dstack/_internal/core/backends/nebius/configurator.py +11 -0
  21. dstack/_internal/core/backends/nebius/fabrics.py +47 -0
  22. dstack/_internal/core/backends/nebius/models.py +8 -0
  23. dstack/_internal/core/backends/nebius/resources.py +29 -0
  24. dstack/_internal/core/backends/oci/compute.py +2 -0
  25. dstack/_internal/core/backends/remote/provisioning.py +27 -2
  26. dstack/_internal/core/backends/template/compute.py.jinja +2 -0
  27. dstack/_internal/core/backends/tensordock/compute.py +2 -0
  28. dstack/_internal/core/backends/vastai/compute.py +2 -1
  29. dstack/_internal/core/backends/vultr/compute.py +5 -1
  30. dstack/_internal/core/errors.py +4 -0
  31. dstack/_internal/core/models/fleets.py +2 -0
  32. dstack/_internal/core/models/instances.py +4 -3
  33. dstack/_internal/core/models/resources.py +80 -3
  34. dstack/_internal/core/models/runs.py +10 -3
  35. dstack/_internal/core/models/volumes.py +1 -1
  36. dstack/_internal/server/background/tasks/process_fleets.py +4 -13
  37. dstack/_internal/server/background/tasks/process_instances.py +176 -55
  38. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -1
  39. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +5 -2
  40. dstack/_internal/server/background/tasks/process_submitted_jobs.py +1 -1
  41. dstack/_internal/server/models.py +1 -0
  42. dstack/_internal/server/routers/gateways.py +2 -1
  43. dstack/_internal/server/services/config.py +7 -2
  44. dstack/_internal/server/services/fleets.py +24 -26
  45. dstack/_internal/server/services/gateways/__init__.py +17 -2
  46. dstack/_internal/server/services/instances.py +0 -2
  47. dstack/_internal/server/services/offers.py +15 -0
  48. dstack/_internal/server/services/placement.py +27 -6
  49. dstack/_internal/server/services/plugins.py +77 -0
  50. dstack/_internal/server/services/resources.py +21 -0
  51. dstack/_internal/server/services/runs.py +41 -17
  52. dstack/_internal/server/services/volumes.py +10 -1
  53. dstack/_internal/server/testing/common.py +35 -26
  54. dstack/_internal/utils/common.py +22 -9
  55. dstack/_internal/utils/json_schema.py +6 -3
  56. dstack/api/__init__.py +1 -0
  57. dstack/api/server/__init__.py +8 -1
  58. dstack/api/server/_fleets.py +16 -0
  59. dstack/api/server/_runs.py +44 -3
  60. dstack/plugins/__init__.py +8 -0
  61. dstack/plugins/_base.py +72 -0
  62. dstack/plugins/_models.py +8 -0
  63. dstack/plugins/_utils.py +19 -0
  64. dstack/version.py +1 -1
  65. {dstack-0.19.6rc1.dist-info → dstack-0.19.8.dist-info}/METADATA +14 -2
  66. {dstack-0.19.6rc1.dist-info → dstack-0.19.8.dist-info}/RECORD +69 -62
  67. {dstack-0.19.6rc1.dist-info → dstack-0.19.8.dist-info}/WHEEL +0 -0
  68. {dstack-0.19.6rc1.dist-info → dstack-0.19.8.dist-info}/entry_points.txt +0 -0
  69. {dstack-0.19.6rc1.dist-info → dstack-0.19.8.dist-info}/licenses/LICENSE.md +0 -0
@@ -6,7 +6,12 @@ from typing import Dict, List, Optional, Type
6
6
  import requests
7
7
 
8
8
  from dstack import version
9
- from dstack._internal.core.errors import ClientError, ServerClientError, URLNotFoundError
9
+ from dstack._internal.core.errors import (
10
+ ClientError,
11
+ MethodNotAllowedError,
12
+ ServerClientError,
13
+ URLNotFoundError,
14
+ )
10
15
  from dstack._internal.utils.logging import get_logger
11
16
  from dstack.api.server._backends import BackendsAPIClient
12
17
  from dstack.api.server._fleets import FleetsAPIClient
@@ -156,6 +161,8 @@ class APIClient:
156
161
  )
157
162
  if resp.status_code == 404:
158
163
  raise URLNotFoundError(f"Status code 404 when requesting {resp.request.url}")
164
+ if resp.status_code == 405:
165
+ raise MethodNotAllowedError(f"Status code 405 when requesting {resp.request.url}")
159
166
  if 400 <= resp.status_code < 600:
160
167
  raise ClientError(
161
168
  f"Unexpected error: status code {resp.status_code}"
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Optional, Union
3
3
  from pydantic import parse_obj_as
4
4
 
5
5
  from dstack._internal.core.models.fleets import ApplyFleetPlanInput, Fleet, FleetPlan, FleetSpec
6
+ from dstack._internal.core.models.instances import Instance
6
7
  from dstack._internal.server.schemas.fleets import (
7
8
  ApplyFleetPlanRequest,
8
9
  CreateFleetRequest,
@@ -83,9 +84,24 @@ def _get_apply_plan_excludes(plan_input: ApplyFleetPlanInput) -> Dict:
83
84
  spec_excludes = _get_fleet_spec_excludes(plan_input.spec)
84
85
  if spec_excludes:
85
86
  apply_plan_excludes["spec"] = apply_plan_excludes
87
+ current_resource = plan_input.current_resource
88
+ if current_resource is not None:
89
+ current_resource_excludes = {}
90
+ apply_plan_excludes["current_resource"] = current_resource_excludes
91
+ if all(map(_should_exclude_instance_cpu_arch, current_resource.instances)):
92
+ current_resource_excludes["instances"] = {
93
+ "__all__": {"instance_type": {"resources": {"cpu_arch"}}}
94
+ }
86
95
  return {"plan": apply_plan_excludes}
87
96
 
88
97
 
98
+ def _should_exclude_instance_cpu_arch(instance: Instance) -> bool:
99
+ try:
100
+ return instance.instance_type.resources.cpu_arch is None
101
+ except AttributeError:
102
+ return True
103
+
104
+
89
105
  def _get_create_fleet_excludes(fleet_spec: FleetSpec) -> Dict:
90
106
  create_fleet_excludes = {}
91
107
  spec_excludes = _get_fleet_spec_excludes(fleet_spec)
@@ -7,6 +7,7 @@ from pydantic import parse_obj_as
7
7
  from dstack._internal.core.models.configurations import ServiceConfiguration
8
8
  from dstack._internal.core.models.runs import (
9
9
  ApplyRunPlanInput,
10
+ JobSubmission,
10
11
  Run,
11
12
  RunPlan,
12
13
  RunSpec,
@@ -96,13 +97,53 @@ def _get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[Dict]:
96
97
  run_spec_excludes = _get_run_spec_excludes(plan.run_spec)
97
98
  if run_spec_excludes is not None:
98
99
  apply_plan_excludes["run_spec"] = run_spec_excludes
99
- if plan.current_resource is not None:
100
- apply_plan_excludes["current_resource"] = {
101
- "run_spec": _get_run_spec_excludes(plan.current_resource.run_spec)
100
+ current_resource = plan.current_resource
101
+ if current_resource is not None:
102
+ current_resource_excludes = {}
103
+ apply_plan_excludes["current_resource"] = current_resource_excludes
104
+ current_resource_excludes["run_spec"] = _get_run_spec_excludes(current_resource.run_spec)
105
+ job_submissions_excludes = {}
106
+ current_resource_excludes["jobs"] = {
107
+ "__all__": {"job_submissions": {"__all__": job_submissions_excludes}}
102
108
  }
109
+ job_submissions = [js for j in current_resource.jobs for js in j.job_submissions]
110
+ if all(map(_should_exclude_job_submission_jpd_cpu_arch, job_submissions)):
111
+ job_submissions_excludes["job_provisioning_data"] = {
112
+ "instance_type": {"resources": {"cpu_arch"}}
113
+ }
114
+ if all(map(_should_exclude_job_submission_jrd_cpu_arch, job_submissions)):
115
+ job_submissions_excludes["job_runtime_data"] = {
116
+ "offer": {"instance": {"resources": {"cpu_arch"}}}
117
+ }
118
+ latest_job_submission = current_resource.latest_job_submission
119
+ if latest_job_submission is not None:
120
+ latest_job_submission_excludes = {}
121
+ current_resource_excludes["latest_job_submission"] = latest_job_submission_excludes
122
+ if _should_exclude_job_submission_jpd_cpu_arch(latest_job_submission):
123
+ latest_job_submission_excludes["job_provisioning_data"] = {
124
+ "instance_type": {"resources": {"cpu_arch"}}
125
+ }
126
+ if _should_exclude_job_submission_jrd_cpu_arch(latest_job_submission):
127
+ latest_job_submission_excludes["job_runtime_data"] = {
128
+ "offer": {"instance": {"resources": {"cpu_arch"}}}
129
+ }
103
130
  return {"plan": apply_plan_excludes}
104
131
 
105
132
 
133
+ def _should_exclude_job_submission_jpd_cpu_arch(job_submission: JobSubmission) -> bool:
134
+ try:
135
+ return job_submission.job_provisioning_data.instance_type.resources.cpu_arch is None
136
+ except AttributeError:
137
+ return True
138
+
139
+
140
+ def _should_exclude_job_submission_jrd_cpu_arch(job_submission: JobSubmission) -> bool:
141
+ try:
142
+ return job_submission.job_runtime_data.offer.instance.resources.cpu_arch is None
143
+ except AttributeError:
144
+ return True
145
+
146
+
106
147
  def _get_get_plan_excludes(request: GetRunPlanRequest) -> Optional[Dict]:
107
148
  """
108
149
  Excludes new fields when they are not set to keep
@@ -0,0 +1,8 @@
1
+ # ruff: noqa: F401
2
+ from dstack._internal.core.models.fleets import FleetSpec
3
+ from dstack._internal.core.models.gateways import GatewaySpec
4
+ from dstack._internal.core.models.runs import RunSpec
5
+ from dstack._internal.core.models.volumes import VolumeSpec
6
+ from dstack.plugins._base import ApplyPolicy, Plugin
7
+ from dstack.plugins._models import ApplySpec
8
+ from dstack.plugins._utils import get_plugin_logger
@@ -0,0 +1,72 @@
1
+ from dstack._internal.core.models.fleets import FleetSpec
2
+ from dstack._internal.core.models.gateways import GatewaySpec
3
+ from dstack._internal.core.models.runs import RunSpec
4
+ from dstack._internal.core.models.volumes import VolumeSpec
5
+ from dstack.plugins._models import ApplySpec
6
+
7
+
8
+ class ApplyPolicy:
9
+ """
10
+ A base apply policy class to modify specs on `dstack apply`.
11
+ Subclass it and return the subclass instance in `Plugin.get_apply_policies()`.
12
+ """
13
+
14
+ def on_apply(self, user: str, project: str, spec: ApplySpec) -> ApplySpec:
15
+ """
16
+ Modify `spec` before it's applied.
17
+ Raise `ValueError` for `spec` to be rejected as invalid.
18
+
19
+ This method can be called twice:
20
+ * first when a user gets a plan
21
+ * second when a user applies a plan
22
+
23
+ In both cases, the original spec is passed, so the method does not
24
+ need to check if it modified the spec before.
25
+
26
+ It's safe to modify and return `spec` without copying.
27
+ """
28
+ if isinstance(spec, RunSpec):
29
+ return self.on_run_apply(user=user, project=project, spec=spec)
30
+ if isinstance(spec, FleetSpec):
31
+ return self.on_fleet_apply(user=user, project=project, spec=spec)
32
+ if isinstance(spec, VolumeSpec):
33
+ return self.on_volume_apply(user=user, project=project, spec=spec)
34
+ if isinstance(spec, GatewaySpec):
35
+ return self.on_gateway_apply(user=user, project=project, spec=spec)
36
+ raise ValueError(f"Unknown spec type {type(spec)}")
37
+
38
+ def on_run_apply(self, user: str, project: str, spec: RunSpec) -> RunSpec:
39
+ """
40
+ Called by the default `on_apply()` implementation for runs.
41
+ """
42
+ return spec
43
+
44
+ def on_fleet_apply(self, user: str, project: str, spec: FleetSpec) -> FleetSpec:
45
+ """
46
+ Called by the default `on_apply()` implementation for fleets.
47
+ """
48
+ return spec
49
+
50
+ def on_volume_apply(self, user: str, project: str, spec: VolumeSpec) -> VolumeSpec:
51
+ """
52
+ Called by the default `on_apply()` implementation for volumes.
53
+ """
54
+ return spec
55
+
56
+ def on_gateway_apply(self, user: str, project: str, spec: GatewaySpec) -> GatewaySpec:
57
+ """
58
+ Called by the default `on_apply()` implementation for gateways.
59
+ """
60
+ return spec
61
+
62
+
63
+ class Plugin:
64
+ """
65
+ A base plugin class.
66
+ Plugins must subclass it, implement public methods,
67
+ and register the subclass as an entrypoint of the package
68
+ (https://packaging.python.org/en/latest/specifications/entry-points/).
69
+ """
70
+
71
+ def get_apply_policies(self) -> list[ApplyPolicy]:
72
+ return []
@@ -0,0 +1,8 @@
1
+ from typing import TypeVar
2
+
3
+ from dstack._internal.core.models.fleets import FleetSpec
4
+ from dstack._internal.core.models.gateways import GatewaySpec
5
+ from dstack._internal.core.models.runs import RunSpec
6
+ from dstack._internal.core.models.volumes import VolumeSpec
7
+
8
+ ApplySpec = TypeVar("ApplySpec", RunSpec, FleetSpec, VolumeSpec, GatewaySpec)
@@ -0,0 +1,19 @@
1
+ import logging
2
+
3
+ from dstack._internal.utils.logging import get_logger
4
+
5
+
6
+ def get_plugin_logger(name: str) -> logging.Logger:
7
+ """
8
+ Use this function to set up loggers in plugins.
9
+
10
+ Put at the top of the plugin modules:
11
+
12
+ ```
13
+ from dstack.plugins import get_plugin_logger
14
+
15
+ logger = get_plugin_logger(__name__)
16
+ ```
17
+
18
+ """
19
+ return get_logger(f"dstack.plugins.{name}")
dstack/version.py CHANGED
@@ -1,3 +1,3 @@
1
- __version__ = "0.19.6rc1"
1
+ __version__ = "0.19.8"
2
2
  __is_release__ = True
3
3
  base_image = "0.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dstack
3
- Version: 0.19.6rc1
3
+ Version: 0.19.8
4
4
  Summary: dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises.
5
5
  Project-URL: Homepage, https://dstack.ai
6
6
  Project-URL: Source, https://github.com/dstackai/dstack
@@ -21,7 +21,7 @@ Requires-Dist: cryptography
21
21
  Requires-Dist: cursor
22
22
  Requires-Dist: filelock
23
23
  Requires-Dist: gitpython
24
- Requires-Dist: gpuhunt<0.2.0,>=0.1.3
24
+ Requires-Dist: gpuhunt==0.1.5
25
25
  Requires-Dist: jsonschema
26
26
  Requires-Dist: packaging
27
27
  Requires-Dist: paramiko>=3.2.0
@@ -49,9 +49,11 @@ Requires-Dist: asyncpg; extra == 'all'
49
49
  Requires-Dist: azure-identity>=1.12.0; extra == 'all'
50
50
  Requires-Dist: azure-mgmt-authorization>=3.0.0; extra == 'all'
51
51
  Requires-Dist: azure-mgmt-compute>=29.1.0; extra == 'all'
52
+ Requires-Dist: azure-mgmt-msi>=7.0.0; extra == 'all'
52
53
  Requires-Dist: azure-mgmt-network<28.0.0,>=23.0.0; extra == 'all'
53
54
  Requires-Dist: azure-mgmt-resource>=22.0.0; extra == 'all'
54
55
  Requires-Dist: azure-mgmt-subscription>=3.1.1; extra == 'all'
56
+ Requires-Dist: backports-entry-points-selectable; extra == 'all'
55
57
  Requires-Dist: boto3; extra == 'all'
56
58
  Requires-Dist: botocore; extra == 'all'
57
59
  Requires-Dist: datacrunch; extra == 'all'
@@ -87,6 +89,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'aws'
87
89
  Requires-Dist: alembic>=1.10.2; extra == 'aws'
88
90
  Requires-Dist: apscheduler<4; extra == 'aws'
89
91
  Requires-Dist: asyncpg; extra == 'aws'
92
+ Requires-Dist: backports-entry-points-selectable; extra == 'aws'
90
93
  Requires-Dist: boto3; extra == 'aws'
91
94
  Requires-Dist: botocore; extra == 'aws'
92
95
  Requires-Dist: docker>=6.0.0; extra == 'aws'
@@ -114,9 +117,11 @@ Requires-Dist: asyncpg; extra == 'azure'
114
117
  Requires-Dist: azure-identity>=1.12.0; extra == 'azure'
115
118
  Requires-Dist: azure-mgmt-authorization>=3.0.0; extra == 'azure'
116
119
  Requires-Dist: azure-mgmt-compute>=29.1.0; extra == 'azure'
120
+ Requires-Dist: azure-mgmt-msi>=7.0.0; extra == 'azure'
117
121
  Requires-Dist: azure-mgmt-network<28.0.0,>=23.0.0; extra == 'azure'
118
122
  Requires-Dist: azure-mgmt-resource>=22.0.0; extra == 'azure'
119
123
  Requires-Dist: azure-mgmt-subscription>=3.1.1; extra == 'azure'
124
+ Requires-Dist: backports-entry-points-selectable; extra == 'azure'
120
125
  Requires-Dist: docker>=6.0.0; extra == 'azure'
121
126
  Requires-Dist: fastapi; extra == 'azure'
122
127
  Requires-Dist: grpcio>=1.50; extra == 'azure'
@@ -139,6 +144,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'datacrunch'
139
144
  Requires-Dist: alembic>=1.10.2; extra == 'datacrunch'
140
145
  Requires-Dist: apscheduler<4; extra == 'datacrunch'
141
146
  Requires-Dist: asyncpg; extra == 'datacrunch'
147
+ Requires-Dist: backports-entry-points-selectable; extra == 'datacrunch'
142
148
  Requires-Dist: datacrunch; extra == 'datacrunch'
143
149
  Requires-Dist: docker>=6.0.0; extra == 'datacrunch'
144
150
  Requires-Dist: fastapi; extra == 'datacrunch'
@@ -170,6 +176,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'gcp'
170
176
  Requires-Dist: alembic>=1.10.2; extra == 'gcp'
171
177
  Requires-Dist: apscheduler<4; extra == 'gcp'
172
178
  Requires-Dist: asyncpg; extra == 'gcp'
179
+ Requires-Dist: backports-entry-points-selectable; extra == 'gcp'
173
180
  Requires-Dist: docker>=6.0.0; extra == 'gcp'
174
181
  Requires-Dist: fastapi; extra == 'gcp'
175
182
  Requires-Dist: google-api-python-client>=2.80.0; extra == 'gcp'
@@ -199,6 +206,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'kubernetes'
199
206
  Requires-Dist: alembic>=1.10.2; extra == 'kubernetes'
200
207
  Requires-Dist: apscheduler<4; extra == 'kubernetes'
201
208
  Requires-Dist: asyncpg; extra == 'kubernetes'
209
+ Requires-Dist: backports-entry-points-selectable; extra == 'kubernetes'
202
210
  Requires-Dist: docker>=6.0.0; extra == 'kubernetes'
203
211
  Requires-Dist: fastapi; extra == 'kubernetes'
204
212
  Requires-Dist: grpcio>=1.50; extra == 'kubernetes'
@@ -222,6 +230,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'lambda'
222
230
  Requires-Dist: alembic>=1.10.2; extra == 'lambda'
223
231
  Requires-Dist: apscheduler<4; extra == 'lambda'
224
232
  Requires-Dist: asyncpg; extra == 'lambda'
233
+ Requires-Dist: backports-entry-points-selectable; extra == 'lambda'
225
234
  Requires-Dist: boto3; extra == 'lambda'
226
235
  Requires-Dist: botocore; extra == 'lambda'
227
236
  Requires-Dist: docker>=6.0.0; extra == 'lambda'
@@ -246,6 +255,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'nebius'
246
255
  Requires-Dist: alembic>=1.10.2; extra == 'nebius'
247
256
  Requires-Dist: apscheduler<4; extra == 'nebius'
248
257
  Requires-Dist: asyncpg; extra == 'nebius'
258
+ Requires-Dist: backports-entry-points-selectable; extra == 'nebius'
249
259
  Requires-Dist: docker>=6.0.0; extra == 'nebius'
250
260
  Requires-Dist: fastapi; extra == 'nebius'
251
261
  Requires-Dist: grpcio>=1.50; extra == 'nebius'
@@ -269,6 +279,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'oci'
269
279
  Requires-Dist: alembic>=1.10.2; extra == 'oci'
270
280
  Requires-Dist: apscheduler<4; extra == 'oci'
271
281
  Requires-Dist: asyncpg; extra == 'oci'
282
+ Requires-Dist: backports-entry-points-selectable; extra == 'oci'
272
283
  Requires-Dist: docker>=6.0.0; extra == 'oci'
273
284
  Requires-Dist: fastapi; extra == 'oci'
274
285
  Requires-Dist: grpcio>=1.50; extra == 'oci'
@@ -292,6 +303,7 @@ Requires-Dist: alembic-postgresql-enum; extra == 'server'
292
303
  Requires-Dist: alembic>=1.10.2; extra == 'server'
293
304
  Requires-Dist: apscheduler<4; extra == 'server'
294
305
  Requires-Dist: asyncpg; extra == 'server'
306
+ Requires-Dist: backports-entry-points-selectable; extra == 'server'
295
307
  Requires-Dist: docker>=6.0.0; extra == 'server'
296
308
  Requires-Dist: fastapi; extra == 'server'
297
309
  Requires-Dist: grpcio>=1.50; extra == 'server'