dstack 0.18.43__py3-none-any.whl → 0.18.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (59)
  1. dstack/_internal/cli/services/configurators/run.py +1 -0
  2. dstack/_internal/cli/utils/run.py +11 -0
  3. dstack/_internal/core/backends/aws/compute.py +1 -0
  4. dstack/_internal/core/backends/azure/compute.py +1 -1
  5. dstack/_internal/core/backends/gcp/compute.py +1 -1
  6. dstack/_internal/core/backends/runpod/compute.py +21 -3
  7. dstack/_internal/core/backends/runpod/config.py +8 -0
  8. dstack/_internal/core/models/backends/runpod.py +2 -0
  9. dstack/_internal/core/models/configurations.py +2 -1
  10. dstack/_internal/core/models/profiles.py +46 -1
  11. dstack/_internal/core/models/runs.py +4 -0
  12. dstack/_internal/server/app.py +11 -1
  13. dstack/_internal/server/background/__init__.py +10 -0
  14. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -0
  15. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
  16. dstack/_internal/server/background/tasks/process_running_jobs.py +66 -19
  17. dstack/_internal/server/background/tasks/process_runs.py +1 -0
  18. dstack/_internal/server/background/tasks/process_submitted_jobs.py +4 -1
  19. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  20. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  21. dstack/_internal/server/models.py +11 -0
  22. dstack/_internal/server/routers/metrics.py +21 -2
  23. dstack/_internal/server/routers/prometheus.py +36 -0
  24. dstack/_internal/server/security/permissions.py +1 -1
  25. dstack/_internal/server/services/backends/configurators/runpod.py +3 -33
  26. dstack/_internal/server/services/config.py +13 -3
  27. dstack/_internal/server/services/fleets.py +1 -0
  28. dstack/_internal/server/services/gateways/__init__.py +1 -0
  29. dstack/_internal/server/services/jobs/configurators/base.py +9 -1
  30. dstack/_internal/server/services/metrics.py +103 -70
  31. dstack/_internal/server/services/prometheus.py +87 -0
  32. dstack/_internal/server/services/runner/client.py +14 -3
  33. dstack/_internal/server/services/runs.py +43 -15
  34. dstack/_internal/server/services/volumes.py +1 -0
  35. dstack/_internal/server/settings.py +3 -0
  36. dstack/_internal/server/statics/index.html +1 -1
  37. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4eb116b97819badd1e2c.js} +66 -13
  38. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4eb116b97819badd1e2c.js.map} +1 -1
  39. dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
  40. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  41. dstack/_internal/server/testing/common.py +17 -0
  42. dstack/api/_public/runs.py +3 -0
  43. dstack/api/server/_fleets.py +2 -0
  44. dstack/api/server/_runs.py +4 -0
  45. dstack/api/utils.py +3 -0
  46. dstack/version.py +1 -1
  47. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/METADATA +10 -1
  48. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/RECORD +59 -50
  49. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +189 -0
  50. tests/_internal/server/background/tasks/test_process_running_jobs.py +125 -0
  51. tests/_internal/server/routers/test_fleets.py +2 -0
  52. tests/_internal/server/routers/test_metrics.py +15 -0
  53. tests/_internal/server/routers/test_prometheus.py +244 -0
  54. tests/_internal/server/routers/test_runs.py +79 -56
  55. tests/_internal/server/services/test_metrics.py +163 -0
  56. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/LICENSE.md +0 -0
  57. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/WHEEL +0 -0
  58. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/entry_points.txt +0 -0
  59. {dstack-0.18.43.dist-info → dstack-0.18.44.dist-info}/top_level.txt +0 -0
@@ -1,113 +1,146 @@
1
1
  import json
2
+ from collections import defaultdict
3
+ from collections.abc import Sequence
2
4
  from datetime import datetime, timezone
5
+ from typing import Optional
3
6
 
4
7
  from sqlalchemy import select
5
8
  from sqlalchemy.ext.asyncio import AsyncSession
6
9
 
7
- from dstack._internal.core.errors import ResourceNotExistsError
8
10
  from dstack._internal.core.models.metrics import JobMetrics, Metric
9
- from dstack._internal.server.models import JobMetricsPoint, JobModel, ProjectModel
10
- from dstack._internal.server.services.jobs import get_run_job_model
11
+ from dstack._internal.server.models import JobMetricsPoint, JobModel
12
+ from dstack._internal.utils.logging import get_logger
11
13
 
12
-
13
- async def get_job_metrics(
14
- session: AsyncSession,
15
- project: ProjectModel,
16
- run_name: str,
17
- replica_num: int,
18
- job_num: int,
19
- ) -> JobMetrics:
20
- job_model = await get_run_job_model(
21
- session=session,
22
- project=project,
23
- run_name=run_name,
24
- replica_num=replica_num,
25
- job_num=job_num,
26
- )
27
- if job_model is None:
28
- raise ResourceNotExistsError("Found no job with given parameters")
29
- job_metrics = await _get_job_metrics(
30
- session=session,
31
- job_model=job_model,
32
- )
33
- return job_metrics
14
+ logger = get_logger(__name__)
34
15
 
35
16
 
36
- async def _get_job_metrics(
17
+ async def get_job_metrics(
37
18
  session: AsyncSession,
38
19
  job_model: JobModel,
20
+ after: Optional[datetime] = None,
21
+ before: Optional[datetime] = None,
22
+ limit: Optional[int] = None,
39
23
  ) -> JobMetrics:
40
- res = await session.execute(
24
+ """
25
+ Returns metrics ordered from the latest to the earliest.
26
+
27
+ Expected usage:
28
+ * limit=100 — get the latest 100 points
29
+ * after=<now - 1 hour> — get points for the last one hour
30
+ * before=<earliest timestamp from the last batch>, limit=100 — paginate back in history
31
+ """
32
+ stmt = (
41
33
  select(JobMetricsPoint)
42
34
  .where(JobMetricsPoint.job_id == job_model.id)
43
35
  .order_by(JobMetricsPoint.timestamp_micro.desc())
44
- .limit(2)
45
36
  )
37
+ if after is not None:
38
+ # we need +1 point for cpu_usage_percent, thus >=
39
+ stmt = stmt.where(JobMetricsPoint.timestamp_micro >= _datetime_to_unix_time_micro(after))
40
+ if before is not None:
41
+ stmt = stmt.where(JobMetricsPoint.timestamp_micro < _datetime_to_unix_time_micro(before))
42
+ if limit is not None:
43
+ # +1 for cpu_usage_percent
44
+ stmt = stmt.limit(limit + 1)
45
+ res = await session.execute(stmt)
46
46
  points = res.scalars().all()
47
+ # we need at least 2 points to calculate cpu_usage_percent
47
48
  if len(points) < 2:
48
49
  return JobMetrics(metrics=[])
49
- last_point = points[0]
50
- prev_point = points[1]
51
- return _calculate_job_metrics(last_point, prev_point)
50
+ return _calculate_job_metrics(points)
51
+
52
+
53
+ def _calculate_job_metrics(points: Sequence[JobMetricsPoint]) -> JobMetrics:
54
+ timestamps: list[datetime] = []
55
+ cpu_usage_points: list[int] = []
56
+ memory_usage_points: list[int] = []
57
+ memory_working_set_points: list[int] = []
58
+ gpus_memory_usage_points: defaultdict[int, list[int]] = defaultdict(list)
59
+ gpus_util_points: defaultdict[int, list[int]] = defaultdict(list)
52
60
 
61
+ gpus_detected_num: Optional[int] = None
62
+ gpus_detected_num_mismatch: bool = False
63
+ for point, prev_point in zip(points, points[1:]):
64
+ timestamps.append(_unix_time_micro_to_datetime(point.timestamp_micro))
65
+ cpu_usage_points.append(_get_cpu_usage(point, prev_point))
66
+ memory_usage_points.append(point.memory_usage_bytes)
67
+ memory_working_set_points.append(point.memory_working_set_bytes)
68
+ gpus_memory_usage = json.loads(point.gpus_memory_usage_bytes)
69
+ gpus_util = json.loads(point.gpus_util_percent)
70
+ if gpus_detected_num is None:
71
+ gpus_detected_num = len(gpus_memory_usage)
72
+ if len(gpus_memory_usage) != gpus_detected_num or len(gpus_util) != gpus_detected_num:
73
+ gpus_detected_num_mismatch = True
74
+ if not gpus_detected_num_mismatch:
75
+ for i in range(gpus_detected_num):
76
+ gpus_memory_usage_points[i].append(gpus_memory_usage[i])
77
+ gpus_util_points[i].append(gpus_util[i])
53
78
 
54
- def _calculate_job_metrics(last_point: JobMetricsPoint, prev_point: JobMetricsPoint) -> JobMetrics:
55
- metrics = []
56
- timestamp = _unix_time_micro_to_datetime(last_point.timestamp_micro)
57
- metrics.append(
79
+ metrics: list[Metric] = [
58
80
  Metric(
59
81
  name="cpu_usage_percent",
60
- timestamps=[timestamp],
61
- values=[_get_cpu_usage(last_point, prev_point)],
62
- )
63
- )
64
- metrics.append(
82
+ timestamps=timestamps,
83
+ values=cpu_usage_points,
84
+ ),
65
85
  Metric(
66
86
  name="memory_usage_bytes",
67
- timestamps=[timestamp],
68
- values=[last_point.memory_usage_bytes],
69
- )
70
- )
71
- metrics.append(
87
+ timestamps=timestamps,
88
+ values=memory_usage_points,
89
+ ),
72
90
  Metric(
73
91
  name="memory_working_set_bytes",
74
- timestamps=[timestamp],
75
- values=[last_point.memory_working_set_bytes],
76
- )
77
- )
78
-
79
- gpus_memory_usage_bytes = json.loads(last_point.gpus_memory_usage_bytes)
80
- gpus_util_percent = json.loads(last_point.gpus_util_percent)
81
- gpus_detected_num = len(gpus_memory_usage_bytes)
82
- metrics.append(
83
- Metric(
84
- name="gpus_detected_num",
85
- timestamps=[timestamp],
86
- values=[gpus_detected_num],
87
- )
88
- )
89
- for i in range(gpus_detected_num):
92
+ timestamps=timestamps,
93
+ values=memory_working_set_points,
94
+ ),
95
+ ]
96
+ if gpus_detected_num_mismatch:
97
+ # If number of GPUs changed in the time window, skip GPU metrics altogether, otherwise
98
+ # results can be unpredictable (e.g., one GPU takes the place of another, as they are
99
+ # identified by an array index only).
100
+ logger.warning("gpus_detected_num mismatch, skipping GPU metrics")
101
+ else:
90
102
  metrics.append(
103
+ # As gpus_detected_num is expected to be constant, we add only two points — the latest
104
+ # and the earliest in the batch
91
105
  Metric(
92
- name=f"gpu_memory_usage_bytes_gpu{i}",
93
- timestamps=[timestamp],
94
- values=[gpus_memory_usage_bytes[i]],
106
+ name="gpus_detected_num",
107
+ timestamps=[timestamps[0], timestamps[-1]]
108
+ if len(timestamps) > 1
109
+ else [timestamps[0]],
110
+ values=[gpus_detected_num, gpus_detected_num]
111
+ if len(timestamps) > 1
112
+ else [gpus_detected_num],
95
113
  )
96
114
  )
97
- metrics.append(
98
- Metric(
99
- name=f"gpu_util_percent_gpu{i}",
100
- timestamps=[timestamp],
101
- values=[gpus_util_percent[i]],
115
+ for index, gpu_memory_usage_points in gpus_memory_usage_points.items():
116
+ metrics.append(
117
+ Metric(
118
+ name=f"gpu_memory_usage_bytes_gpu{index}",
119
+ timestamps=timestamps,
120
+ values=gpu_memory_usage_points,
121
+ )
122
+ )
123
+ for index, gpu_util_points in gpus_util_points.items():
124
+ metrics.append(
125
+ Metric(
126
+ name=f"gpu_util_percent_gpu{index}",
127
+ timestamps=timestamps,
128
+ values=gpu_util_points,
129
+ )
102
130
  )
103
- )
104
131
  return JobMetrics(metrics=metrics)
105
132
 
106
133
 
107
134
  def _get_cpu_usage(last_point: JobMetricsPoint, prev_point: JobMetricsPoint) -> int:
108
135
  window = last_point.timestamp_micro - prev_point.timestamp_micro
136
+ if window == 0:
137
+ return 0
109
138
  return round((last_point.cpu_usage_micro - prev_point.cpu_usage_micro) / window * 100)
110
139
 
111
140
 
112
141
  def _unix_time_micro_to_datetime(unix_time_ms: int) -> datetime:
113
142
  return datetime.fromtimestamp(unix_time_ms / 1_000_000, tz=timezone.utc)
143
+
144
+
145
+ def _datetime_to_unix_time_micro(dt: datetime) -> int:
146
+ return int(dt.timestamp() * 1_000_000)
@@ -0,0 +1,87 @@
1
+ from collections.abc import Generator, Iterable
2
+
3
+ from prometheus_client import Metric
4
+ from prometheus_client.parser import text_string_to_metric_families
5
+ from prometheus_client.samples import Sample
6
+ from sqlalchemy import select
7
+ from sqlalchemy.ext.asyncio import AsyncSession
8
+ from sqlalchemy.orm import joinedload
9
+
10
+ from dstack._internal.core.models.runs import JobStatus
11
+ from dstack._internal.server.models import JobModel, JobPrometheusMetrics, ProjectModel
12
+
13
+
14
+ async def get_metrics(session: AsyncSession) -> str:
15
+ res = await session.execute(
16
+ select(JobPrometheusMetrics)
17
+ .join(JobModel)
18
+ .join(ProjectModel)
19
+ .where(JobModel.status.in_([JobStatus.RUNNING]))
20
+ .order_by(ProjectModel.name, JobModel.job_name)
21
+ .options(joinedload(JobPrometheusMetrics.job).joinedload(JobModel.project))
22
+ )
23
+ metrics_models = res.scalars().all()
24
+ return _process_metrics(metrics_models)
25
+
26
+
27
+ async def get_project_metrics(session: AsyncSession, project: ProjectModel) -> str:
28
+ res = await session.execute(
29
+ select(JobPrometheusMetrics)
30
+ .join(JobModel)
31
+ .where(
32
+ JobModel.project_id == project.id,
33
+ JobModel.status.in_([JobStatus.RUNNING]),
34
+ )
35
+ .order_by(JobModel.job_name)
36
+ .options(joinedload(JobPrometheusMetrics.job).joinedload(JobModel.project))
37
+ )
38
+ metrics_models = res.scalars().all()
39
+ return _process_metrics(metrics_models)
40
+
41
+
42
+ def _process_metrics(metrics_models: Iterable[JobPrometheusMetrics]) -> str:
43
+ metrics = _parse_and_enrich_metrics(metrics_models)
44
+ if not metrics:
45
+ return ""
46
+ return "\n".join(_render_metrics(metrics)) + "\n"
47
+
48
+
49
+ def _parse_and_enrich_metrics(metrics_models: Iterable[JobPrometheusMetrics]) -> list[Metric]:
50
+ metrics: dict[str, Metric] = {}
51
+ for metrics_model in metrics_models:
52
+ for metric in text_string_to_metric_families(metrics_model.text):
53
+ samples = metric.samples
54
+ metric.samples = []
55
+ name = metric.name
56
+ metric = metrics.setdefault(name, metric)
57
+ for sample in samples:
58
+ labels = sample.labels
59
+ labels.update(_get_dstack_labels(metrics_model.job))
60
+ # text_string_to_metric_families "fixes" counter names appending _total,
61
+ # we rebuild Sample to revert this
62
+ metric.samples.append(Sample(name, labels, *sample[2:]))
63
+ return list(metrics.values())
64
+
65
+
66
+ def _get_dstack_labels(job: JobModel) -> dict[str, str]:
67
+ return {
68
+ "dstack_project_name": job.project.name,
69
+ "dstack_run_name": job.run_name,
70
+ "dstack_job_name": job.job_name,
71
+ "dstack_job_num": str(job.job_num),
72
+ "dstack_replica_num": str(job.replica_num),
73
+ }
74
+
75
+
76
+ def _render_metrics(metrics: Iterable[Metric]) -> Generator[str, None, None]:
77
+ for metric in metrics:
78
+ yield f"# HELP {metric.name} {metric.documentation}"
79
+ yield f"# TYPE {metric.name} {metric.type}"
80
+ for sample in metric.samples:
81
+ parts: list[str] = [f"{sample.name}{{"]
82
+ parts.extend(",".join(f'{name}="{value}"' for name, value in sample.labels.items()))
83
+ parts.append(f"}} {sample.value}")
84
+ # text_string_to_metric_families converts milliseconds to float seconds
85
+ if isinstance(sample.timestamp, float):
86
+ parts.append(f" {int(sample.timestamp * 1000)}")
87
+ yield "".join(parts)
@@ -178,9 +178,6 @@ class ShimClient:
178
178
  # API v1 (a.k.a. Legacy API) — `/api/{submit,pull,stop}`
179
179
  _API_V2_MIN_SHIM_VERSION = (0, 18, 34)
180
180
 
181
- # A surrogate task ID for API-v1-over-v2 emulation (`_v2_compat_*` methods)
182
- _LEGACY_TASK_ID = "00000000-0000-0000-0000-000000000000"
183
-
184
181
  _shim_version: Optional["_Version"]
185
182
  _api_version: int
186
183
  _negotiated: bool = False
@@ -339,6 +336,20 @@ class ShimClient:
339
336
  resp = self._request("GET", "/api/pull", raise_for_status=True)
340
337
  return self._response(LegacyPullResponse, resp)
341
338
 
339
+ # Metrics
340
+
341
+ def get_task_metrics(self, task_id: "_TaskID") -> Optional[str]:
342
+ resp = self._request("GET", f"/metrics/tasks/{task_id}")
343
+ if resp.status_code == HTTPStatus.NOT_FOUND:
344
+ # Metrics exporter is not installed or old shim version
345
+ return None
346
+ if resp.status_code == HTTPStatus.BAD_GATEWAY:
347
+ # Metrics exporter is not available or returned an error
348
+ logger.info("failed to collect metrics for task %s: %s", task_id, resp.text)
349
+ return None
350
+ self._raise_for_status(resp)
351
+ return resp.text
352
+
342
353
  # Private methods used for public methods implementations
343
354
 
344
355
  def _request(
@@ -16,6 +16,7 @@ from dstack._internal.core.errors import (
16
16
  ServerClientError,
17
17
  )
18
18
  from dstack._internal.core.models.common import ApplyAction, is_core_model_instance
19
+ from dstack._internal.core.models.configurations import AnyRunConfiguration
19
20
  from dstack._internal.core.models.instances import (
20
21
  InstanceAvailability,
21
22
  InstanceOfferWithAvailability,
@@ -47,6 +48,7 @@ from dstack._internal.core.models.volumes import (
47
48
  )
48
49
  from dstack._internal.core.services import validate_dstack_resource_name
49
50
  from dstack._internal.core.services.diff import diff_models
51
+ from dstack._internal.server import settings
50
52
  from dstack._internal.server.db import get_db
51
53
  from dstack._internal.server.models import (
52
54
  JobModel,
@@ -552,6 +554,7 @@ async def stop_run(session: AsyncSession, run_model: RunModel, abort: bool):
552
554
  res = await session.execute(
553
555
  select(RunModel)
554
556
  .where(RunModel.id == run_model.id)
557
+ .order_by(RunModel.id) # take locks in order
555
558
  .with_for_update()
556
559
  .execution_options(populate_existing=True)
557
560
  )
@@ -559,6 +562,7 @@ async def stop_run(session: AsyncSession, run_model: RunModel, abort: bool):
559
562
  await session.execute(
560
563
  select(JobModel)
561
564
  .where(JobModel.run_id == run_model.id)
565
+ .order_by(JobModel.id) # take locks in order
562
566
  .with_for_update()
563
567
  .execution_options(populate_existing=True)
564
568
  )
@@ -592,7 +596,10 @@ async def delete_runs(
592
596
  await session.commit()
593
597
  async with get_locker().lock_ctx(RunModel.__tablename__, run_ids):
594
598
  res = await session.execute(
595
- select(RunModel).where(RunModel.id.in_(run_ids)).with_for_update()
599
+ select(RunModel)
600
+ .where(RunModel.id.in_(run_ids))
601
+ .order_by(RunModel.id) # take locks in order
602
+ .with_for_update()
596
603
  )
597
604
  run_models = res.scalars().all()
598
605
  active_runs = [r for r in run_models if not r.status.is_finished()]
@@ -832,12 +839,23 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
832
839
  run_spec.repo_id = DEFAULT_VIRTUAL_REPO_ID
833
840
  if run_spec.repo_data is None:
834
841
  run_spec.repo_data = VirtualRunRepoData()
842
+ if (
843
+ run_spec.merged_profile.utilization_policy is not None
844
+ and run_spec.merged_profile.utilization_policy.time_window
845
+ > settings.SERVER_METRICS_TTL_SECONDS
846
+ ):
847
+ raise ServerClientError(
848
+ f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s"
849
+ )
835
850
 
836
851
 
837
852
  _UPDATABLE_SPEC_FIELDS = ["repo_code_hash", "configuration"]
838
- # Most service fields can be updated via replica redeployment.
839
- # TODO: Allow updating other fields when a rolling deployment is supported.
840
- _UPDATABLE_CONFIGURATION_FIELDS = ["replicas", "scaling", "strip_prefix"]
853
+ _CONF_TYPE_TO_UPDATABLE_FIELDS = {
854
+ "dev-environment": ["inactivity_duration"],
855
+ # Most service fields can be updated via replica redeployment.
856
+ # TODO: Allow updating other fields when rolling deployment is supported.
857
+ "service": ["replicas", "scaling", "strip_prefix"],
858
+ }
841
859
 
842
860
 
843
861
  def _can_update_run_spec(current_run_spec: RunSpec, new_run_spec: RunSpec) -> bool:
@@ -850,11 +868,6 @@ def _can_update_run_spec(current_run_spec: RunSpec, new_run_spec: RunSpec) -> bo
850
868
 
851
869
 
852
870
  def _check_can_update_run_spec(current_run_spec: RunSpec, new_run_spec: RunSpec):
853
- if (
854
- current_run_spec.configuration.type != "service"
855
- or new_run_spec.configuration.type != "service"
856
- ):
857
- raise ServerClientError("Can only update service run configuration")
858
871
  spec_diff = diff_models(current_run_spec, new_run_spec)
859
872
  changed_spec_fields = list(spec_diff.keys())
860
873
  for key in changed_spec_fields:
@@ -863,13 +876,28 @@ def _check_can_update_run_spec(current_run_spec: RunSpec, new_run_spec: RunSpec)
863
876
  f"Failed to update fields {changed_spec_fields}."
864
877
  f" Can only update {_UPDATABLE_SPEC_FIELDS}."
865
878
  )
866
- configuration_diff = diff_models(current_run_spec.configuration, new_run_spec.configuration)
867
- changed_configuration_fields = list(configuration_diff.keys())
868
- for key in changed_configuration_fields:
869
- if key not in _UPDATABLE_CONFIGURATION_FIELDS:
879
+ _check_can_update_configuration(current_run_spec.configuration, new_run_spec.configuration)
880
+
881
+
882
+ def _check_can_update_configuration(
883
+ current: AnyRunConfiguration, new: AnyRunConfiguration
884
+ ) -> None:
885
+ if current.type != new.type:
886
+ raise ServerClientError(
887
+ f"Configuration type changed from {current.type} to {new.type}, cannot update"
888
+ )
889
+ updatable_fields = _CONF_TYPE_TO_UPDATABLE_FIELDS.get(new.type)
890
+ if updatable_fields is None:
891
+ raise ServerClientError(
892
+ f"Can only update {', '.join(_CONF_TYPE_TO_UPDATABLE_FIELDS)} configurations."
893
+ f" Not {new.type}"
894
+ )
895
+ diff = diff_models(current, new)
896
+ changed_fields = list(diff.keys())
897
+ for key in changed_fields:
898
+ if key not in updatable_fields:
870
899
  raise ServerClientError(
871
- f"Failed to update fields {changed_configuration_fields}."
872
- f" Can only update {_UPDATABLE_CONFIGURATION_FIELDS}"
900
+ f"Failed to update fields {changed_fields}. Can only update {updatable_fields}"
873
901
  )
874
902
 
875
903
 
@@ -264,6 +264,7 @@ async def delete_volumes(session: AsyncSession, project: ProjectModel, names: Li
264
264
  .options(selectinload(VolumeModel.user))
265
265
  .options(selectinload(VolumeModel.attachments))
266
266
  .execution_options(populate_existing=True)
267
+ .order_by(VolumeModel.id) # take locks in order
267
268
  .with_for_update()
268
269
  )
269
270
  volume_models = res.scalars().unique().all()
@@ -31,6 +31,8 @@ ALEMBIC_MIGRATIONS_LOCATION = os.getenv(
31
31
  DB_POOL_SIZE = int(os.getenv("DSTACK_DB_POOL_SIZE", 10))
32
32
  DB_MAX_OVERFLOW = int(os.getenv("DSTACK_DB_MAX_OVERFLOW", 10))
33
33
 
34
+ MAX_OFFERS_TRIED = int(os.getenv("DSTACK_SERVER_MAX_OFFERS_TRIED", 25))
35
+
34
36
  SERVER_CONFIG_DISABLED = os.getenv("DSTACK_SERVER_CONFIG_DISABLED") is not None
35
37
  SERVER_CONFIG_ENABLED = not SERVER_CONFIG_DISABLED
36
38
 
@@ -74,3 +76,4 @@ LOCAL_BACKEND_ENABLED = os.getenv("DSTACK_LOCAL_BACKEND_ENABLED") is not None
74
76
  UPDATE_DEFAULT_PROJECT = os.getenv("DSTACK_UPDATE_DEFAULT_PROJECT") is not None
75
77
  DO_NOT_UPDATE_DEFAULT_PROJECT = os.getenv("DSTACK_DO_NOT_UPDATE_DEFAULT_PROJECT") is not None
76
78
  SKIP_GATEWAY_UPDATE = os.getenv("DSTACK_SKIP_GATEWAY_UPDATE", None) is not None
79
+ ENABLE_PROMETHEUS_METRICS = os.getenv("DSTACK_ENABLE_PROMETHEUS_METRICS", None) is not None
@@ -1,3 +1,3 @@
1
1
  <!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><title>dstack</title><meta name="description" content="Get GPUs at the best prices and availability from a wide range of providers. No cloud account of your own is required.
2
2
  "/><link rel="preconnect" href="https://fonts.googleapis.com"><link rel="preconnect" href="https://fonts.gstatic.com" crossorigin><link href="https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap" rel="stylesheet"><meta name="og:title" content="dstack"><meta name="og:type" content="article"><meta name="og:image" content="/splash_thumbnail.png"><meta name="og:description" content="Get GPUs at the best prices and availability from a wide range of providers. No cloud account of your own is required.
3
- "><link rel="icon" type="image/x-icon" href="/assets/favicon.ico"><link rel="icon" type="image/png" sizes="16x16" href="/assets/favicon-16x16.png"><link rel="icon" type="image/png" sizes="32x32" href="/assets/favicon-32x32.png"><link rel="icon" type="image/png" sizes="48x48" href="/assets/favicon-48x48.png"><link rel="manifest" href="/assets/manifest.webmanifest"><meta name="mobile-web-app-capable" content="yes"><meta name="theme-color" content="#fff"><meta name="application-name" content="dstackai"><link rel="apple-touch-icon" sizes="57x57" href="/assets/apple-touch-icon-57x57.png"><link rel="apple-touch-icon" sizes="60x60" href="/assets/apple-touch-icon-60x60.png"><link rel="apple-touch-icon" sizes="72x72" href="/assets/apple-touch-icon-72x72.png"><link rel="apple-touch-icon" sizes="76x76" href="/assets/apple-touch-icon-76x76.png"><link rel="apple-touch-icon" sizes="114x114" href="/assets/apple-touch-icon-114x114.png"><link rel="apple-touch-icon" sizes="120x120" href="/assets/apple-touch-icon-120x120.png"><link rel="apple-touch-icon" sizes="144x144" href="/assets/apple-touch-icon-144x144.png"><link rel="apple-touch-icon" sizes="152x152" href="/assets/apple-touch-icon-152x152.png"><link rel="apple-touch-icon" sizes="167x167" href="/assets/apple-touch-icon-167x167.png"><link rel="apple-touch-icon" sizes="180x180" href="/assets/apple-touch-icon-180x180.png"><link rel="apple-touch-icon" sizes="1024x1024" href="/assets/apple-touch-icon-1024x1024.png"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="black-translucent"><meta name="apple-mobile-web-app-title" content="dstackai"><link rel="apple-touch-startup-image" media="(device-width: 320px) and (device-height: 568px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-640x1136.png"><link rel="apple-touch-startup-image" media="(device-width: 320px) and (device-height: 568px) and 
(-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-1136x640.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 667px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-750x1334.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 667px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-1334x750.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 812px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1125x2436.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 812px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2436x1125.png"><link rel="apple-touch-startup-image" media="(device-width: 390px) and (device-height: 844px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1170x2532.png"><link rel="apple-touch-startup-image" media="(device-width: 390px) and (device-height: 844px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2532x1170.png"><link rel="apple-touch-startup-image" media="(device-width: 393px) and (device-height: 852px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1179x2556.png"><link rel="apple-touch-startup-image" media="(device-width: 393px) and (device-height: 852px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2556x1179.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" 
href="/assets/apple-touch-startup-image-828x1792.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-1792x828.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1242x2688.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2688x1242.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 736px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1242x2208.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 736px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2208x1242.png"><link rel="apple-touch-startup-image" media="(device-width: 428px) and (device-height: 926px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1284x2778.png"><link rel="apple-touch-startup-image" media="(device-width: 428px) and (device-height: 926px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2778x1284.png"><link rel="apple-touch-startup-image" media="(device-width: 430px) and (device-height: 932px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1290x2796.png"><link rel="apple-touch-startup-image" media="(device-width: 430px) and (device-height: 932px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2796x1290.png"><link 
rel="apple-touch-startup-image" media="(device-width: 744px) and (device-height: 1133px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1488x2266.png"><link rel="apple-touch-startup-image" media="(device-width: 744px) and (device-height: 1133px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2266x1488.png"><link rel="apple-touch-startup-image" media="(device-width: 768px) and (device-height: 1024px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1536x2048.png"><link rel="apple-touch-startup-image" media="(device-width: 768px) and (device-height: 1024px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2048x1536.png"><link rel="apple-touch-startup-image" media="(device-width: 810px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1620x2160.png"><link rel="apple-touch-startup-image" media="(device-width: 810px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2160x1620.png"><link rel="apple-touch-startup-image" media="(device-width: 820px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1640x2160.png"><link rel="apple-touch-startup-image" media="(device-width: 820px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2160x1640.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) and (device-height: 1194px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1668x2388.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) 
and (device-height: 1194px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2388x1668.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) and (device-height: 1112px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1668x2224.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) and (device-height: 1112px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2224x1668.png"><link rel="apple-touch-startup-image" media="(device-width: 1024px) and (device-height: 1366px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-2048x2732.png"><link rel="apple-touch-startup-image" media="(device-width: 1024px) and (device-height: 1366px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2732x2048.png"><meta name="msapplication-TileColor" content="#fff"><meta name="msapplication-TileImage" content="/assets/mstile-144x144.png"><meta name="msapplication-config" content="/assets/browserconfig.xml"><link rel="yandex-tableau-widget" href="/assets/yandex-browser-manifest.json"><script defer="defer" src="/main-fe8fd9db55df8d10e648.js"></script><link href="/main-7510e71dfa9749a4e70e.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div class="b-page-header" id="header"></div><div id="root"></div></body></html>
3
+ "><link rel="icon" type="image/x-icon" href="/assets/favicon.ico"><link rel="icon" type="image/png" sizes="16x16" href="/assets/favicon-16x16.png"><link rel="icon" type="image/png" sizes="32x32" href="/assets/favicon-32x32.png"><link rel="icon" type="image/png" sizes="48x48" href="/assets/favicon-48x48.png"><link rel="manifest" href="/assets/manifest.webmanifest"><meta name="mobile-web-app-capable" content="yes"><meta name="theme-color" content="#fff"><meta name="application-name" content="dstackai"><link rel="apple-touch-icon" sizes="57x57" href="/assets/apple-touch-icon-57x57.png"><link rel="apple-touch-icon" sizes="60x60" href="/assets/apple-touch-icon-60x60.png"><link rel="apple-touch-icon" sizes="72x72" href="/assets/apple-touch-icon-72x72.png"><link rel="apple-touch-icon" sizes="76x76" href="/assets/apple-touch-icon-76x76.png"><link rel="apple-touch-icon" sizes="114x114" href="/assets/apple-touch-icon-114x114.png"><link rel="apple-touch-icon" sizes="120x120" href="/assets/apple-touch-icon-120x120.png"><link rel="apple-touch-icon" sizes="144x144" href="/assets/apple-touch-icon-144x144.png"><link rel="apple-touch-icon" sizes="152x152" href="/assets/apple-touch-icon-152x152.png"><link rel="apple-touch-icon" sizes="167x167" href="/assets/apple-touch-icon-167x167.png"><link rel="apple-touch-icon" sizes="180x180" href="/assets/apple-touch-icon-180x180.png"><link rel="apple-touch-icon" sizes="1024x1024" href="/assets/apple-touch-icon-1024x1024.png"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="black-translucent"><meta name="apple-mobile-web-app-title" content="dstackai"><link rel="apple-touch-startup-image" media="(device-width: 320px) and (device-height: 568px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-640x1136.png"><link rel="apple-touch-startup-image" media="(device-width: 320px) and (device-height: 568px) and 
(-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-1136x640.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 667px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-750x1334.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 667px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-1334x750.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 812px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1125x2436.png"><link rel="apple-touch-startup-image" media="(device-width: 375px) and (device-height: 812px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2436x1125.png"><link rel="apple-touch-startup-image" media="(device-width: 390px) and (device-height: 844px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1170x2532.png"><link rel="apple-touch-startup-image" media="(device-width: 390px) and (device-height: 844px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2532x1170.png"><link rel="apple-touch-startup-image" media="(device-width: 393px) and (device-height: 852px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1179x2556.png"><link rel="apple-touch-startup-image" media="(device-width: 393px) and (device-height: 852px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2556x1179.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" 
href="/assets/apple-touch-startup-image-828x1792.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-1792x828.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1242x2688.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 896px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2688x1242.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 736px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1242x2208.png"><link rel="apple-touch-startup-image" media="(device-width: 414px) and (device-height: 736px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2208x1242.png"><link rel="apple-touch-startup-image" media="(device-width: 428px) and (device-height: 926px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1284x2778.png"><link rel="apple-touch-startup-image" media="(device-width: 428px) and (device-height: 926px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2778x1284.png"><link rel="apple-touch-startup-image" media="(device-width: 430px) and (device-height: 932px) and (-webkit-device-pixel-ratio: 3) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1290x2796.png"><link rel="apple-touch-startup-image" media="(device-width: 430px) and (device-height: 932px) and (-webkit-device-pixel-ratio: 3) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2796x1290.png"><link 
rel="apple-touch-startup-image" media="(device-width: 744px) and (device-height: 1133px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1488x2266.png"><link rel="apple-touch-startup-image" media="(device-width: 744px) and (device-height: 1133px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2266x1488.png"><link rel="apple-touch-startup-image" media="(device-width: 768px) and (device-height: 1024px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1536x2048.png"><link rel="apple-touch-startup-image" media="(device-width: 768px) and (device-height: 1024px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2048x1536.png"><link rel="apple-touch-startup-image" media="(device-width: 810px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1620x2160.png"><link rel="apple-touch-startup-image" media="(device-width: 810px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2160x1620.png"><link rel="apple-touch-startup-image" media="(device-width: 820px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1640x2160.png"><link rel="apple-touch-startup-image" media="(device-width: 820px) and (device-height: 1080px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2160x1640.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) and (device-height: 1194px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1668x2388.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) 
and (device-height: 1194px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2388x1668.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) and (device-height: 1112px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-1668x2224.png"><link rel="apple-touch-startup-image" media="(device-width: 834px) and (device-height: 1112px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2224x1668.png"><link rel="apple-touch-startup-image" media="(device-width: 1024px) and (device-height: 1366px) and (-webkit-device-pixel-ratio: 2) and (orientation: portrait)" href="/assets/apple-touch-startup-image-2048x2732.png"><link rel="apple-touch-startup-image" media="(device-width: 1024px) and (device-height: 1366px) and (-webkit-device-pixel-ratio: 2) and (orientation: landscape)" href="/assets/apple-touch-startup-image-2732x2048.png"><meta name="msapplication-TileColor" content="#fff"><meta name="msapplication-TileImage" content="/assets/mstile-144x144.png"><meta name="msapplication-config" content="/assets/browserconfig.xml"><link rel="yandex-tableau-widget" href="/assets/yandex-browser-manifest.json"><script defer="defer" src="/main-4eb116b97819badd1e2c.js"></script><link href="/main-da9f8c06a69c20dac23e.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div class="b-page-header" id="header"></div><div id="root"></div></body></html>