dstack 0.19.1__py3-none-any.whl → 0.19.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (68)
  1. dstack/_internal/cli/commands/metrics.py +138 -0
  2. dstack/_internal/cli/commands/stats.py +5 -119
  3. dstack/_internal/cli/main.py +2 -0
  4. dstack/_internal/cli/services/profile.py +9 -0
  5. dstack/_internal/core/backends/aws/configurator.py +1 -0
  6. dstack/_internal/core/backends/base/compute.py +4 -1
  7. dstack/_internal/core/backends/base/models.py +7 -7
  8. dstack/_internal/core/backends/configurators.py +9 -0
  9. dstack/_internal/core/backends/cudo/compute.py +2 -0
  10. dstack/_internal/core/backends/cudo/configurator.py +0 -13
  11. dstack/_internal/core/backends/datacrunch/compute.py +118 -32
  12. dstack/_internal/core/backends/datacrunch/configurator.py +16 -11
  13. dstack/_internal/core/backends/gcp/compute.py +140 -26
  14. dstack/_internal/core/backends/gcp/configurator.py +2 -0
  15. dstack/_internal/core/backends/gcp/features/__init__.py +0 -0
  16. dstack/_internal/core/backends/gcp/features/tcpx.py +34 -0
  17. dstack/_internal/core/backends/gcp/models.py +13 -1
  18. dstack/_internal/core/backends/gcp/resources.py +64 -27
  19. dstack/_internal/core/backends/lambdalabs/compute.py +2 -4
  20. dstack/_internal/core/backends/lambdalabs/configurator.py +0 -21
  21. dstack/_internal/core/backends/models.py +8 -0
  22. dstack/_internal/core/backends/nebius/__init__.py +0 -0
  23. dstack/_internal/core/backends/nebius/backend.py +16 -0
  24. dstack/_internal/core/backends/nebius/compute.py +272 -0
  25. dstack/_internal/core/backends/nebius/configurator.py +74 -0
  26. dstack/_internal/core/backends/nebius/models.py +108 -0
  27. dstack/_internal/core/backends/nebius/resources.py +240 -0
  28. dstack/_internal/core/backends/tensordock/api_client.py +5 -4
  29. dstack/_internal/core/backends/tensordock/compute.py +2 -15
  30. dstack/_internal/core/errors.py +14 -0
  31. dstack/_internal/core/models/backends/base.py +2 -0
  32. dstack/_internal/core/models/profiles.py +3 -0
  33. dstack/_internal/proxy/lib/schemas/model_proxy.py +3 -3
  34. dstack/_internal/server/background/tasks/process_instances.py +12 -7
  35. dstack/_internal/server/background/tasks/process_running_jobs.py +20 -0
  36. dstack/_internal/server/background/tasks/process_submitted_jobs.py +3 -2
  37. dstack/_internal/server/routers/prometheus.py +5 -0
  38. dstack/_internal/server/security/permissions.py +19 -1
  39. dstack/_internal/server/services/instances.py +14 -6
  40. dstack/_internal/server/services/jobs/__init__.py +3 -3
  41. dstack/_internal/server/services/offers.py +4 -2
  42. dstack/_internal/server/services/runs.py +0 -2
  43. dstack/_internal/server/statics/index.html +1 -1
  44. dstack/_internal/server/statics/{main-da9f8c06a69c20dac23e.css → main-8f9c66f404e9c7e7e020.css} +1 -1
  45. dstack/_internal/server/statics/{main-4a0fe83e84574654e397.js → main-e190de603dc1e9f485ec.js} +7306 -149
  46. dstack/_internal/server/statics/{main-4a0fe83e84574654e397.js.map → main-e190de603dc1e9f485ec.js.map} +1 -1
  47. dstack/_internal/utils/common.py +8 -2
  48. dstack/_internal/utils/event_loop.py +30 -0
  49. dstack/_internal/utils/ignore.py +2 -0
  50. dstack/api/server/_fleets.py +3 -5
  51. dstack/api/server/_runs.py +6 -7
  52. dstack/version.py +1 -1
  53. {dstack-0.19.1.dist-info → dstack-0.19.3.dist-info}/METADATA +27 -11
  54. {dstack-0.19.1.dist-info → dstack-0.19.3.dist-info}/RECORD +67 -57
  55. tests/_internal/core/backends/datacrunch/test_configurator.py +6 -2
  56. tests/_internal/server/background/tasks/test_process_instances.py +4 -2
  57. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +29 -0
  58. tests/_internal/server/routers/test_backends.py +116 -0
  59. tests/_internal/server/routers/test_fleets.py +2 -0
  60. tests/_internal/server/routers/test_prometheus.py +21 -0
  61. tests/_internal/server/routers/test_runs.py +4 -0
  62. tests/_internal/utils/test_common.py +16 -1
  63. tests/_internal/utils/test_event_loop.py +18 -0
  64. dstack/_internal/core/backends/datacrunch/api_client.py +0 -77
  65. {dstack-0.19.1.dist-info → dstack-0.19.3.dist-info}/LICENSE.md +0 -0
  66. {dstack-0.19.1.dist-info → dstack-0.19.3.dist-info}/WHEEL +0 -0
  67. {dstack-0.19.1.dist-info → dstack-0.19.3.dist-info}/entry_points.txt +0 -0
  68. {dstack-0.19.1.dist-info → dstack-0.19.3.dist-info}/top_level.txt +0 -0
tests/_internal/server/background/tasks/test_process_instances.py

@@ -8,7 +8,7 @@ import pytest
 from freezegun import freeze_time
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from dstack._internal.core.errors import BackendError, ProvisioningError
+from dstack._internal.core.errors import BackendError, NotYetTerminated, ProvisioningError
 from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.instances import (
     Gpu,
@@ -384,7 +384,9 @@ class TestTerminate:
 
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
-    @pytest.mark.parametrize("error", [BackendError("err"), RuntimeError("err")])
+    @pytest.mark.parametrize(
+        "error", [BackendError("err"), RuntimeError("err"), NotYetTerminated("")]
+    )
    async def test_terminate_retry(self, test_db, session: AsyncSession, error: Exception):
         project = await create_project(session=session)
         instance = await create_instance(
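
The widened parametrize shows that the new `NotYetTerminated` error (added in `dstack/_internal/core/errors.py`, +14 lines above) is handled on the same retry path as `BackendError` and `RuntimeError` during instance termination. A minimal sketch of that retry contract; every name except `NotYetTerminated` is hypothetical:

```python
class NotYetTerminated(Exception):
    """Raised when a backend reports that instance shutdown is still in progress."""


def try_terminate(terminate) -> bool:
    """Return True when termination completed; False to retry on the next pass.

    Mirrors test_terminate_retry: BackendError, RuntimeError, and
    NotYetTerminated all leave the instance in a retryable state.
    """
    try:
        terminate()
    except NotYetTerminated:
        return False  # shutdown started but not finished; poll again later
    except Exception:
        return False  # transient backend/runtime failure; retried as before
    return True
```
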
tests/_internal/server/background/tasks/test_process_submitted_jobs.py

@@ -536,6 +536,35 @@ class TestProcessSubmittedJobs:
         assert instance.total_blocks == 4
         assert instance.busy_blocks == 2
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_assigns_job_to_specific_fleet(self, test_db, session: AsyncSession):
+        project = await create_project(session)
+        user = await create_user(session)
+        repo = await create_repo(session=session, project_id=project.id)
+        fleet_a = await create_fleet(session=session, project=project, name="a")
+        await create_instance(session=session, project=project, fleet=fleet_a, price=1.0)
+        fleet_b = await create_fleet(session=session, project=project, name="b")
+        await create_instance(session=session, project=project, fleet=fleet_b, price=2.0)
+        fleet_c = await create_fleet(session=session, project=project, name="c")
+        await create_instance(session=session, project=project, fleet=fleet_c, price=3.0)
+        run_spec = get_run_spec(run_name="test-run", repo_id=repo.name)
+        # When more than one fleet is requested, the cheapest one is selected
+        run_spec.configuration.fleets = ["c", "b"]
+        run = await create_run(
+            session=session, project=project, repo=repo, user=user, run_spec=run_spec
+        )
+        job = await create_job(session=session, run=run)
+
+        await process_submitted_jobs()
+
+        await session.refresh(job)
+        res = await session.execute(select(JobModel).options(joinedload(JobModel.instance)))
+        job = res.unique().scalar_one()
+        assert job.status == JobStatus.SUBMITTED
+        assert job.instance is not None
+        assert job.instance.fleet == fleet_b
+
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     async def test_creates_new_instance_in_existing_fleet(self, test_db, session: AsyncSession):
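
The inline comment above states the selection rule this test locks in: when the new `fleets` run-configuration field names several fleets, the cheapest matching instance wins (fleets "c" and "b" at 3.0 and 2.0 per hour resolve to "b"). A minimal sketch of that rule; `Candidate` and `pick_fleet` are illustrative, not dstack's actual API:

```python
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Candidate:
    fleet_name: str
    price: float  # hourly price of the fleet's idle instance


def pick_fleet(candidates: List[Candidate], requested: Optional[List[str]]) -> Optional[Candidate]:
    # None means "any fleet"; otherwise keep only instances from the requested fleets.
    if requested is not None:
        candidates = [c for c in candidates if c.fleet_name in requested]
    # Cheapest eligible instance wins.
    return min(candidates, key=lambda c: c.price, default=None)


# Mirrors the test: fleets ["c", "b"] with prices 3.0 and 2.0 select fleet "b".
chosen = pick_fleet(
    [Candidate("a", 1.0), Candidate("b", 2.0), Candidate("c", 3.0)],
    requested=["c", "b"],
)
assert chosen is not None and chosen.fleet_name == "b"
```
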
tests/_internal/server/routers/test_backends.py

@@ -1,4 +1,5 @@
 import json
+import sys
 from datetime import datetime, timezone
 from unittest.mock import Mock, patch
 
@@ -25,7 +26,14 @@ from dstack._internal.server.testing.common import (
     get_auth_headers,
     get_volume_provisioning_data,
 )
+from dstack._internal.utils.crypto import generate_rsa_key_pair_bytes
 
+FAKE_NEBIUS_SERVICE_ACCOUNT_CREDS = {
+    "type": "service_account",
+    "service_account_id": "serviceaccount-e00test",
+    "public_key_id": "publickey-e00test",
+    "private_key_content": generate_rsa_key_pair_bytes()[0].decode(),
+}
 FAKE_OCI_CLIENT_CREDS = {
     "type": "client",
     "user": "ocid1.user.oc1..aaaaaaaa",
@@ -62,6 +70,7 @@ class TestListBackendTypes:
         "gcp",
         "kubernetes",
         "lambda",
+        *(["nebius"] if sys.version_info >= (3, 10) else []),
         "oci",
         "runpod",
         "tensordock",
@@ -182,6 +191,113 @@ class TestCreateBackend:
         res = await session.execute(select(BackendModel))
         assert len(res.scalars().all()) == 1
 
+    @pytest.mark.asyncio
+    @pytest.mark.skipif(sys.version_info < (3, 10), reason="Nebius requires Python 3.10")
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    class TestNebius:
+        async def test_creates(self, test_db, session: AsyncSession, client: AsyncClient):
+            user = await create_user(session=session, global_role=GlobalRole.USER)
+            project = await create_project(session=session, owner=user)
+            await add_project_member(
+                session=session, project=project, user=user, project_role=ProjectRole.ADMIN
+            )
+            body = {
+                "type": "nebius",
+                "creds": FAKE_NEBIUS_SERVICE_ACCOUNT_CREDS,
+            }
+            with patch(
+                "dstack._internal.core.backends.nebius.resources.get_region_to_project_id_map"
+            ) as get_region_to_project_id_map:
+                get_region_to_project_id_map.return_value = {"eu-north1": "project-e00test"}
+                response = await client.post(
+                    f"/api/project/{project.name}/backends/create",
+                    headers=get_auth_headers(user.token),
+                    json=body,
+                )
+            assert response.status_code == 200, response.json()
+            res = await session.execute(select(BackendModel))
+            assert len(res.scalars().all()) == 1
+
+        async def test_not_creates_with_invalid_creds(
+            self, test_db, session: AsyncSession, client: AsyncClient
+        ):
+            user = await create_user(session=session, global_role=GlobalRole.USER)
+            project = await create_project(session=session, owner=user)
+            await add_project_member(
+                session=session, project=project, user=user, project_role=ProjectRole.ADMIN
+            )
+            body = {
+                "type": "nebius",
+                "creds": FAKE_NEBIUS_SERVICE_ACCOUNT_CREDS,
+            }
+            with patch(
+                "dstack._internal.core.backends.nebius.resources.get_region_to_project_id_map"
+            ) as get_region_to_project_id_map:
+                get_region_to_project_id_map.side_effect = ValueError()
+                response = await client.post(
+                    f"/api/project/{project.name}/backends/create",
+                    headers=get_auth_headers(user.token),
+                    json=body,
+                )
+            assert response.status_code == 400, response.json()
+            res = await session.execute(select(BackendModel))
+            assert len(res.scalars().all()) == 0
+
+        async def test_creates_with_regions(
+            self, test_db, session: AsyncSession, client: AsyncClient
+        ):
+            user = await create_user(session=session, global_role=GlobalRole.USER)
+            project = await create_project(session=session, owner=user)
+            await add_project_member(
+                session=session, project=project, user=user, project_role=ProjectRole.ADMIN
+            )
+            body = {
+                "type": "nebius",
+                "creds": FAKE_NEBIUS_SERVICE_ACCOUNT_CREDS,
+                "regions": ["eu-north1"],
+            }
+            with patch(
+                "dstack._internal.core.backends.nebius.resources.get_region_to_project_id_map"
+            ) as get_region_to_project_id_map:
+                get_region_to_project_id_map.return_value = {
+                    "eu-north1": "project-e00test",
+                    "eu-west1": "project-e01test",
+                }
+                response = await client.post(
+                    f"/api/project/{project.name}/backends/create",
+                    headers=get_auth_headers(user.token),
+                    json=body,
+                )
+            assert response.status_code == 200, response.json()
+            res = await session.execute(select(BackendModel))
+            assert len(res.scalars().all()) == 1
+
+        async def test_not_creates_with_invalid_regions(
+            self, test_db, session: AsyncSession, client: AsyncClient
+        ):
+            user = await create_user(session=session, global_role=GlobalRole.USER)
+            project = await create_project(session=session, owner=user)
+            await add_project_member(
+                session=session, project=project, user=user, project_role=ProjectRole.ADMIN
+            )
+            body = {
+                "type": "nebius",
+                "creds": FAKE_NEBIUS_SERVICE_ACCOUNT_CREDS,
+                "regions": ["xx-xxxx1"],
+            }
+            with patch(
+                "dstack._internal.core.backends.nebius.resources.get_region_to_project_id_map"
+            ) as get_region_to_project_id_map:
+                get_region_to_project_id_map.return_value = {"eu-north1": "project-e00test"}
+                response = await client.post(
+                    f"/api/project/{project.name}/backends/create",
+                    headers=get_auth_headers(user.token),
+                    json=body,
+                )
+            assert response.status_code == 400, response.json()
+            res = await session.execute(select(BackendModel))
+            assert len(res.scalars().all()) == 0
+
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     async def test_creates_oci_backend(self, test_db, session: AsyncSession, client: AsyncClient):
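
Together these four tests pin down the new Nebius configurator's contract: credentials are validated by calling `get_region_to_project_id_map`, and any requested region must appear in the returned map, else the server answers 400. A minimal sketch of that validation flow; apart from `get_region_to_project_id_map` (stubbed here in place of the real Nebius call the tests patch), all names are hypothetical:

```python
from typing import Dict, List, Optional


def get_region_to_project_id_map(creds: dict) -> Dict[str, str]:
    # Stub standing in for the real Nebius API call that the tests patch.
    if creds.get("type") != "service_account":
        raise ValueError("bad credentials")
    return {"eu-north1": "project-e00test"}


class InvalidBackendConfigError(Exception):
    """Stand-in for whatever server error produces the 400 responses above."""


def validate_nebius_backend(creds: dict, regions: Optional[List[str]]) -> Dict[str, str]:
    try:
        region_to_project = get_region_to_project_id_map(creds)
    except ValueError:
        raise InvalidBackendConfigError("Invalid Nebius credentials")
    if regions:
        unknown = sorted(set(regions) - set(region_to_project))
        if unknown:
            raise InvalidBackendConfigError(f"Unknown regions: {unknown}")
        region_to_project = {r: region_to_project[r] for r in regions}
    return region_to_project
```
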
tests/_internal/server/routers/test_fleets.py

@@ -366,6 +366,7 @@ class TestCreateFleet:
             "name": "",
             "default": False,
             "reservation": None,
+            "fleets": None,
         },
         "autocreated": False,
     },
@@ -484,6 +485,7 @@ class TestCreateFleet:
             "name": "",
             "default": False,
             "reservation": None,
+            "fleets": None,
         },
         "autocreated": False,
     },
tests/_internal/server/routers/test_prometheus.py

@@ -28,6 +28,7 @@ from dstack._internal.server.testing.common import (
     create_repo,
     create_run,
     create_user,
+    get_auth_headers,
     get_instance_offer_with_availability,
     get_job_provisioning_data,
     get_job_runtime_data,
@@ -38,6 +39,7 @@ from dstack._internal.server.testing.common import (
 @pytest.fixture
 def enable_metrics(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setattr("dstack._internal.server.settings.ENABLE_PROMETHEUS_METRICS", True)
+    monkeypatch.setattr("dstack._internal.server.routers.prometheus._auth._token", None)
 
 
 FAKE_NOW = datetime(2023, 1, 2, 3, 4, tzinfo=timezone.utc)
@@ -289,6 +291,25 @@ class TestGetPrometheusMetrics:
         response = await client.get("/metrics")
         assert response.status_code == 404
 
+    @pytest.mark.parametrize("token", [None, "foo"])
+    async def test_returns_403_if_not_authenticated(
+        self, monkeypatch: pytest.MonkeyPatch, client: AsyncClient, token: Optional[str]
+    ):
+        monkeypatch.setattr("dstack._internal.server.routers.prometheus._auth._token", "secret")
+        if token is not None:
+            headers = get_auth_headers(token)
+        else:
+            headers = None
+        response = await client.get("/metrics", headers=headers)
+        assert response.status_code == 403
+
+    async def test_returns_200_if_token_is_valid(
+        self, monkeypatch: pytest.MonkeyPatch, client: AsyncClient
+    ):
+        monkeypatch.setattr("dstack._internal.server.routers.prometheus._auth._token", "secret")
+        response = await client.get("/metrics", headers=get_auth_headers("secret"))
+        assert response.status_code == 200
+
 
 async def _create_project(session: AsyncSession, name: str, user: UserModel) -> ProjectModel:
     project = await create_project(session=session, owner=user, name=name)
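
The new tests describe the `/metrics` auth behavior introduced in `dstack/_internal/server/routers/prometheus.py` (+5 lines above): when `_auth._token` is unset the endpoint is open, otherwise a request must carry the token, and a missing or wrong token yields 403. A minimal sketch of such a dependency; the class and its wiring are assumptions, only the monkeypatched `_token` attribute is taken from the tests:

```python
from typing import Optional

from fastapi import HTTPException, Request


class _TokenAuth:
    """Open when no token is configured; otherwise require `Authorization: Bearer <token>`."""

    def __init__(self, token: Optional[str] = None):
        self._token = token  # the attribute the tests monkeypatch

    async def __call__(self, request: Request) -> None:
        if self._token is None:
            return  # auth disabled, matching the enable_metrics fixture
        authorization = request.headers.get("Authorization", "")
        scheme, _, credentials = authorization.partition(" ")
        if scheme.lower() != "bearer" or credentials != self._token:
            raise HTTPException(status_code=403)


_auth = _TokenAuth()
```

In the router this would presumably be wired as something like `Depends(_auth)` on the `/metrics` route; the tests fix only the `_token` attribute's behavior, not this wiring.
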
tests/_internal/server/routers/test_runs.py

@@ -124,6 +124,7 @@ def get_dev_env_run_plan_dict(
         "idle_duration": None,
         "utilization_policy": None,
         "reservation": None,
+        "fleets": None,
     },
     "configuration_path": "dstack.yaml",
     "profile": {
@@ -142,6 +143,7 @@ def get_dev_env_run_plan_dict(
         "idle_duration": None,
         "utilization_policy": None,
         "reservation": None,
+        "fleets": None,
     },
     "repo_code_hash": None,
     "repo_data": {"repo_dir": "/repo", "repo_type": "local"},
@@ -274,6 +276,7 @@ def get_dev_env_run_dict(
         "idle_duration": None,
         "utilization_policy": None,
         "reservation": None,
+        "fleets": None,
     },
     "configuration_path": "dstack.yaml",
     "profile": {
@@ -292,6 +295,7 @@ def get_dev_env_run_dict(
         "idle_duration": None,
         "utilization_policy": None,
         "reservation": None,
+        "fleets": None,
     },
     "repo_code_hash": None,
     "repo_data": {"repo_dir": "/repo", "repo_type": "local"},
tests/_internal/utils/test_common.py

@@ -67,21 +67,36 @@ class TestPrettyDate:
         past_time = now - timedelta(days=5)
         assert pretty_date(past_time) == "5 days ago"
 
+    def test_week_ago(self):
+        now = datetime.now(tz=timezone.utc)
+        past_time = now - timedelta(days=7)
+        assert pretty_date(past_time) == "1 week ago"
+
     def test_weeks_ago(self):
         now = datetime.now(tz=timezone.utc)
         past_time = now - timedelta(days=21)
         assert pretty_date(past_time) == "3 weeks ago"
 
+    def test_month_ago(self):
+        now = datetime.now(tz=timezone.utc)
+        past_time = now - timedelta(days=31)
+        assert pretty_date(past_time) == "1 month ago"
+
     def test_months_ago(self):
         now = datetime.now(tz=timezone.utc)
         past_time = now - timedelta(days=90)
         assert pretty_date(past_time) == "3 months ago"
 
-    def test_years_ago(self):
+    def test_year_ago(self):
         now = datetime.now(tz=timezone.utc)
         past_time = now - timedelta(days=400)
         assert pretty_date(past_time) == "1 year ago"
 
+    def test_years_ago(self):
+        now = datetime.now(tz=timezone.utc)
+        past_time = now - timedelta(days=700)
+        assert pretty_date(past_time) == "2 years ago"
+
     def test_future_time(self):
         now = datetime.now(tz=timezone.utc)
         future_time = now + timedelta(hours=1)
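
The new cases pin down the unit boundaries and singular forms `pretty_date` must produce: 7 days is "1 week ago", 31 days "1 month ago", and notably 700 days is "2 years ago", which rules out floor division (700 // 365 would give 1) and suggests rounding. A minimal sketch consistent with all the assertions above; the thresholds are inferred from the tests, not copied from `dstack/_internal/utils/common.py`:

```python
from datetime import datetime, timezone


def pretty_date(time: datetime) -> str:
    """Sketch reproducing the tested day-and-above behavior; sub-day branches elided."""
    days = (datetime.now(tz=timezone.utc) - time).days
    if days < 0:
        return "just now"  # future times; the real branch isn't shown in this diff
    if days < 7:
        return f"{days} day{'s' if days != 1 else ''} ago"
    for unit, length, upper in (("week", 7, 30), ("month", 30, 365), ("year", 365, None)):
        if upper is None or days < upper:
            count = round(days / length)
            return f"{count} {unit}{'s' if count != 1 else ''} ago"
    raise AssertionError("unreachable")
```

Spot-checking against the tests: 21 days gives round(21 / 7) = "3 weeks ago", 400 days gives round(400 / 365) = "1 year ago", and 700 days gives round(1.92) = "2 years ago".
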
tests/_internal/utils/test_event_loop.py (new file)

@@ -0,0 +1,18 @@
+import asyncio
+
+from dstack._internal.utils.event_loop import DaemonEventLoop
+
+
+def test_daemon_event_loop():
+    q = asyncio.Queue()
+
+    async def worker(i):
+        await q.put(i)
+
+    async def all_workers():
+        await asyncio.gather(*[worker(i) for i in range(3)])
+
+    loop = DaemonEventLoop()
+    loop.await_(all_workers())
+    assert q.qsize() == 3
+    assert {loop.await_(q.get()) for _ in range(3)} == {0, 1, 2}
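
`DaemonEventLoop` itself (`dstack/_internal/utils/event_loop.py`, +30 lines above) isn't shown in this diff. Judging from the test, it runs an asyncio event loop on a background daemon thread and lets synchronous code block on coroutines via `await_`. A minimal sketch consistent with that usage; an assumption, not the actual implementation:

```python
import asyncio
import threading
from typing import Any, Coroutine, TypeVar

T = TypeVar("T")


class DaemonEventLoop:
    """Run an asyncio loop on a daemon thread; let sync code block on coroutines."""

    def __init__(self) -> None:
        self._loop = asyncio.new_event_loop()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def _run(self) -> None:
        asyncio.set_event_loop(self._loop)
        self._loop.run_forever()

    def await_(self, coro: Coroutine[Any, Any, T]) -> T:
        # run_coroutine_threadsafe is safe even before run_forever starts:
        # the callback is queued and executed once the loop spins up.
        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
```

Making the thread a daemon means a forgotten loop never blocks interpreter shutdown, which fits the test creating a loop without ever stopping it.
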
dstack/_internal/core/backends/datacrunch/api_client.py (deleted)

@@ -1,77 +0,0 @@
-from typing import Optional
-
-from datacrunch import DataCrunchClient
-from datacrunch.exceptions import APIException
-from datacrunch.instances.instances import Instance
-
-from dstack._internal.core.errors import NoCapacityError
-from dstack._internal.utils.ssh import get_public_key_fingerprint
-
-
-class DataCrunchAPIClient:
-    def __init__(self, client_id: str, client_secret: str):
-        self.client = DataCrunchClient(client_id, client_secret)
-
-    def delete_instance(self, instance_id: str) -> None:
-        try:
-            self.client.instances.action(id_list=[instance_id], action="delete")
-        except APIException:
-            pass
-
-    def get_or_create_ssh_key(self, name: str, public_key: str) -> str:
-        fingerprint = get_public_key_fingerprint(public_key)
-        keys = self.client.ssh_keys.get()
-        found_keys = [
-            key for key in keys if fingerprint == get_public_key_fingerprint(key.public_key)
-        ]
-        if found_keys:
-            key = found_keys[0]
-            return key.id
-
-        key = self.client.ssh_keys.create(name, public_key)
-        return key.id
-
-    def get_or_create_startup_scrpit(self, name: str, script: str) -> str:
-        scripts = self.client.startup_scripts.get()
-        found_scripts = [startup_script for startup_script in scripts if script == startup_script]
-        if found_scripts:
-            startup_script = found_scripts[0]
-            return startup_script.id
-
-        startup_script = self.client.startup_scripts.create(name, script)
-        return startup_script.id
-
-    def get_instance_by_id(self, instance_id: str) -> Optional[Instance]:
-        try:
-            return self.client.instances.get_by_id(instance_id)
-        except APIException:
-            return None
-
-    def deploy_instance(
-        self,
-        instance_type,
-        image,
-        ssh_key_ids,
-        hostname,
-        description,
-        startup_script_id,
-        disk_size,
-        is_spot=True,
-        location="FIN-01",
-    ) -> Instance:
-        try:
-            instance = self.client.instances.create(
-                instance_type=instance_type,
-                image=image,
-                ssh_key_ids=ssh_key_ids,
-                hostname=hostname,
-                description=description,
-                startup_script_id=startup_script_id,
-                is_spot=is_spot,
-                location=location,
-                os_volume={"name": "OS volume", "size": disk_size},
-            )
-        except APIException as e:
-            raise NoCapacityError(f"DataCrunch API error: {e.message}")
-
-        return instance