dstack 0.18.43__py3-none-any.whl → 0.19.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. dstack/_internal/cli/commands/gateway.py +15 -3
  2. dstack/_internal/cli/commands/logs.py +0 -22
  3. dstack/_internal/cli/commands/stats.py +8 -17
  4. dstack/_internal/cli/main.py +1 -5
  5. dstack/_internal/cli/services/configurators/fleet.py +4 -39
  6. dstack/_internal/cli/services/configurators/run.py +22 -20
  7. dstack/_internal/cli/services/profile.py +34 -83
  8. dstack/_internal/cli/utils/gateway.py +1 -1
  9. dstack/_internal/cli/utils/run.py +11 -0
  10. dstack/_internal/core/backends/__init__.py +56 -39
  11. dstack/_internal/core/backends/aws/__init__.py +0 -25
  12. dstack/_internal/core/backends/aws/auth.py +1 -10
  13. dstack/_internal/core/backends/aws/backend.py +26 -0
  14. dstack/_internal/core/backends/aws/compute.py +21 -45
  15. dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
  16. dstack/_internal/core/backends/aws/models.py +135 -0
  17. dstack/_internal/core/backends/aws/resources.py +1 -1
  18. dstack/_internal/core/backends/azure/__init__.py +0 -20
  19. dstack/_internal/core/backends/azure/auth.py +2 -11
  20. dstack/_internal/core/backends/azure/backend.py +21 -0
  21. dstack/_internal/core/backends/azure/compute.py +14 -28
  22. dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
  23. dstack/_internal/core/backends/azure/models.py +89 -0
  24. dstack/_internal/core/backends/base/__init__.py +0 -12
  25. dstack/_internal/core/backends/base/backend.py +18 -0
  26. dstack/_internal/core/backends/base/compute.py +153 -33
  27. dstack/_internal/core/backends/base/configurator.py +105 -0
  28. dstack/_internal/core/backends/base/models.py +14 -0
  29. dstack/_internal/core/backends/configurators.py +138 -0
  30. dstack/_internal/core/backends/cudo/__init__.py +0 -15
  31. dstack/_internal/core/backends/cudo/backend.py +16 -0
  32. dstack/_internal/core/backends/cudo/compute.py +8 -26
  33. dstack/_internal/core/backends/cudo/configurator.py +72 -0
  34. dstack/_internal/core/backends/cudo/models.py +37 -0
  35. dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
  36. dstack/_internal/core/backends/datacrunch/backend.py +16 -0
  37. dstack/_internal/core/backends/datacrunch/compute.py +8 -25
  38. dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
  39. dstack/_internal/core/backends/datacrunch/models.py +38 -0
  40. dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
  41. dstack/_internal/core/backends/gcp/__init__.py +0 -16
  42. dstack/_internal/core/backends/gcp/auth.py +2 -11
  43. dstack/_internal/core/backends/gcp/backend.py +17 -0
  44. dstack/_internal/core/backends/gcp/compute.py +14 -44
  45. dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
  46. dstack/_internal/core/backends/gcp/models.py +125 -0
  47. dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
  48. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  49. dstack/_internal/core/backends/kubernetes/compute.py +16 -5
  50. dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
  51. dstack/_internal/core/backends/kubernetes/models.py +72 -0
  52. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
  53. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  54. dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
  55. dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
  56. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  57. dstack/_internal/core/backends/local/__init__.py +0 -13
  58. dstack/_internal/core/backends/local/backend.py +14 -0
  59. dstack/_internal/core/backends/local/compute.py +16 -2
  60. dstack/_internal/core/backends/models.py +128 -0
  61. dstack/_internal/core/backends/oci/__init__.py +0 -15
  62. dstack/_internal/core/backends/oci/auth.py +1 -5
  63. dstack/_internal/core/backends/oci/backend.py +16 -0
  64. dstack/_internal/core/backends/oci/compute.py +9 -23
  65. dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
  66. dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
  67. dstack/_internal/core/backends/oci/region.py +1 -1
  68. dstack/_internal/core/backends/runpod/__init__.py +0 -15
  69. dstack/_internal/core/backends/runpod/backend.py +16 -0
  70. dstack/_internal/core/backends/runpod/compute.py +28 -6
  71. dstack/_internal/core/backends/runpod/configurator.py +59 -0
  72. dstack/_internal/core/backends/runpod/models.py +54 -0
  73. dstack/_internal/core/backends/template/__init__.py +0 -0
  74. dstack/_internal/core/backends/tensordock/__init__.py +0 -15
  75. dstack/_internal/core/backends/tensordock/backend.py +16 -0
  76. dstack/_internal/core/backends/tensordock/compute.py +8 -27
  77. dstack/_internal/core/backends/tensordock/configurator.py +68 -0
  78. dstack/_internal/core/backends/tensordock/models.py +38 -0
  79. dstack/_internal/core/backends/vastai/__init__.py +0 -15
  80. dstack/_internal/core/backends/vastai/backend.py +16 -0
  81. dstack/_internal/core/backends/vastai/compute.py +2 -2
  82. dstack/_internal/core/backends/vastai/configurator.py +66 -0
  83. dstack/_internal/core/backends/vastai/models.py +37 -0
  84. dstack/_internal/core/backends/vultr/__init__.py +0 -15
  85. dstack/_internal/core/backends/vultr/backend.py +16 -0
  86. dstack/_internal/core/backends/vultr/compute.py +10 -24
  87. dstack/_internal/core/backends/vultr/configurator.py +64 -0
  88. dstack/_internal/core/backends/vultr/models.py +34 -0
  89. dstack/_internal/core/models/backends/__init__.py +0 -184
  90. dstack/_internal/core/models/backends/base.py +0 -19
  91. dstack/_internal/core/models/configurations.py +22 -16
  92. dstack/_internal/core/models/envs.py +4 -3
  93. dstack/_internal/core/models/fleets.py +17 -22
  94. dstack/_internal/core/models/gateways.py +3 -3
  95. dstack/_internal/core/models/instances.py +24 -0
  96. dstack/_internal/core/models/profiles.py +85 -45
  97. dstack/_internal/core/models/projects.py +1 -1
  98. dstack/_internal/core/models/repos/base.py +0 -5
  99. dstack/_internal/core/models/repos/local.py +3 -3
  100. dstack/_internal/core/models/repos/remote.py +26 -12
  101. dstack/_internal/core/models/repos/virtual.py +1 -1
  102. dstack/_internal/core/models/resources.py +45 -76
  103. dstack/_internal/core/models/runs.py +21 -19
  104. dstack/_internal/core/models/volumes.py +1 -3
  105. dstack/_internal/core/services/profiles.py +7 -16
  106. dstack/_internal/core/services/repos.py +0 -4
  107. dstack/_internal/server/app.py +11 -4
  108. dstack/_internal/server/background/__init__.py +10 -0
  109. dstack/_internal/server/background/tasks/process_gateways.py +4 -8
  110. dstack/_internal/server/background/tasks/process_instances.py +14 -9
  111. dstack/_internal/server/background/tasks/process_metrics.py +1 -1
  112. dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
  113. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
  114. dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
  115. dstack/_internal/server/background/tasks/process_runs.py +1 -0
  116. dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
  117. dstack/_internal/server/background/tasks/process_volumes.py +5 -2
  118. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  119. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  120. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  121. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  122. dstack/_internal/server/models.py +59 -9
  123. dstack/_internal/server/routers/backends.py +14 -23
  124. dstack/_internal/server/routers/instances.py +3 -4
  125. dstack/_internal/server/routers/metrics.py +31 -10
  126. dstack/_internal/server/routers/prometheus.py +36 -0
  127. dstack/_internal/server/routers/repos.py +1 -2
  128. dstack/_internal/server/routers/runs.py +13 -59
  129. dstack/_internal/server/schemas/gateways.py +14 -23
  130. dstack/_internal/server/schemas/projects.py +7 -2
  131. dstack/_internal/server/schemas/repos.py +2 -38
  132. dstack/_internal/server/schemas/runner.py +1 -0
  133. dstack/_internal/server/schemas/runs.py +1 -24
  134. dstack/_internal/server/security/permissions.py +1 -1
  135. dstack/_internal/server/services/backends/__init__.py +85 -158
  136. dstack/_internal/server/services/config.py +53 -567
  137. dstack/_internal/server/services/fleets.py +9 -103
  138. dstack/_internal/server/services/gateways/__init__.py +13 -4
  139. dstack/_internal/server/services/{pools.py → instances.py} +22 -329
  140. dstack/_internal/server/services/jobs/__init__.py +9 -6
  141. dstack/_internal/server/services/jobs/configurators/base.py +25 -1
  142. dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
  143. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  144. dstack/_internal/server/services/metrics.py +131 -72
  145. dstack/_internal/server/services/offers.py +1 -1
  146. dstack/_internal/server/services/projects.py +23 -14
  147. dstack/_internal/server/services/prometheus.py +245 -0
  148. dstack/_internal/server/services/runner/client.py +14 -3
  149. dstack/_internal/server/services/runs.py +67 -31
  150. dstack/_internal/server/services/volumes.py +9 -4
  151. dstack/_internal/server/settings.py +3 -0
  152. dstack/_internal/server/statics/index.html +1 -1
  153. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4fd5a4770eff59325ee3.js} +68 -15
  154. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4fd5a4770eff59325ee3.js.map} +1 -1
  155. dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
  156. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  157. dstack/_internal/server/testing/common.py +75 -32
  158. dstack/_internal/utils/json_schema.py +6 -0
  159. dstack/_internal/utils/ssh.py +2 -1
  160. dstack/api/__init__.py +4 -0
  161. dstack/api/_public/__init__.py +16 -20
  162. dstack/api/_public/backends.py +1 -1
  163. dstack/api/_public/repos.py +36 -36
  164. dstack/api/_public/runs.py +170 -83
  165. dstack/api/server/__init__.py +11 -13
  166. dstack/api/server/_backends.py +12 -16
  167. dstack/api/server/_fleets.py +15 -55
  168. dstack/api/server/_gateways.py +3 -14
  169. dstack/api/server/_repos.py +1 -4
  170. dstack/api/server/_runs.py +21 -96
  171. dstack/api/server/_volumes.py +10 -5
  172. dstack/api/utils.py +3 -0
  173. dstack/version.py +1 -1
  174. {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/METADATA +10 -1
  175. {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/RECORD +229 -206
  176. tests/_internal/cli/services/configurators/test_profile.py +6 -6
  177. tests/_internal/core/backends/aws/test_configurator.py +35 -0
  178. tests/_internal/core/backends/aws/test_resources.py +1 -1
  179. tests/_internal/core/backends/azure/test_configurator.py +61 -0
  180. tests/_internal/core/backends/cudo/__init__.py +0 -0
  181. tests/_internal/core/backends/cudo/test_configurator.py +37 -0
  182. tests/_internal/core/backends/datacrunch/__init__.py +0 -0
  183. tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
  184. tests/_internal/core/backends/gcp/test_configurator.py +42 -0
  185. tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
  186. tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
  187. tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
  188. tests/_internal/core/backends/oci/test_configurator.py +55 -0
  189. tests/_internal/core/backends/runpod/__init__.py +0 -0
  190. tests/_internal/core/backends/runpod/test_configurator.py +33 -0
  191. tests/_internal/core/backends/tensordock/__init__.py +0 -0
  192. tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
  193. tests/_internal/core/backends/vastai/__init__.py +0 -0
  194. tests/_internal/core/backends/vastai/test_configurator.py +33 -0
  195. tests/_internal/core/backends/vultr/__init__.py +0 -0
  196. tests/_internal/core/backends/vultr/test_configurator.py +33 -0
  197. tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
  198. tests/_internal/server/background/tasks/test_process_instances.py +49 -48
  199. tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
  200. tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
  201. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
  202. tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
  203. tests/_internal/server/background/tasks/test_process_runs.py +8 -22
  204. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
  205. tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
  206. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
  207. tests/_internal/server/routers/test_backends.py +6 -764
  208. tests/_internal/server/routers/test_fleets.py +2 -26
  209. tests/_internal/server/routers/test_gateways.py +27 -3
  210. tests/_internal/server/routers/test_instances.py +0 -10
  211. tests/_internal/server/routers/test_metrics.py +42 -0
  212. tests/_internal/server/routers/test_projects.py +56 -0
  213. tests/_internal/server/routers/test_prometheus.py +333 -0
  214. tests/_internal/server/routers/test_repos.py +0 -15
  215. tests/_internal/server/routers/test_runs.py +83 -275
  216. tests/_internal/server/routers/test_volumes.py +2 -3
  217. tests/_internal/server/services/backends/__init__.py +0 -0
  218. tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
  219. tests/_internal/server/services/test_config.py +7 -4
  220. tests/_internal/server/services/test_fleets.py +1 -4
  221. tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
  222. tests/_internal/server/services/test_metrics.py +167 -0
  223. tests/_internal/server/services/test_repos.py +1 -14
  224. tests/_internal/server/services/test_runs.py +0 -4
  225. dstack/_internal/cli/commands/pool.py +0 -581
  226. dstack/_internal/cli/commands/run.py +0 -75
  227. dstack/_internal/core/backends/aws/config.py +0 -18
  228. dstack/_internal/core/backends/azure/config.py +0 -12
  229. dstack/_internal/core/backends/base/config.py +0 -5
  230. dstack/_internal/core/backends/cudo/config.py +0 -9
  231. dstack/_internal/core/backends/datacrunch/config.py +0 -9
  232. dstack/_internal/core/backends/gcp/config.py +0 -22
  233. dstack/_internal/core/backends/kubernetes/config.py +0 -6
  234. dstack/_internal/core/backends/lambdalabs/config.py +0 -9
  235. dstack/_internal/core/backends/nebius/__init__.py +0 -15
  236. dstack/_internal/core/backends/nebius/api_client.py +0 -319
  237. dstack/_internal/core/backends/nebius/compute.py +0 -220
  238. dstack/_internal/core/backends/nebius/config.py +0 -6
  239. dstack/_internal/core/backends/nebius/types.py +0 -37
  240. dstack/_internal/core/backends/oci/config.py +0 -6
  241. dstack/_internal/core/backends/runpod/config.py +0 -9
  242. dstack/_internal/core/backends/tensordock/config.py +0 -9
  243. dstack/_internal/core/backends/vastai/config.py +0 -6
  244. dstack/_internal/core/backends/vultr/config.py +0 -9
  245. dstack/_internal/core/models/backends/aws.py +0 -86
  246. dstack/_internal/core/models/backends/azure.py +0 -68
  247. dstack/_internal/core/models/backends/cudo.py +0 -43
  248. dstack/_internal/core/models/backends/datacrunch.py +0 -44
  249. dstack/_internal/core/models/backends/gcp.py +0 -67
  250. dstack/_internal/core/models/backends/kubernetes.py +0 -40
  251. dstack/_internal/core/models/backends/lambdalabs.py +0 -43
  252. dstack/_internal/core/models/backends/nebius.py +0 -54
  253. dstack/_internal/core/models/backends/runpod.py +0 -40
  254. dstack/_internal/core/models/backends/tensordock.py +0 -44
  255. dstack/_internal/core/models/backends/vastai.py +0 -43
  256. dstack/_internal/core/models/backends/vultr.py +0 -40
  257. dstack/_internal/core/models/pools.py +0 -43
  258. dstack/_internal/server/routers/pools.py +0 -142
  259. dstack/_internal/server/schemas/pools.py +0 -38
  260. dstack/_internal/server/services/backends/configurators/base.py +0 -72
  261. dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
  262. dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
  263. dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
  264. dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
  265. dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
  266. dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
  267. dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
  268. dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
  269. dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
  270. dstack/api/_public/pools.py +0 -41
  271. dstack/api/_public/resources.py +0 -105
  272. dstack/api/server/_pools.py +0 -63
  273. tests/_internal/server/routers/test_pools.py +0 -612
  274. /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
  275. {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/LICENSE.md +0 -0
  276. {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/WHEEL +0 -0
  277. {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/entry_points.txt +0 -0
  278. {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,333 @@
1
+ from datetime import datetime, timedelta, timezone
2
+ from textwrap import dedent
3
+ from typing import Optional
4
+
5
+ import pytest
6
+ from freezegun import freeze_time
7
+ from httpx import AsyncClient
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from dstack._internal.core.models.backends.base import BackendType
11
+ from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
12
+ from dstack._internal.core.models.runs import JobProvisioningData, JobRuntimeData, JobStatus
13
+ from dstack._internal.core.models.users import GlobalRole, ProjectRole
14
+ from dstack._internal.server.models import JobModel, ProjectModel, UserModel
15
+ from dstack._internal.server.services.projects import add_project_member
16
+ from dstack._internal.server.testing.common import (
17
+ create_fleet,
18
+ create_instance,
19
+ create_job,
20
+ create_job_prometheus_metrics,
21
+ create_project,
22
+ create_repo,
23
+ create_run,
24
+ create_user,
25
+ get_instance_offer_with_availability,
26
+ get_job_provisioning_data,
27
+ get_job_runtime_data,
28
+ get_run_spec,
29
+ )
30
+
31
+
32
+ @pytest.fixture
33
+ def enable_metrics(monkeypatch: pytest.MonkeyPatch):
34
+ monkeypatch.setattr("dstack._internal.server.settings.ENABLE_PROMETHEUS_METRICS", True)
35
+
36
+
37
+ FAKE_NOW = datetime(2023, 1, 2, 3, 4, tzinfo=timezone.utc)
38
+
39
+
40
+ @freeze_time(FAKE_NOW)
41
+ @pytest.mark.asyncio
42
+ @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
43
+ @pytest.mark.usefixtures("image_config_mock", "test_db", "enable_metrics")
44
+ class TestGetPrometheusMetrics:
45
+ async def test_returns_metrics(self, session: AsyncSession, client: AsyncClient):
46
+ user = await create_user(session=session, name="test-user", global_role=GlobalRole.USER)
47
+ offer = get_instance_offer_with_availability(
48
+ instance_type="test-type", gpu_count=2, gpu_name="V4", price=12
49
+ )
50
+ project_2 = await _create_project(session, "project-2", user)
51
+ jpd_2_1 = get_job_provisioning_data(
52
+ backend=BackendType.AWS, gpu_name="T4", gpu_count=2, price=16
53
+ )
54
+ job_2_1 = await _create_job(
55
+ session=session,
56
+ run_name="run-1",
57
+ project=project_2,
58
+ user=user,
59
+ status=JobStatus.RUNNING,
60
+ job_provisioning_data=jpd_2_1,
61
+ submitted_at=FAKE_NOW - timedelta(seconds=100),
62
+ )
63
+ await create_job_prometheus_metrics(
64
+ session=session,
65
+ job=job_2_1,
66
+ text=dedent("""
67
+ # HELP FIELD_1 Test field 1
68
+ # TYPE FIELD_1 gauge
69
+ FIELD_1{gpu="0"} 100
70
+ FIELD_1{gpu="1"} 200
71
+ """),
72
+ )
73
+ project_1 = await _create_project(session, "project-1", user)
74
+ jpd_1_1 = get_job_provisioning_data(backend=BackendType.AWS, gpu_count=4, gpu_name="T4")
75
+ jrd_1_1 = get_job_runtime_data(offer=offer)
76
+ job_1_1 = await _create_job(
77
+ session=session,
78
+ run_name="run-1",
79
+ project=project_1,
80
+ user=user,
81
+ status=JobStatus.RUNNING,
82
+ job_provisioning_data=jpd_1_1,
83
+ job_runtime_data=jrd_1_1,
84
+ submitted_at=FAKE_NOW - timedelta(seconds=120),
85
+ )
86
+ await create_job_prometheus_metrics(
87
+ session=session,
88
+ job=job_1_1,
89
+ text=dedent("""
90
+ # Comments should be skipped
91
+
92
+ # HELP FIELD_1 Test field 1
93
+ # TYPE FIELD_1 gauge
94
+ FIELD_1{gpu="0"} 350
95
+ FIELD_1{gpu="1"} 400
96
+
97
+ # HELP FIELD_2 Test field 2
98
+ # TYPE FIELD_2 counter
99
+ FIELD_2{gpu="0"} 337325 1395066363000
100
+ FIELD_2{gpu="1"} 987169 1395066363010
101
+ """),
102
+ )
103
+ job_1_2 = await _create_job(session, "run-2", project_1, user, JobStatus.RUNNING)
104
+ await create_job_prometheus_metrics(
105
+ session=session,
106
+ job=job_1_2,
107
+ text=dedent("""
108
+ # HELP FIELD_1 Test field 1
109
+ # TYPE FIELD_1 gauge
110
+ FIELD_1{gpu="0"} 1200.0
111
+ FIELD_1{gpu="1"} 1600.0
112
+ FIELD_1{gpu="2"} 2400.0
113
+ """),
114
+ )
115
+ # Terminated job, should not appear in the response
116
+ job_1_3 = await _create_job(session, "run-3", project_1, user, JobStatus.TERMINATED)
117
+ await create_job_prometheus_metrics(
118
+ session=session,
119
+ job=job_1_3,
120
+ text=dedent("""
121
+ # HELP FIELD_1 Test field 1
122
+ # TYPE FIELD_1 gauge
123
+ FIELD_1{gpu="0"} 10
124
+ FIELD_1{gpu="1"} 20
125
+ """),
126
+ )
127
+ fleet = await create_fleet(session=session, project=project_1, name="test-fleet")
128
+ instance = await create_instance(
129
+ session=session,
130
+ project=project_1,
131
+ fleet=fleet,
132
+ backend=BackendType.AWS,
133
+ offer=offer,
134
+ price=14,
135
+ created_at=FAKE_NOW - timedelta(hours=1),
136
+ name="test-instance",
137
+ )
138
+
139
+ response = await client.get("/metrics")
140
+
141
+ assert response.status_code == 200
142
+ assert response.text == dedent(f"""\
143
+ # HELP dstack_instance_duration_seconds_total Total seconds the instance is running
144
+ # TYPE dstack_instance_duration_seconds_total counter
145
+ dstack_instance_duration_seconds_total{{dstack_project_name="project-1",dstack_fleet_name="test-fleet",dstack_fleet_id="{fleet.id}",dstack_instance_name="test-instance",dstack_instance_id="{instance.id}",dstack_instance_type="test-type",dstack_backend="aws",dstack_gpu="V4"}} 3600.0
146
+ # HELP dstack_instance_price_dollars_per_hour Instance price, USD/hour
147
+ # TYPE dstack_instance_price_dollars_per_hour gauge
148
+ dstack_instance_price_dollars_per_hour{{dstack_project_name="project-1",dstack_fleet_name="test-fleet",dstack_fleet_id="{fleet.id}",dstack_instance_name="test-instance",dstack_instance_id="{instance.id}",dstack_instance_type="test-type",dstack_backend="aws",dstack_gpu="V4"}} 14.0
149
+ # HELP dstack_instance_gpu_count Instance GPU count
150
+ # TYPE dstack_instance_gpu_count gauge
151
+ dstack_instance_gpu_count{{dstack_project_name="project-1",dstack_fleet_name="test-fleet",dstack_fleet_id="{fleet.id}",dstack_instance_name="test-instance",dstack_instance_id="{instance.id}",dstack_instance_type="test-type",dstack_backend="aws",dstack_gpu="V4"}} 2.0
152
+ # HELP dstack_job_duration_seconds_total Total seconds the job is running
153
+ # TYPE dstack_job_duration_seconds_total counter
154
+ dstack_job_duration_seconds_total{{dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0",dstack_run_type="dev-environment",dstack_backend="aws",dstack_gpu="V4"}} 120.0
155
+ dstack_job_duration_seconds_total{{dstack_project_name="project-2",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_2_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_2_1.id}",dstack_job_num="0",dstack_replica_num="0",dstack_run_type="dev-environment",dstack_backend="aws",dstack_gpu="T4"}} 100.0
156
+ # HELP dstack_job_price_dollars_per_hour Job instance price, USD/hour
157
+ # TYPE dstack_job_price_dollars_per_hour gauge
158
+ dstack_job_price_dollars_per_hour{{dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0",dstack_run_type="dev-environment",dstack_backend="aws",dstack_gpu="V4"}} 12.0
159
+ dstack_job_price_dollars_per_hour{{dstack_project_name="project-2",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_2_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_2_1.id}",dstack_job_num="0",dstack_replica_num="0",dstack_run_type="dev-environment",dstack_backend="aws",dstack_gpu="T4"}} 16.0
160
+ # HELP dstack_job_gpu_count Job GPU count
161
+ # TYPE dstack_job_gpu_count gauge
162
+ dstack_job_gpu_count{{dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0",dstack_run_type="dev-environment",dstack_backend="aws",dstack_gpu="V4"}} 2.0
163
+ dstack_job_gpu_count{{dstack_project_name="project-2",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_2_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_2_1.id}",dstack_job_num="0",dstack_replica_num="0",dstack_run_type="dev-environment",dstack_backend="aws",dstack_gpu="T4"}} 2.0
164
+ # HELP FIELD_1 Test field 1
165
+ # TYPE FIELD_1 gauge
166
+ FIELD_1{{gpu="0",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 350.0
167
+ FIELD_1{{gpu="1",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 400.0
168
+ FIELD_1{{gpu="0",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-2",dstack_run_id="{job_1_2.run_id}",dstack_job_name="run-2-0-0",dstack_job_id="{job_1_2.id}",dstack_job_num="0",dstack_replica_num="0"}} 1200.0
169
+ FIELD_1{{gpu="1",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-2",dstack_run_id="{job_1_2.run_id}",dstack_job_name="run-2-0-0",dstack_job_id="{job_1_2.id}",dstack_job_num="0",dstack_replica_num="0"}} 1600.0
170
+ FIELD_1{{gpu="2",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-2",dstack_run_id="{job_1_2.run_id}",dstack_job_name="run-2-0-0",dstack_job_id="{job_1_2.id}",dstack_job_num="0",dstack_replica_num="0"}} 2400.0
171
+ FIELD_1{{gpu="0",dstack_project_name="project-2",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_2_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_2_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 100.0
172
+ FIELD_1{{gpu="1",dstack_project_name="project-2",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_2_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_2_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 200.0
173
+ # HELP FIELD_2 Test field 2
174
+ # TYPE FIELD_2 counter
175
+ FIELD_2{{gpu="0",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 337325.0 1395066363000
176
+ FIELD_2{{gpu="1",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 987169.0 1395066363010
177
+ """)
178
+
179
+ async def test_returns_empty_response_if_no_runs(self, client: AsyncClient):
180
+ response = await client.get("/metrics")
181
+ assert response.status_code == 200
182
+ assert response.text == "\n"
183
+
184
+ async def test_returns_404_if_not_enabled(
185
+ self, monkeypatch: pytest.MonkeyPatch, client: AsyncClient
186
+ ):
187
+ monkeypatch.setattr("dstack._internal.server.settings.ENABLE_PROMETHEUS_METRICS", False)
188
+ response = await client.get("/metrics")
189
+ assert response.status_code == 404
190
+
191
+
192
+ @pytest.mark.asyncio
193
+ @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
194
+ @pytest.mark.usefixtures("image_config_mock", "test_db", "enable_metrics")
195
class TestGetPrometheusProjectMetrics:
    """Tests for the per-project Prometheus endpoint, ``GET /metrics/project/<name>``."""

    async def test_returns_metrics(self, session: AsyncSession, client: AsyncClient):
        """Check the exposition text returned for a project with several jobs.

        The expected response contains the samples of the two running jobs of
        ``project-1``, grouped per metric and extended with ``dstack_*`` labels.
        Samples of the terminated job and of the job in ``project-2`` must be absent.
        """
        # Raw metrics payloads stored for each job.
        run_1_metrics = dedent("""
            # Comments should be skipped

            # HELP FIELD_1 Test field 1
            # TYPE FIELD_1 gauge
            FIELD_1{gpu="0"} 350
            FIELD_1{gpu="1"} 400

            # HELP FIELD_2 Test field 2
            # TYPE FIELD_2 counter
            FIELD_2{gpu="0"} 337325 1395066363000
            FIELD_2{gpu="1"} 987169 1395066363010
        """)
        run_2_metrics = dedent("""
            # HELP FIELD_1 Test field 1
            # TYPE FIELD_1 gauge
            FIELD_1{gpu="0"} 1200.0
            FIELD_1{gpu="1"} 1600.0
            FIELD_1{gpu="2"} 2400.0
        """)
        terminated_run_metrics = dedent("""
            # HELP FIELD_1 Test field 1
            # TYPE FIELD_1 gauge
            FIELD_1{gpu="0"} 10
            FIELD_1{gpu="1"} 20
        """)
        foreign_run_metrics = dedent("""
            # HELP FIELD_1 Test field 1
            # TYPE FIELD_1 gauge
            FIELD_1{gpu="0"} 100
            FIELD_1{gpu="1"} 200
        """)

        test_user = await create_user(
            session=session, name="test-user", global_role=GlobalRole.USER
        )
        target_project = await _create_project(session, "project-1", test_user)

        job_1 = await _create_job(session, "run-1", target_project, test_user, JobStatus.RUNNING)
        await create_job_prometheus_metrics(session=session, job=job_1, text=run_1_metrics)

        job_2 = await _create_job(session, "run-2", target_project, test_user, JobStatus.RUNNING)
        await create_job_prometheus_metrics(session=session, job=job_2, text=run_2_metrics)

        # Terminated job, should not appear in the response
        terminated_job = await _create_job(
            session, "run-3", target_project, test_user, JobStatus.TERMINATED
        )
        await create_job_prometheus_metrics(
            session=session, job=terminated_job, text=terminated_run_metrics
        )

        # Running job that belongs to a different project, should not appear either
        foreign_project = await _create_project(session, "project-2", test_user)
        foreign_job = await _create_job(
            session, "run-4", foreign_project, test_user, JobStatus.RUNNING
        )
        await create_job_prometheus_metrics(
            session=session, job=foreign_job, text=foreign_run_metrics
        )

        response = await client.get("/metrics/project/project-1")

        expected_text = dedent(f"""\
            # HELP FIELD_1 Test field 1
            # TYPE FIELD_1 gauge
            FIELD_1{{gpu="0",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 350.0
            FIELD_1{{gpu="1",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 400.0
            FIELD_1{{gpu="0",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-2",dstack_run_id="{job_2.run_id}",dstack_job_name="run-2-0-0",dstack_job_id="{job_2.id}",dstack_job_num="0",dstack_replica_num="0"}} 1200.0
            FIELD_1{{gpu="1",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-2",dstack_run_id="{job_2.run_id}",dstack_job_name="run-2-0-0",dstack_job_id="{job_2.id}",dstack_job_num="0",dstack_replica_num="0"}} 1600.0
            FIELD_1{{gpu="2",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-2",dstack_run_id="{job_2.run_id}",dstack_job_name="run-2-0-0",dstack_job_id="{job_2.id}",dstack_job_num="0",dstack_replica_num="0"}} 2400.0
            # HELP FIELD_2 Test field 2
            # TYPE FIELD_2 counter
            FIELD_2{{gpu="0",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 337325.0 1395066363000
            FIELD_2{{gpu="1",dstack_project_name="project-1",dstack_user_name="test-user",dstack_run_name="run-1",dstack_run_id="{job_1.run_id}",dstack_job_name="run-1-0-0",dstack_job_id="{job_1.id}",dstack_job_num="0",dstack_replica_num="0"}} 987169.0 1395066363010
        """)
        assert response.status_code == 200
        assert response.text == expected_text

    async def test_returns_empty_response_if_no_runs(
        self, session: AsyncSession, client: AsyncClient
    ):
        """A project without any runs yields 200 with a body of a single newline."""
        owner = await create_user(session=session, global_role=GlobalRole.USER)
        await create_project(session=session, owner=owner, name="test-project")

        response = await client.get("/metrics/project/test-project")

        assert response.status_code == 200
        assert response.text == "\n"

    async def test_returns_404_if_project_doesnt_exist(self, client: AsyncClient):
        """Requesting metrics of a project that was never created yields 404."""
        response = await client.get("/metrics/project/nonexistent")

        assert response.status_code == 404

    async def test_returns_404_if_not_enabled(
        self, monkeypatch: pytest.MonkeyPatch, session: AsyncSession, client: AsyncClient
    ):
        """With ``ENABLE_PROMETHEUS_METRICS`` patched to False, an existing project yields 404."""
        monkeypatch.setattr("dstack._internal.server.settings.ENABLE_PROMETHEUS_METRICS", False)
        owner = await create_user(session=session, global_role=GlobalRole.USER)
        await create_project(session=session, owner=owner, name="test-project")

        response = await client.get("/metrics/project/test-project")

        assert response.status_code == 404
294
+
295
+
296
async def _create_project(session: AsyncSession, name: str, user: UserModel) -> ProjectModel:
    """Create a project named ``name`` owned by ``user`` and return it.

    The same ``user`` is additionally registered as a project member with the
    ``ProjectRole.USER`` role.
    """
    new_project = await create_project(session=session, owner=user, name=name)
    membership = {"project": new_project, "user": user, "project_role": ProjectRole.USER}
    await add_project_member(session=session, **membership)
    return new_project
302
+
303
+
304
async def _create_job(
    session: AsyncSession,
    run_name: str,
    project: ProjectModel,
    user: UserModel,
    status: JobStatus,
    job_provisioning_data: Optional[JobProvisioningData] = None,
    job_runtime_data: Optional[JobRuntimeData] = None,
    submitted_at: datetime = FAKE_NOW,
) -> JobModel:
    """Create a repo, a run and one job for that run; return the job.

    The repo is named ``"<run_name>-repo"`` and the run uses a dev-environment
    configuration with the ``vscode`` IDE. ``status``, ``job_provisioning_data``,
    ``job_runtime_data`` and ``submitted_at`` are passed through to ``create_job``.
    """
    job_repo = await create_repo(
        session=session,
        project_id=project.id,
        repo_name=f"{run_name}-repo",
    )
    spec = get_run_spec(
        run_name=run_name,
        repo_id=job_repo.name,
        configuration=DevEnvironmentConfiguration(ide="vscode"),
    )
    parent_run = await create_run(
        session=session,
        project=project,
        repo=job_repo,
        user=user,
        run_name=run_name,
        run_spec=spec,
    )
    return await create_job(
        session=session,
        run=parent_run,
        status=status,
        job_provisioning_data=job_provisioning_data,
        job_runtime_data=job_runtime_data,
        submitted_at=submitted_at,
    )
@@ -128,7 +128,6 @@ class TestGetRepo:
128
128
  user = await create_user(session=session, global_role=GlobalRole.USER)
129
129
  project = await create_project(session=session, owner=user)
130
130
  legacy_creds = {
131
- "protocol": "https",
132
131
  "clone_url": "https://github.com/dstackai/dstack.git",
133
132
  "private_key": None,
134
133
  "oauth_token": "test_token",
@@ -157,14 +156,12 @@ class TestGetRepo:
157
156
  user = await create_user(session=session, global_role=GlobalRole.USER)
158
157
  project = await create_project(session=session, owner=user)
159
158
  legacy_creds = {
160
- "protocol": "https",
161
159
  "clone_url": "https://github.com/dstackai/dstack.git",
162
160
  "private_key": None,
163
161
  "oauth_token": "legacy_creds",
164
162
  }
165
163
  repo = await create_repo(session=session, project_id=project.id, creds=legacy_creds)
166
164
  user_creds = {
167
- "protocol": "https",
168
165
  "clone_url": "https://github.com/dstackai/dstack.git",
169
166
  "private_key": None,
170
167
  "oauth_token": "user_creds",
@@ -214,13 +211,9 @@ class TestInitRepo:
214
211
  "repo_id": "test_repo",
215
212
  "repo_info": {
216
213
  "repo_type": "remote",
217
- "repo_host_name": "",
218
- "repo_port": None,
219
- "repo_user_name": "",
220
214
  "repo_name": "dstack",
221
215
  },
222
216
  "repo_creds": {
223
- "protocol": "https",
224
217
  "clone_url": "https://github.com/dstackai/dstack.git",
225
218
  "private_key": None,
226
219
  "oauth_token": "test_token",
@@ -254,13 +247,9 @@ class TestInitRepo:
254
247
  "repo_id": "test_repo",
255
248
  "repo_info": {
256
249
  "repo_type": "remote",
257
- "repo_host_name": "",
258
- "repo_port": None,
259
- "repo_user_name": "",
260
250
  "repo_name": "dstack",
261
251
  },
262
252
  "repo_creds": {
263
- "protocol": "https",
264
253
  "clone_url": "https://github.com/dstackai/dstack.git",
265
254
  "private_key": None,
266
255
  "oauth_token": "test_token",
@@ -276,13 +265,9 @@ class TestInitRepo:
276
265
  "repo_id": "test_repo",
277
266
  "repo_info": {
278
267
  "repo_type": "remote",
279
- "repo_host_name": "",
280
- "repo_port": None,
281
- "repo_user_name": "",
282
268
  "repo_name": "dstack",
283
269
  },
284
270
  "repo_creds": {
285
- "protocol": "https",
286
271
  "clone_url": "https://github.com/dstackai/dstack.git",
287
272
  "private_key": None,
288
273
  "oauth_token": "test_token_updated",