dstack 0.18.44__py3-none-any.whl → 0.19.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267):
  1. dstack/_internal/cli/commands/gateway.py +15 -3
  2. dstack/_internal/cli/commands/logs.py +0 -22
  3. dstack/_internal/cli/commands/stats.py +8 -17
  4. dstack/_internal/cli/main.py +1 -5
  5. dstack/_internal/cli/services/configurators/fleet.py +4 -39
  6. dstack/_internal/cli/services/configurators/run.py +22 -21
  7. dstack/_internal/cli/services/profile.py +34 -83
  8. dstack/_internal/cli/utils/gateway.py +1 -1
  9. dstack/_internal/core/backends/__init__.py +56 -39
  10. dstack/_internal/core/backends/aws/__init__.py +0 -25
  11. dstack/_internal/core/backends/aws/auth.py +1 -10
  12. dstack/_internal/core/backends/aws/backend.py +26 -0
  13. dstack/_internal/core/backends/aws/compute.py +20 -45
  14. dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
  15. dstack/_internal/core/backends/aws/models.py +135 -0
  16. dstack/_internal/core/backends/aws/resources.py +1 -1
  17. dstack/_internal/core/backends/azure/__init__.py +0 -20
  18. dstack/_internal/core/backends/azure/auth.py +2 -11
  19. dstack/_internal/core/backends/azure/backend.py +21 -0
  20. dstack/_internal/core/backends/azure/compute.py +13 -27
  21. dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
  22. dstack/_internal/core/backends/azure/models.py +89 -0
  23. dstack/_internal/core/backends/base/__init__.py +0 -12
  24. dstack/_internal/core/backends/base/backend.py +18 -0
  25. dstack/_internal/core/backends/base/compute.py +153 -33
  26. dstack/_internal/core/backends/base/configurator.py +105 -0
  27. dstack/_internal/core/backends/base/models.py +14 -0
  28. dstack/_internal/core/backends/configurators.py +138 -0
  29. dstack/_internal/core/backends/cudo/__init__.py +0 -15
  30. dstack/_internal/core/backends/cudo/backend.py +16 -0
  31. dstack/_internal/core/backends/cudo/compute.py +8 -26
  32. dstack/_internal/core/backends/cudo/configurator.py +72 -0
  33. dstack/_internal/core/backends/cudo/models.py +37 -0
  34. dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
  35. dstack/_internal/core/backends/datacrunch/backend.py +16 -0
  36. dstack/_internal/core/backends/datacrunch/compute.py +8 -25
  37. dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
  38. dstack/_internal/core/backends/datacrunch/models.py +38 -0
  39. dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
  40. dstack/_internal/core/backends/gcp/__init__.py +0 -16
  41. dstack/_internal/core/backends/gcp/auth.py +2 -11
  42. dstack/_internal/core/backends/gcp/backend.py +17 -0
  43. dstack/_internal/core/backends/gcp/compute.py +13 -43
  44. dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
  45. dstack/_internal/core/backends/gcp/models.py +125 -0
  46. dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
  47. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  48. dstack/_internal/core/backends/kubernetes/compute.py +16 -5
  49. dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
  50. dstack/_internal/core/backends/kubernetes/models.py +72 -0
  51. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
  52. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  53. dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
  54. dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
  55. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  56. dstack/_internal/core/backends/local/__init__.py +0 -13
  57. dstack/_internal/core/backends/local/backend.py +14 -0
  58. dstack/_internal/core/backends/local/compute.py +16 -2
  59. dstack/_internal/core/backends/models.py +128 -0
  60. dstack/_internal/core/backends/oci/__init__.py +0 -15
  61. dstack/_internal/core/backends/oci/auth.py +1 -5
  62. dstack/_internal/core/backends/oci/backend.py +16 -0
  63. dstack/_internal/core/backends/oci/compute.py +9 -23
  64. dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
  65. dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
  66. dstack/_internal/core/backends/oci/region.py +1 -1
  67. dstack/_internal/core/backends/runpod/__init__.py +0 -15
  68. dstack/_internal/core/backends/runpod/backend.py +16 -0
  69. dstack/_internal/core/backends/runpod/compute.py +7 -3
  70. dstack/_internal/core/backends/runpod/configurator.py +59 -0
  71. dstack/_internal/core/backends/runpod/models.py +54 -0
  72. dstack/_internal/core/backends/template/__init__.py +0 -0
  73. dstack/_internal/core/backends/tensordock/__init__.py +0 -15
  74. dstack/_internal/core/backends/tensordock/backend.py +16 -0
  75. dstack/_internal/core/backends/tensordock/compute.py +8 -27
  76. dstack/_internal/core/backends/tensordock/configurator.py +68 -0
  77. dstack/_internal/core/backends/tensordock/models.py +38 -0
  78. dstack/_internal/core/backends/vastai/__init__.py +0 -15
  79. dstack/_internal/core/backends/vastai/backend.py +16 -0
  80. dstack/_internal/core/backends/vastai/compute.py +2 -2
  81. dstack/_internal/core/backends/vastai/configurator.py +66 -0
  82. dstack/_internal/core/backends/vastai/models.py +37 -0
  83. dstack/_internal/core/backends/vultr/__init__.py +0 -15
  84. dstack/_internal/core/backends/vultr/backend.py +16 -0
  85. dstack/_internal/core/backends/vultr/compute.py +10 -24
  86. dstack/_internal/core/backends/vultr/configurator.py +64 -0
  87. dstack/_internal/core/backends/vultr/models.py +34 -0
  88. dstack/_internal/core/models/backends/__init__.py +0 -184
  89. dstack/_internal/core/models/backends/base.py +0 -19
  90. dstack/_internal/core/models/configurations.py +20 -15
  91. dstack/_internal/core/models/envs.py +4 -3
  92. dstack/_internal/core/models/fleets.py +17 -22
  93. dstack/_internal/core/models/gateways.py +3 -3
  94. dstack/_internal/core/models/instances.py +24 -0
  95. dstack/_internal/core/models/profiles.py +41 -46
  96. dstack/_internal/core/models/projects.py +1 -1
  97. dstack/_internal/core/models/repos/base.py +0 -5
  98. dstack/_internal/core/models/repos/local.py +3 -3
  99. dstack/_internal/core/models/repos/remote.py +26 -12
  100. dstack/_internal/core/models/repos/virtual.py +1 -1
  101. dstack/_internal/core/models/resources.py +45 -76
  102. dstack/_internal/core/models/runs.py +17 -19
  103. dstack/_internal/core/models/volumes.py +1 -3
  104. dstack/_internal/core/services/profiles.py +7 -16
  105. dstack/_internal/core/services/repos.py +0 -4
  106. dstack/_internal/server/app.py +0 -3
  107. dstack/_internal/server/background/tasks/process_gateways.py +4 -8
  108. dstack/_internal/server/background/tasks/process_instances.py +14 -9
  109. dstack/_internal/server/background/tasks/process_metrics.py +1 -1
  110. dstack/_internal/server/background/tasks/process_placement_groups.py +4 -1
  111. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +1 -1
  112. dstack/_internal/server/background/tasks/process_running_jobs.py +14 -5
  113. dstack/_internal/server/background/tasks/process_submitted_jobs.py +16 -37
  114. dstack/_internal/server/background/tasks/process_volumes.py +5 -2
  115. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  116. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  117. dstack/_internal/server/models.py +48 -9
  118. dstack/_internal/server/routers/backends.py +14 -23
  119. dstack/_internal/server/routers/instances.py +3 -4
  120. dstack/_internal/server/routers/metrics.py +10 -8
  121. dstack/_internal/server/routers/prometheus.py +1 -1
  122. dstack/_internal/server/routers/repos.py +1 -2
  123. dstack/_internal/server/routers/runs.py +13 -59
  124. dstack/_internal/server/schemas/gateways.py +14 -23
  125. dstack/_internal/server/schemas/projects.py +7 -2
  126. dstack/_internal/server/schemas/repos.py +2 -38
  127. dstack/_internal/server/schemas/runner.py +1 -0
  128. dstack/_internal/server/schemas/runs.py +1 -24
  129. dstack/_internal/server/services/backends/__init__.py +85 -158
  130. dstack/_internal/server/services/config.py +52 -576
  131. dstack/_internal/server/services/fleets.py +8 -103
  132. dstack/_internal/server/services/gateways/__init__.py +12 -4
  133. dstack/_internal/server/services/{pools.py → instances.py} +22 -329
  134. dstack/_internal/server/services/jobs/__init__.py +9 -6
  135. dstack/_internal/server/services/jobs/configurators/base.py +16 -0
  136. dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
  137. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  138. dstack/_internal/server/services/metrics.py +39 -13
  139. dstack/_internal/server/services/offers.py +1 -1
  140. dstack/_internal/server/services/projects.py +23 -14
  141. dstack/_internal/server/services/prometheus.py +176 -18
  142. dstack/_internal/server/services/runs.py +24 -16
  143. dstack/_internal/server/services/volumes.py +8 -4
  144. dstack/_internal/server/statics/index.html +1 -1
  145. dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js → main-4fd5a4770eff59325ee3.js} +7 -7
  146. dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js.map → main-4fd5a4770eff59325ee3.js.map} +1 -1
  147. dstack/_internal/server/testing/common.py +58 -32
  148. dstack/_internal/utils/json_schema.py +6 -0
  149. dstack/_internal/utils/ssh.py +2 -1
  150. dstack/api/__init__.py +4 -0
  151. dstack/api/_public/__init__.py +16 -20
  152. dstack/api/_public/backends.py +1 -1
  153. dstack/api/_public/repos.py +36 -36
  154. dstack/api/_public/runs.py +167 -83
  155. dstack/api/server/__init__.py +11 -13
  156. dstack/api/server/_backends.py +12 -16
  157. dstack/api/server/_fleets.py +15 -57
  158. dstack/api/server/_gateways.py +3 -14
  159. dstack/api/server/_repos.py +1 -4
  160. dstack/api/server/_runs.py +21 -100
  161. dstack/api/server/_volumes.py +10 -5
  162. dstack/version.py +1 -1
  163. {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/METADATA +1 -1
  164. {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/RECORD +218 -204
  165. tests/_internal/cli/services/configurators/test_profile.py +6 -6
  166. tests/_internal/core/backends/aws/test_configurator.py +35 -0
  167. tests/_internal/core/backends/aws/test_resources.py +1 -1
  168. tests/_internal/core/backends/azure/test_configurator.py +61 -0
  169. tests/_internal/core/backends/cudo/__init__.py +0 -0
  170. tests/_internal/core/backends/cudo/test_configurator.py +37 -0
  171. tests/_internal/core/backends/datacrunch/__init__.py +0 -0
  172. tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
  173. tests/_internal/core/backends/gcp/test_configurator.py +42 -0
  174. tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
  175. tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
  176. tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
  177. tests/_internal/core/backends/oci/test_configurator.py +55 -0
  178. tests/_internal/core/backends/runpod/__init__.py +0 -0
  179. tests/_internal/core/backends/runpod/test_configurator.py +33 -0
  180. tests/_internal/core/backends/tensordock/__init__.py +0 -0
  181. tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
  182. tests/_internal/core/backends/vastai/__init__.py +0 -0
  183. tests/_internal/core/backends/vastai/test_configurator.py +33 -0
  184. tests/_internal/core/backends/vultr/__init__.py +0 -0
  185. tests/_internal/core/backends/vultr/test_configurator.py +33 -0
  186. tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
  187. tests/_internal/server/background/tasks/test_process_instances.py +49 -48
  188. tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
  189. tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
  190. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +0 -3
  191. tests/_internal/server/background/tasks/test_process_running_jobs.py +0 -21
  192. tests/_internal/server/background/tasks/test_process_runs.py +8 -22
  193. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
  194. tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
  195. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
  196. tests/_internal/server/routers/test_backends.py +6 -764
  197. tests/_internal/server/routers/test_fleets.py +0 -26
  198. tests/_internal/server/routers/test_gateways.py +27 -3
  199. tests/_internal/server/routers/test_instances.py +0 -10
  200. tests/_internal/server/routers/test_metrics.py +27 -0
  201. tests/_internal/server/routers/test_projects.py +56 -0
  202. tests/_internal/server/routers/test_prometheus.py +116 -27
  203. tests/_internal/server/routers/test_repos.py +0 -15
  204. tests/_internal/server/routers/test_runs.py +4 -219
  205. tests/_internal/server/routers/test_volumes.py +2 -3
  206. tests/_internal/server/services/backends/__init__.py +0 -0
  207. tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
  208. tests/_internal/server/services/test_config.py +7 -4
  209. tests/_internal/server/services/test_fleets.py +1 -4
  210. tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
  211. tests/_internal/server/services/test_metrics.py +9 -5
  212. tests/_internal/server/services/test_repos.py +1 -14
  213. tests/_internal/server/services/test_runs.py +0 -4
  214. dstack/_internal/cli/commands/pool.py +0 -581
  215. dstack/_internal/cli/commands/run.py +0 -75
  216. dstack/_internal/core/backends/aws/config.py +0 -18
  217. dstack/_internal/core/backends/azure/config.py +0 -12
  218. dstack/_internal/core/backends/base/config.py +0 -5
  219. dstack/_internal/core/backends/cudo/config.py +0 -9
  220. dstack/_internal/core/backends/datacrunch/config.py +0 -9
  221. dstack/_internal/core/backends/gcp/config.py +0 -22
  222. dstack/_internal/core/backends/kubernetes/config.py +0 -6
  223. dstack/_internal/core/backends/lambdalabs/config.py +0 -9
  224. dstack/_internal/core/backends/nebius/__init__.py +0 -15
  225. dstack/_internal/core/backends/nebius/api_client.py +0 -319
  226. dstack/_internal/core/backends/nebius/compute.py +0 -220
  227. dstack/_internal/core/backends/nebius/config.py +0 -6
  228. dstack/_internal/core/backends/nebius/types.py +0 -37
  229. dstack/_internal/core/backends/oci/config.py +0 -6
  230. dstack/_internal/core/backends/runpod/config.py +0 -17
  231. dstack/_internal/core/backends/tensordock/config.py +0 -9
  232. dstack/_internal/core/backends/vastai/config.py +0 -6
  233. dstack/_internal/core/backends/vultr/config.py +0 -9
  234. dstack/_internal/core/models/backends/aws.py +0 -86
  235. dstack/_internal/core/models/backends/azure.py +0 -68
  236. dstack/_internal/core/models/backends/cudo.py +0 -43
  237. dstack/_internal/core/models/backends/datacrunch.py +0 -44
  238. dstack/_internal/core/models/backends/gcp.py +0 -67
  239. dstack/_internal/core/models/backends/kubernetes.py +0 -40
  240. dstack/_internal/core/models/backends/lambdalabs.py +0 -43
  241. dstack/_internal/core/models/backends/nebius.py +0 -54
  242. dstack/_internal/core/models/backends/runpod.py +0 -42
  243. dstack/_internal/core/models/backends/tensordock.py +0 -44
  244. dstack/_internal/core/models/backends/vastai.py +0 -43
  245. dstack/_internal/core/models/backends/vultr.py +0 -40
  246. dstack/_internal/core/models/pools.py +0 -43
  247. dstack/_internal/server/routers/pools.py +0 -142
  248. dstack/_internal/server/schemas/pools.py +0 -38
  249. dstack/_internal/server/services/backends/configurators/base.py +0 -72
  250. dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
  251. dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
  252. dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
  253. dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
  254. dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
  255. dstack/_internal/server/services/backends/configurators/runpod.py +0 -67
  256. dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
  257. dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
  258. dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
  259. dstack/api/_public/pools.py +0 -41
  260. dstack/api/_public/resources.py +0 -105
  261. dstack/api/server/_pools.py +0 -63
  262. tests/_internal/server/routers/test_pools.py +0 -612
  263. /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
  264. {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/LICENSE.md +0 -0
  265. {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/WHEEL +0 -0
  266. {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/entry_points.txt +0 -0
  267. {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  from enum import Enum
2
- from typing import List, Optional, Union, overload
2
+ from typing import Any, Dict, List, Optional, Union, overload
3
3
 
4
4
  from pydantic import Field, root_validator, validator
5
5
  from typing_extensions import Annotated, Literal
@@ -8,12 +8,9 @@ from dstack._internal.core.models.backends.base import BackendType
8
8
  from dstack._internal.core.models.common import CoreModel, Duration
9
9
 
10
10
  DEFAULT_RETRY_DURATION = 3600
11
- DEFAULT_POOL_NAME = "default-pool"
12
11
 
13
12
  DEFAULT_RUN_TERMINATION_IDLE_TIME = 5 * 60 # 5 minutes
14
- DEFAULT_POOL_TERMINATION_IDLE_TIME = 72 * 60 * 60 # 3 days
15
-
16
- DEFAULT_INSTANCE_RETRY_DURATION = 60 * 60 * 24 # 24h
13
+ DEFAULT_FLEET_TERMINATION_IDLE_TIME = 72 * 60 * 60 # 3 days
17
14
 
18
15
  DEFAULT_STOP_DURATION = 300
19
16
 
@@ -72,6 +69,8 @@ def parse_idle_duration(v: Optional[Union[int, str, bool]]) -> Optional[Union[st
72
69
  return parse_duration(v)
73
70
 
74
71
 
72
+ # Deprecated in favor of ProfileRetry().
73
+ # TODO: Remove when no longer referenced.
75
74
  class ProfileRetryPolicy(CoreModel):
76
75
  retry: Annotated[bool, Field(description="Whether to retry the run on failure or not")] = False
77
76
  duration: Annotated[
@@ -98,14 +97,15 @@ class RetryEvent(str, Enum):
98
97
 
99
98
  class ProfileRetry(CoreModel):
100
99
  on_events: Annotated[
101
- List[RetryEvent],
100
+ Optional[List[RetryEvent]],
102
101
  Field(
103
102
  description=(
104
103
  "The list of events that should be handled with retry."
105
- " Supported events are `no-capacity`, `interruption`, and `error`"
104
+ " Supported events are `no-capacity`, `interruption`, and `error`."
105
+ " Omit to retry on all events"
106
106
  )
107
107
  ),
108
- ]
108
+ ] = None
109
109
  duration: Annotated[
110
110
  Optional[Union[int, str]],
111
111
  Field(description="The maximum period of retrying the run, e.g., `4h` or `1d`"),
@@ -115,7 +115,8 @@ class ProfileRetry(CoreModel):
115
115
 
116
116
  @root_validator
117
117
  def _validate_fields(cls, values):
118
- if "on_events" in values and len(values["on_events"]) == 0:
118
+ on_events = values.get("on_events", None)
119
+ if on_events is not None and len(values["on_events"]) == 0:
119
120
  raise ValueError("`on_events` cannot be empty")
120
121
  return values
121
122
 
@@ -157,13 +158,13 @@ class ProfileParams(CoreModel):
157
158
  backends: Annotated[
158
159
  Optional[List[BackendType]],
159
160
  Field(description="The backends to consider for provisioning (e.g., `[aws, gcp]`)"),
160
- ]
161
+ ] = None
161
162
  regions: Annotated[
162
163
  Optional[List[str]],
163
164
  Field(
164
165
  description="The regions to consider for provisioning (e.g., `[eu-west-1, us-west4, westeurope]`)"
165
166
  ),
166
- ]
167
+ ] = None
167
168
  availability_zones: Annotated[
168
169
  Optional[List[str]],
169
170
  Field(
@@ -175,7 +176,7 @@ class ProfileParams(CoreModel):
175
176
  Field(
176
177
  description="The cloud-specific instance types to consider for provisioning (e.g., `[p3.8xlarge, n1-standard-4]`)"
177
178
  ),
178
- ]
179
+ ] = None
179
180
  reservation: Annotated[
180
181
  Optional[str],
181
182
  Field(
@@ -184,17 +185,17 @@ class ProfileParams(CoreModel):
184
185
  " Supports AWS Capacity Reservations and Capacity Blocks"
185
186
  )
186
187
  ),
187
- ]
188
+ ] = None
188
189
  spot_policy: Annotated[
189
190
  Optional[SpotPolicy],
190
191
  Field(
191
192
  description="The policy for provisioning spot or on-demand instances: `spot`, `on-demand`, or `auto`. Defaults to `on-demand`"
192
193
  ),
193
- ]
194
+ ] = None
194
195
  retry: Annotated[
195
196
  Optional[Union[ProfileRetry, bool]],
196
197
  Field(description="The policy for resubmitting the run. Defaults to `false`"),
197
- ]
198
+ ] = None
198
199
  max_duration: Annotated[
199
200
  Optional[Union[Literal["off"], str, int, bool]],
200
201
  Field(
@@ -204,7 +205,7 @@ class ProfileParams(CoreModel):
204
205
  " Use `off` for unlimited duration. Defaults to `off`"
205
206
  )
206
207
  ),
207
- ]
208
+ ] = None
208
209
  stop_duration: Annotated[
209
210
  Optional[Union[Literal["off"], str, int, bool]],
210
211
  Field(
@@ -215,17 +216,17 @@ class ProfileParams(CoreModel):
215
216
  " Use `off` for unlimited duration. Defaults to `5m`"
216
217
  )
217
218
  ),
218
- ]
219
+ ] = None
219
220
  max_price: Annotated[
220
221
  Optional[float],
221
222
  Field(description="The maximum instance price per hour, in dollars", gt=0.0),
222
- ]
223
+ ] = None
223
224
  creation_policy: Annotated[
224
225
  Optional[CreationPolicy],
225
226
  Field(
226
- description="The policy for using instances from the pool. Defaults to `reuse-or-create`"
227
+ description="The policy for using instances from fleets. Defaults to `reuse-or-create`"
227
228
  ),
228
- ]
229
+ ] = None
229
230
  idle_duration: Annotated[
230
231
  Optional[Union[Literal["off"], str, int, bool]],
231
232
  Field(
@@ -234,30 +235,27 @@ class ProfileParams(CoreModel):
234
235
  " Defaults to `5m` for runs and `3d` for fleets. Use `off` for unlimited duration"
235
236
  )
236
237
  ),
237
- ]
238
+ ] = None
238
239
  utilization_policy: Annotated[
239
240
  Optional[UtilizationPolicy],
240
241
  Field(description="Run termination policy based on utilization"),
241
- ]
242
- # Deprecated:
243
- termination_policy: Annotated[
244
- Optional[TerminationPolicy],
245
- Field(
246
- description="Deprecated in favor of `idle_duration`",
247
- ),
248
- ]
249
- termination_idle_time: Annotated[
250
- Optional[Union[str, int]],
251
- Field(
252
- description="Deprecated in favor of `idle_duration`",
253
- ),
254
- ]
255
- # The name of the pool. If not set, dstack will use the default name
256
- pool_name: Optional[str]
257
- # The name of the instance
258
- instance_name: Optional[str]
259
- # The policy for resubmitting the run. Deprecated in favor of `retry`
260
- retry_policy: Optional[ProfileRetryPolicy]
242
+ ] = None
243
+
244
+ # Deprecated and unused. Left for compatibility with 0.18 clients.
245
+ pool_name: Annotated[Optional[str], Field(exclude=True)] = None
246
+ instance_name: Annotated[Optional[str], Field(exclude=True)] = None
247
+ retry_policy: Annotated[Optional[ProfileRetryPolicy], Field(exclude=True)] = None
248
+ termination_policy: Annotated[Optional[TerminationPolicy], Field(exclude=True)] = None
249
+ termination_idle_time: Annotated[Optional[Union[str, int]], Field(exclude=True)] = None
250
+
251
+ class Config:
252
+ @staticmethod
253
+ def schema_extra(schema: Dict[str, Any]) -> None:
254
+ del schema["properties"]["pool_name"]
255
+ del schema["properties"]["instance_name"]
256
+ del schema["properties"]["retry_policy"]
257
+ del schema["properties"]["termination_policy"]
258
+ del schema["properties"]["termination_idle_time"]
261
259
 
262
260
  _validate_max_duration = validator("max_duration", pre=True, allow_reuse=True)(
263
261
  parse_max_duration
@@ -265,9 +263,6 @@ class ProfileParams(CoreModel):
265
263
  _validate_stop_duration = validator("stop_duration", pre=True, allow_reuse=True)(
266
264
  parse_stop_duration
267
265
  )
268
- _validate_termination_idle_time = validator(
269
- "termination_idle_time", pre=True, allow_reuse=True
270
- )(parse_duration)
271
266
  _validate_idle_duration = validator("idle_duration", pre=True, allow_reuse=True)(
272
267
  parse_idle_duration
273
268
  )
@@ -277,11 +272,11 @@ class ProfileProps(CoreModel):
277
272
  name: Annotated[
278
273
  str,
279
274
  Field(
280
- description="The name of the profile that can be passed as `--profile` to `dstack run`"
275
+ description="The name of the profile that can be passed as `--profile` to `dstack apply`"
281
276
  ),
282
277
  ]
283
278
  default: Annotated[
284
- bool, Field(description="If set to true, `dstack run` will use this profile by default.")
279
+ bool, Field(description="If set to true, `dstack apply` will use this profile by default.")
285
280
  ] = False
286
281
 
287
282
 
@@ -3,7 +3,7 @@ from typing import List, Optional
3
3
 
4
4
  from pydantic import UUID4
5
5
 
6
- from dstack._internal.core.models.backends import BackendInfo
6
+ from dstack._internal.core.backends.models import BackendInfo
7
7
  from dstack._internal.core.models.common import CoreModel
8
8
  from dstack._internal.core.models.users import ProjectRole, User
9
9
 
@@ -13,11 +13,6 @@ class RepoType(str, Enum):
13
13
  VIRTUAL = "virtual"
14
14
 
15
15
 
16
- class RepoProtocol(str, Enum):
17
- SSH = "ssh"
18
- HTTPS = "https"
19
-
20
-
21
16
  class BaseRepoInfo(CoreModel):
22
17
  repo_type: str
23
18
 
@@ -26,7 +26,7 @@ class LocalRepo(Repo):
26
26
  Example:
27
27
 
28
28
  ```python
29
- run = client.runs.submit(
29
+ run = client.runs.apply_configuration(
30
30
  configuration=...,
31
31
  repo=LocalRepo.from_dir("."), # Mount the current folder to the run
32
32
  )
@@ -41,10 +41,10 @@ class LocalRepo(Repo):
41
41
  Creates an instance of a local repo from a local path.
42
42
 
43
43
  Args:
44
- repo_dir: The path to a local folder
44
+ repo_dir: The path to a local folder.
45
45
 
46
46
  Returns:
47
- A local repo instance
47
+ A local repo instance.
48
48
  """
49
49
  return LocalRepo(repo_dir=repo_dir)
50
50
 
@@ -3,7 +3,7 @@ import re
3
3
  import subprocess
4
4
  import time
5
5
  from dataclasses import dataclass
6
- from typing import BinaryIO, Callable, Dict, Optional
6
+ from typing import Annotated, Any, BinaryIO, Callable, Dict, Optional
7
7
 
8
8
  import git
9
9
  import pydantic
@@ -12,7 +12,7 @@ from typing_extensions import Literal
12
12
 
13
13
  from dstack._internal.core.errors import DstackError
14
14
  from dstack._internal.core.models.common import CoreModel
15
- from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo, RepoProtocol
15
+ from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
16
16
  from dstack._internal.utils.hash import get_sha256, slugify
17
17
  from dstack._internal.utils.path import PathLike
18
18
  from dstack._internal.utils.ssh import get_host_config
@@ -25,20 +25,34 @@ class RepoError(DstackError):
25
25
 
26
26
 
27
27
  class RemoteRepoCreds(CoreModel):
28
- protocol: RepoProtocol # TODO: remove in 0.19
29
28
  clone_url: str
30
29
  private_key: Optional[str]
31
30
  oauth_token: Optional[str]
32
31
 
32
+ # TODO: remove in 0.20. Left for compatibility with CLI <=0.18.44
33
+ protocol: Annotated[Optional[str], Field(exclude=True)] = None
34
+
35
+ class Config:
36
+ @staticmethod
37
+ def schema_extra(schema: Dict[str, Any]) -> None:
38
+ del schema["properties"]["protocol"]
39
+
33
40
 
34
41
  class RemoteRepoInfo(BaseRepoInfo):
35
42
  repo_type: Literal["remote"] = "remote"
36
43
  repo_name: str
37
44
 
38
- # TODO: remove in 0.19
39
- repo_host_name: str = ""
40
- repo_port: Optional[int] = None
41
- repo_user_name: str = ""
45
+ # TODO: remove in 0.20. Left for compatibility with CLI <=0.18.44
46
+ repo_host_name: Annotated[Optional[str], Field(exclude=True)] = None
47
+ repo_port: Annotated[Optional[int], Field(exclude=True)] = None
48
+ repo_user_name: Annotated[Optional[str], Field(exclude=True)] = None
49
+
50
+ class Config:
51
+ @staticmethod
52
+ def schema_extra(schema: Dict[str, Any]) -> None:
53
+ del schema["properties"]["repo_host_name"]
54
+ del schema["properties"]["repo_port"]
55
+ del schema["properties"]["repo_user_name"]
42
56
 
43
57
 
44
58
  class RemoteRunRepoData(RemoteRepoInfo):
@@ -84,7 +98,7 @@ class RemoteRepo(Repo):
84
98
  Finally, you can pass the repo object to the run:
85
99
 
86
100
  ```python
87
- run = client.runs.submit(
101
+ run = client.runs.apply_configuration(
88
102
  configuration=...,
89
103
  repo=repo,
90
104
  )
@@ -100,10 +114,10 @@ class RemoteRepo(Repo):
100
114
  Creates an instance of a remote repo from a local path.
101
115
 
102
116
  Args:
103
- repo_dir: The path to a local folder
117
+ repo_dir: The path to a local folder.
104
118
 
105
119
  Returns:
106
- A remote repo instance
120
+ A remote repo instance.
107
121
  """
108
122
  return RemoteRepo(local_repo_dir=repo_dir)
109
123
 
@@ -115,12 +129,12 @@ class RemoteRepo(Repo):
115
129
  Creates an instance of a remote repo from a URL.
116
130
 
117
131
  Args:
118
- repo_url: The URL of a remote Git repo
132
+ repo_url: The URL of a remote Git repo.
119
133
  repo_branch: The name of the remote branch. Must be specified if `hash` is not specified.
120
134
  repo_hash: The hash of the revision. Must be specified if `branch` is not specified.
121
135
 
122
136
  Returns:
123
- A remote repo instance
137
+ A remote repo instance.
124
138
  """
125
139
  if repo_branch is None and repo_hash is None:
126
140
  raise ValueError("Either `repo_branch` or `repo_hash` must be specified.")
@@ -30,7 +30,7 @@ class VirtualRepo(Repo):
30
30
  virtual_repo.add_file_from_package(package=some_package, path="requirements.txt")
31
31
  virtual_repo.add_file_from_package(package=some_package, path="train.py")
32
32
 
33
- run = client.runs.submit(
33
+ run = client.runs.apply_configuration(
34
34
  configuration=...,
35
35
  repo=virtual_repo,
36
36
  )
@@ -8,6 +8,7 @@ from typing_extensions import Annotated
8
8
 
9
9
  from dstack._internal.core.models.common import CoreModel
10
10
  from dstack._internal.utils.common import pretty_resources
11
+ from dstack._internal.utils.json_schema import add_extra_schema_types
11
12
  from dstack._internal.utils.logging import get_logger
12
13
 
13
14
  logger = get_logger(__name__)
@@ -128,6 +129,22 @@ DEFAULT_GPU_COUNT = Range[int](min=1, max=1)
128
129
 
129
130
 
130
131
  class GPUSpec(CoreModel):
132
+ class Config:
133
+ @staticmethod
134
+ def schema_extra(schema: Dict[str, Any]):
135
+ add_extra_schema_types(
136
+ schema["properties"]["count"],
137
+ extra_types=[{"type": "integer"}, {"type": "string"}],
138
+ )
139
+ add_extra_schema_types(
140
+ schema["properties"]["memory"],
141
+ extra_types=[{"type": "integer"}, {"type": "string"}],
142
+ )
143
+ add_extra_schema_types(
144
+ schema["properties"]["total_memory"],
145
+ extra_types=[{"type": "integer"}, {"type": "string"}],
146
+ )
147
+
131
148
  vendor: Annotated[
132
149
  Optional[gpuhunt.AcceleratorVendor],
133
150
  Field(
@@ -233,6 +250,14 @@ class GPUSpec(CoreModel):
233
250
 
234
251
 
235
252
  class DiskSpec(CoreModel):
253
+ class Config:
254
+ @staticmethod
255
+ def schema_extra(schema: Dict[str, Any]):
256
+ add_extra_schema_types(
257
+ schema["properties"]["size"],
258
+ extra_types=[{"type": "integer"}, {"type": "string"}],
259
+ )
260
+
236
261
  size: Annotated[Range[Memory], Field(description="Disk size")]
237
262
 
238
263
  @classmethod
@@ -254,11 +279,26 @@ class ResourcesSpec(CoreModel):
254
279
  class Config:
255
280
  @staticmethod
256
281
  def schema_extra(schema: Dict[str, Any]):
257
- schema.clear()
258
- # replace strict schema with a more permissive one
259
- ref_template = "#/definitions/ResourcesSpecRequest/definitions/{model}"
260
- for field, value in ResourcesSpecSchema.schema(ref_template=ref_template).items():
261
- schema[field] = value
282
+ add_extra_schema_types(
283
+ schema["properties"]["cpu"],
284
+ extra_types=[{"type": "integer"}, {"type": "string"}],
285
+ )
286
+ add_extra_schema_types(
287
+ schema["properties"]["memory"],
288
+ extra_types=[{"type": "integer"}, {"type": "string"}],
289
+ )
290
+ add_extra_schema_types(
291
+ schema["properties"]["shm_size"],
292
+ extra_types=[{"type": "integer"}, {"type": "string"}],
293
+ )
294
+ add_extra_schema_types(
295
+ schema["properties"]["gpu"],
296
+ extra_types=[{"type": "integer"}, {"type": "string"}],
297
+ )
298
+ add_extra_schema_types(
299
+ schema["properties"]["disk"],
300
+ extra_types=[{"type": "integer"}, {"type": "string"}],
301
+ )
262
302
 
263
303
  cpu: Annotated[Range[int], Field(description="The number of CPU cores")] = DEFAULT_CPU_COUNT
264
304
  memory: Annotated[Range[Memory], Field(description="The RAM size (e.g., `8GB`)")] = (
@@ -290,74 +330,3 @@ class ResourcesSpec(CoreModel):
290
330
  resources.update(disk_size=self.disk.size)
291
331
  res = pretty_resources(**resources)
292
332
  return res
293
-
294
-
295
- IntRangeLike = Union[Range[Union[int, str]], int, str]
296
- MemoryRangeLike = Union[Range[Union[Memory, float, int, str]], float, int, str]
297
- MemoryLike = Union[Memory, float, int, str]
298
- GPULike = Union[GPUSpec, "GPUSpecSchema", int, str]
299
- DiskLike = Union[DiskSpec, "DiskSpecSchema", float, int, str]
300
- ComputeCapabilityLike = Union[ComputeCapability, float, str]
301
-
302
-
303
- class GPUSpecSchema(CoreModel):
304
- vendor: Annotated[
305
- Optional[gpuhunt.AcceleratorVendor],
306
- Field(
307
- description="The vendor of the GPU/accelerator, one of: `nvidia`, `amd`, `google` (alias: `tpu`), `intel`"
308
- ),
309
- ] = None
310
- name: Annotated[
311
- Optional[Union[List[str], str]], Field(description="The GPU name or list of names")
312
- ] = None
313
- count: Annotated[IntRangeLike, Field(description="The number of GPUs")] = DEFAULT_GPU_COUNT
314
- memory: Annotated[
315
- Optional[MemoryRangeLike],
316
- Field(
317
- description="The RAM size (e.g., `16GB`). Can be set to a range (e.g. `16GB..`, or `16GB..80GB`)"
318
- ),
319
- ] = None
320
- total_memory: Annotated[
321
- Optional[MemoryRangeLike],
322
- Field(
323
- description="The total RAM size (e.g., `32GB`). Can be set to a range (e.g. `16GB..`, or `16GB..80GB`)"
324
- ),
325
- ] = None
326
- compute_capability: Annotated[
327
- Optional[ComputeCapabilityLike],
328
- Field(description="The minimum compute capability of the GPU (e.g., `7.5`)"),
329
- ] = None
330
-
331
-
332
- class DiskSpecSchema(CoreModel):
333
- size: Annotated[
334
- MemoryRangeLike,
335
- Field(
336
- description="The disk size. Can be set to a range (e.g., `100GB..` or `100GB..200GB`)"
337
- ),
338
- ]
339
-
340
-
341
- class ResourcesSpecSchema(CoreModel):
342
- cpu: Annotated[Optional[IntRangeLike], Field(description="The number of CPU cores")] = (
343
- DEFAULT_CPU_COUNT
344
- )
345
- memory: Annotated[
346
- Optional[MemoryRangeLike],
347
- Field(description="The RAM size (e.g., `8GB`)"),
348
- ] = DEFAULT_MEMORY_SIZE
349
- shm_size: Annotated[
350
- Optional[MemoryLike],
351
- Field(
352
- description="The size of shared memory (e.g., `8GB`). "
353
- "If you are using parallel communicating processes (e.g., dataloaders in PyTorch), "
354
- "you may need to configure this"
355
- ),
356
- ] = None
357
- gpu: Annotated[
358
- Optional[GPULike],
359
- Field(
360
- description="The GPU requirements. Can be set to a number, a string (e.g. `A100`, `80GB:2`, etc.), or an object"
361
- ),
362
- ] = None
363
- disk: Annotated[Optional[DiskLike], Field(description="The disk resources")] = DEFAULT_DISK
@@ -20,7 +20,6 @@ from dstack._internal.core.models.profiles import (
20
20
  CreationPolicy,
21
21
  Profile,
22
22
  ProfileParams,
23
- ProfileRetryPolicy,
24
23
  RetryEvent,
25
24
  SpotPolicy,
26
25
  UtilizationPolicy,
@@ -178,6 +177,11 @@ class Gateway(CoreModel):
178
177
  options: dict = {}
179
178
 
180
179
 
180
+ class JobSSHKey(CoreModel):
181
+ private: str
182
+ public: str
183
+
184
+
181
185
  class JobSpec(CoreModel):
182
186
  replica_num: int = 0 # default value for backward compatibility
183
187
  job_num: int
@@ -198,9 +202,7 @@ class JobSpec(CoreModel):
198
202
  requirements: Requirements
199
203
  retry: Optional[Retry]
200
204
  volumes: Optional[List[MountPoint]] = None
201
- # For backward compatibility with 0.18.x when retry_policy was required.
202
- # TODO: remove in 0.19
203
- retry_policy: ProfileRetryPolicy = ProfileRetryPolicy(retry=False)
205
+ ssh_key: Optional[JobSSHKey] = None
204
206
  working_dir: Optional[str]
205
207
 
206
208
 
@@ -306,7 +308,7 @@ class RunSpec(CoreModel):
306
308
  run_name: Annotated[
307
309
  Optional[str],
308
310
  Field(description="The run name. If not set, the run name is generated automatically."),
309
- ]
311
+ ] = None
310
312
  repo_id: Annotated[
311
313
  Optional[str],
312
314
  Field(
@@ -316,15 +318,18 @@ class RunSpec(CoreModel):
316
318
  " If not specified, a default virtual repo is used."
317
319
  )
318
320
  ),
319
- ]
321
+ ] = None
320
322
  repo_data: Annotated[
321
323
  Optional[AnyRunRepoData],
322
324
  Field(
323
325
  discriminator="repo_type",
324
326
  description="The repo data such as the current branch and commit.",
325
327
  ),
326
- ]
327
- repo_code_hash: Annotated[Optional[str], Field(description="The hash of the repo diff")]
328
+ ] = None
329
+ repo_code_hash: Annotated[
330
+ Optional[str],
331
+ Field(description="The hash of the repo diff. Can be omitted if there is no repo diff."),
332
+ ] = None
328
333
  working_dir: Annotated[
329
334
  Optional[str],
330
335
  Field(
@@ -334,7 +339,7 @@ class RunSpec(CoreModel):
334
339
  ' Defaults to `"."`.'
335
340
  )
336
341
  ),
337
- ]
342
+ ] = None
338
343
  configuration_path: Annotated[
339
344
  Optional[str],
340
345
  Field(
@@ -343,9 +348,9 @@ class RunSpec(CoreModel):
343
348
  " It can be omitted when using the programmatic API."
344
349
  )
345
350
  ),
346
- ]
351
+ ] = None
347
352
  configuration: Annotated[AnyRunConfiguration, Field(discriminator="type")]
348
- profile: Annotated[Optional[Profile], Field(description="The profile parameters")]
353
+ profile: Annotated[Optional[Profile], Field(description="The profile parameters")] = None
349
354
  ssh_key_pub: Annotated[
350
355
  str,
351
356
  Field(
@@ -452,9 +457,7 @@ class RunPlan(CoreModel):
452
457
  run_spec: RunSpec
453
458
  job_plans: List[JobPlan]
454
459
  current_resource: Optional[Run] = None
455
- # Optional for backward-compatibility with 0.18.x servers
456
- # TODO: make required in 0.19
457
- action: Optional[ApplyAction] = None
460
+ action: ApplyAction
458
461
 
459
462
 
460
463
  class ApplyRunPlanInput(CoreModel):
@@ -470,11 +473,6 @@ class ApplyRunPlanInput(CoreModel):
470
473
  ] = None
471
474
 
472
475
 
473
- class PoolInstanceOffers(CoreModel):
474
- pool_name: str
475
- instances: List[InstanceOfferWithAvailability]
476
-
477
-
478
476
  def get_policy_map(spot_policy: Optional[SpotPolicy], default: SpotPolicy) -> Optional[bool]:
479
477
  """
480
478
  Map profile.spot_policy[SpotPolicy|None] to requirements.spot[bool|None]
@@ -86,9 +86,7 @@ class VolumeAttachment(CoreModel):
86
86
  class Volume(CoreModel):
87
87
  id: uuid.UUID
88
88
  name: str
89
- # Default user to "" for client backward compatibility (old 0.18 servers).
90
- # TODO: Remove in 0.19
91
- user: str = ""
89
+ user: str
92
90
  project_name: str
93
91
  configuration: VolumeConfiguration
94
92
  external: bool
@@ -12,18 +12,7 @@ from dstack._internal.core.models.runs import Retry
12
12
  def get_retry(profile: Profile) -> Optional[Retry]:
13
13
  profile_retry = profile.retry
14
14
  if profile_retry is None:
15
- # Handle retry_policy before retry was introduced
16
- # TODO: Remove once retry_policy no longer supported
17
- profile_retry_policy = profile.retry_policy
18
- if profile_retry_policy is None:
19
- return None
20
- if not profile_retry_policy.retry:
21
- return None
22
- duration = profile_retry_policy.duration or DEFAULT_RETRY_DURATION
23
- return Retry(
24
- on_events=[RetryEvent.NO_CAPACITY, RetryEvent.INTERRUPTION, RetryEvent.ERROR],
25
- duration=duration,
26
- )
15
+ return None
27
16
  if isinstance(profile_retry, bool):
28
17
  if profile_retry:
29
18
  return Retry(
@@ -32,6 +21,12 @@ def get_retry(profile: Profile) -> Optional[Retry]:
32
21
  )
33
22
  return None
34
23
  profile_retry = profile_retry.copy()
24
+ if profile_retry.on_events is None:
25
+ profile_retry.on_events = [
26
+ RetryEvent.NO_CAPACITY,
27
+ RetryEvent.INTERRUPTION,
28
+ RetryEvent.ERROR,
29
+ ]
35
30
  if profile_retry.duration is None:
36
31
  profile_retry.duration = DEFAULT_RETRY_DURATION
37
32
  return Retry.parse_obj(profile_retry)
@@ -42,10 +37,6 @@ def get_termination(
42
37
  ) -> Tuple[TerminationPolicy, int]:
43
38
  termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
44
39
  termination_idle_time = default_termination_idle_time
45
- if profile.termination_policy is not None:
46
- termination_policy = profile.termination_policy
47
- if profile.termination_idle_time is not None:
48
- termination_idle_time = profile.termination_idle_time
49
40
  if profile.idle_duration is not None and int(profile.idle_duration) < 0:
50
41
  termination_policy = TerminationPolicy.DONT_DESTROY
51
42
  elif profile.idle_duration is not None: