dstack 0.19.25rc1__py3-none-any.whl → 0.19.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/__init__.py +2 -2
- dstack/_internal/cli/commands/apply.py +3 -61
- dstack/_internal/cli/commands/attach.py +1 -1
- dstack/_internal/cli/commands/completion.py +1 -1
- dstack/_internal/cli/commands/delete.py +2 -2
- dstack/_internal/cli/commands/fleet.py +1 -1
- dstack/_internal/cli/commands/gateway.py +2 -2
- dstack/_internal/cli/commands/init.py +56 -24
- dstack/_internal/cli/commands/logs.py +1 -1
- dstack/_internal/cli/commands/metrics.py +1 -1
- dstack/_internal/cli/commands/offer.py +45 -7
- dstack/_internal/cli/commands/project.py +2 -2
- dstack/_internal/cli/commands/secrets.py +2 -2
- dstack/_internal/cli/commands/server.py +1 -1
- dstack/_internal/cli/commands/stop.py +1 -1
- dstack/_internal/cli/commands/volume.py +1 -1
- dstack/_internal/cli/main.py +2 -2
- dstack/_internal/cli/services/completion.py +2 -2
- dstack/_internal/cli/services/configurators/__init__.py +6 -2
- dstack/_internal/cli/services/configurators/base.py +6 -7
- dstack/_internal/cli/services/configurators/fleet.py +1 -3
- dstack/_internal/cli/services/configurators/gateway.py +2 -4
- dstack/_internal/cli/services/configurators/run.py +293 -58
- dstack/_internal/cli/services/configurators/volume.py +2 -4
- dstack/_internal/cli/services/profile.py +1 -1
- dstack/_internal/cli/services/repos.py +35 -48
- dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
- dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
- dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
- dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
- dstack/_internal/core/backends/aws/compute.py +6 -1
- dstack/_internal/core/backends/aws/configurator.py +11 -7
- dstack/_internal/core/backends/azure/configurator.py +11 -7
- dstack/_internal/core/backends/base/compute.py +33 -5
- dstack/_internal/core/backends/base/configurator.py +25 -13
- dstack/_internal/core/backends/base/offers.py +2 -0
- dstack/_internal/core/backends/cloudrift/configurator.py +13 -7
- dstack/_internal/core/backends/configurators.py +15 -0
- dstack/_internal/core/backends/cudo/configurator.py +11 -7
- dstack/_internal/core/backends/datacrunch/compute.py +5 -1
- dstack/_internal/core/backends/datacrunch/configurator.py +13 -7
- dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean/backend.py +16 -0
- dstack/_internal/core/backends/digitalocean/compute.py +5 -0
- dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
- dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
- dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
- dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
- dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
- dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
- dstack/_internal/core/backends/gcp/compute.py +32 -8
- dstack/_internal/core/backends/gcp/configurator.py +11 -7
- dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
- dstack/_internal/core/backends/hotaisle/compute.py +1 -6
- dstack/_internal/core/backends/hotaisle/configurator.py +13 -7
- dstack/_internal/core/backends/kubernetes/configurator.py +13 -7
- dstack/_internal/core/backends/lambdalabs/configurator.py +11 -7
- dstack/_internal/core/backends/models.py +7 -0
- dstack/_internal/core/backends/nebius/compute.py +1 -8
- dstack/_internal/core/backends/nebius/configurator.py +11 -7
- dstack/_internal/core/backends/nebius/resources.py +21 -11
- dstack/_internal/core/backends/oci/compute.py +4 -5
- dstack/_internal/core/backends/oci/configurator.py +11 -7
- dstack/_internal/core/backends/runpod/configurator.py +11 -7
- dstack/_internal/core/backends/template/configurator.py.jinja +11 -7
- dstack/_internal/core/backends/tensordock/configurator.py +13 -7
- dstack/_internal/core/backends/vastai/configurator.py +11 -7
- dstack/_internal/core/backends/vultr/compute.py +1 -5
- dstack/_internal/core/backends/vultr/configurator.py +11 -4
- dstack/_internal/core/compatibility/fleets.py +5 -0
- dstack/_internal/core/compatibility/gpus.py +13 -0
- dstack/_internal/core/compatibility/runs.py +9 -1
- dstack/_internal/core/models/backends/base.py +5 -1
- dstack/_internal/core/models/common.py +3 -3
- dstack/_internal/core/models/configurations.py +191 -32
- dstack/_internal/core/models/files.py +1 -1
- dstack/_internal/core/models/fleets.py +80 -3
- dstack/_internal/core/models/profiles.py +41 -11
- dstack/_internal/core/models/resources.py +46 -42
- dstack/_internal/core/models/runs.py +28 -5
- dstack/_internal/core/services/configs/__init__.py +6 -3
- dstack/_internal/core/services/profiles.py +2 -2
- dstack/_internal/core/services/repos.py +86 -79
- dstack/_internal/core/services/ssh/ports.py +1 -1
- dstack/_internal/proxy/lib/deps.py +6 -2
- dstack/_internal/server/app.py +22 -17
- dstack/_internal/server/background/tasks/process_fleets.py +109 -13
- dstack/_internal/server/background/tasks/process_gateways.py +4 -1
- dstack/_internal/server/background/tasks/process_instances.py +22 -73
- dstack/_internal/server/background/tasks/process_probes.py +1 -1
- dstack/_internal/server/background/tasks/process_running_jobs.py +12 -4
- dstack/_internal/server/background/tasks/process_runs.py +3 -1
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +67 -44
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
- dstack/_internal/server/background/tasks/process_volumes.py +1 -1
- dstack/_internal/server/db.py +8 -4
- dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
- dstack/_internal/server/models.py +6 -2
- dstack/_internal/server/routers/gpus.py +1 -6
- dstack/_internal/server/schemas/runner.py +11 -0
- dstack/_internal/server/services/backends/__init__.py +14 -8
- dstack/_internal/server/services/backends/handlers.py +6 -1
- dstack/_internal/server/services/docker.py +5 -5
- dstack/_internal/server/services/fleets.py +37 -38
- dstack/_internal/server/services/gateways/__init__.py +2 -0
- dstack/_internal/server/services/gateways/client.py +5 -2
- dstack/_internal/server/services/gateways/connection.py +1 -1
- dstack/_internal/server/services/gpus.py +50 -49
- dstack/_internal/server/services/instances.py +44 -4
- dstack/_internal/server/services/jobs/__init__.py +15 -4
- dstack/_internal/server/services/jobs/configurators/base.py +53 -17
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -4
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +6 -8
- dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +7 -9
- dstack/_internal/server/services/jobs/configurators/service.py +1 -3
- dstack/_internal/server/services/jobs/configurators/task.py +3 -3
- dstack/_internal/server/services/locking.py +5 -5
- dstack/_internal/server/services/logging.py +10 -2
- dstack/_internal/server/services/logs/__init__.py +8 -6
- dstack/_internal/server/services/logs/aws.py +330 -327
- dstack/_internal/server/services/logs/filelog.py +7 -6
- dstack/_internal/server/services/logs/gcp.py +141 -139
- dstack/_internal/server/services/plugins.py +1 -1
- dstack/_internal/server/services/projects.py +2 -5
- dstack/_internal/server/services/proxy/repo.py +5 -1
- dstack/_internal/server/services/requirements/__init__.py +0 -0
- dstack/_internal/server/services/requirements/combine.py +259 -0
- dstack/_internal/server/services/runner/client.py +7 -0
- dstack/_internal/server/services/runs.py +17 -1
- dstack/_internal/server/services/services/__init__.py +8 -2
- dstack/_internal/server/services/services/autoscalers.py +2 -0
- dstack/_internal/server/services/ssh.py +2 -1
- dstack/_internal/server/services/storage/__init__.py +5 -6
- dstack/_internal/server/services/storage/gcs.py +49 -49
- dstack/_internal/server/services/storage/s3.py +52 -52
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-4eecc75fbe64067eb1bc.js} +1146 -899
- dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-4eecc75fbe64067eb1bc.js.map} +1 -1
- dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-56191c63d516fd0041c4.css} +1 -1
- dstack/_internal/server/testing/common.py +7 -4
- dstack/_internal/server/utils/logging.py +3 -3
- dstack/_internal/server/utils/provisioning.py +3 -3
- dstack/_internal/utils/json_schema.py +3 -1
- dstack/_internal/utils/path.py +8 -1
- dstack/_internal/utils/ssh.py +7 -0
- dstack/_internal/utils/typing.py +14 -0
- dstack/api/_public/repos.py +62 -8
- dstack/api/_public/runs.py +19 -8
- dstack/api/server/__init__.py +17 -19
- dstack/api/server/_gpus.py +2 -1
- dstack/api/server/_group.py +4 -3
- dstack/api/server/_repos.py +20 -3
- dstack/plugins/builtin/rest_plugin/_plugin.py +1 -0
- dstack/version.py +1 -1
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/METADATA +2 -2
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/RECORD +160 -142
- dstack/api/huggingface/__init__.py +0 -73
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/WHEEL +0 -0
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -130,6 +130,12 @@ DEFAULT_GPU_COUNT = Range[int](min=1)
|
|
|
130
130
|
|
|
131
131
|
|
|
132
132
|
class CPUSpec(CoreModel):
|
|
133
|
+
arch: Annotated[
|
|
134
|
+
Optional[gpuhunt.CPUArchitecture],
|
|
135
|
+
Field(description="The CPU architecture, one of: `x86`, `arm`"),
|
|
136
|
+
] = None
|
|
137
|
+
count: Annotated[Range[int], Field(description="The number of CPU cores")] = DEFAULT_CPU_COUNT
|
|
138
|
+
|
|
133
139
|
class Config(CoreModel.Config):
|
|
134
140
|
@staticmethod
|
|
135
141
|
def schema_extra(schema: Dict[str, Any]):
|
|
@@ -138,12 +144,6 @@ class CPUSpec(CoreModel):
|
|
|
138
144
|
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
139
145
|
)
|
|
140
146
|
|
|
141
|
-
arch: Annotated[
|
|
142
|
-
Optional[gpuhunt.CPUArchitecture],
|
|
143
|
-
Field(description="The CPU architecture, one of: `x86`, `arm`"),
|
|
144
|
-
] = None
|
|
145
|
-
count: Annotated[Range[int], Field(description="The number of CPU cores")] = DEFAULT_CPU_COUNT
|
|
146
|
-
|
|
147
147
|
@classmethod
|
|
148
148
|
def __get_validators__(cls):
|
|
149
149
|
yield cls.parse
|
|
@@ -191,22 +191,6 @@ class CPUSpec(CoreModel):
|
|
|
191
191
|
|
|
192
192
|
|
|
193
193
|
class GPUSpec(CoreModel):
|
|
194
|
-
class Config(CoreModel.Config):
|
|
195
|
-
@staticmethod
|
|
196
|
-
def schema_extra(schema: Dict[str, Any]):
|
|
197
|
-
add_extra_schema_types(
|
|
198
|
-
schema["properties"]["count"],
|
|
199
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
200
|
-
)
|
|
201
|
-
add_extra_schema_types(
|
|
202
|
-
schema["properties"]["memory"],
|
|
203
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
204
|
-
)
|
|
205
|
-
add_extra_schema_types(
|
|
206
|
-
schema["properties"]["total_memory"],
|
|
207
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
208
|
-
)
|
|
209
|
-
|
|
210
194
|
vendor: Annotated[
|
|
211
195
|
Optional[gpuhunt.AcceleratorVendor],
|
|
212
196
|
Field(
|
|
@@ -234,6 +218,26 @@ class GPUSpec(CoreModel):
|
|
|
234
218
|
Field(description="The minimum compute capability of the GPU (e.g., `7.5`)"),
|
|
235
219
|
] = None
|
|
236
220
|
|
|
221
|
+
class Config(CoreModel.Config):
|
|
222
|
+
@staticmethod
|
|
223
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
224
|
+
add_extra_schema_types(
|
|
225
|
+
schema["properties"]["count"],
|
|
226
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
227
|
+
)
|
|
228
|
+
add_extra_schema_types(
|
|
229
|
+
schema["properties"]["name"],
|
|
230
|
+
extra_types=[{"type": "string"}],
|
|
231
|
+
)
|
|
232
|
+
add_extra_schema_types(
|
|
233
|
+
schema["properties"]["memory"],
|
|
234
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
235
|
+
)
|
|
236
|
+
add_extra_schema_types(
|
|
237
|
+
schema["properties"]["total_memory"],
|
|
238
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
239
|
+
)
|
|
240
|
+
|
|
237
241
|
@classmethod
|
|
238
242
|
def __get_validators__(cls):
|
|
239
243
|
yield cls.parse
|
|
@@ -314,6 +318,8 @@ class GPUSpec(CoreModel):
|
|
|
314
318
|
|
|
315
319
|
|
|
316
320
|
class DiskSpec(CoreModel):
|
|
321
|
+
size: Annotated[Range[Memory], Field(description="Disk size")]
|
|
322
|
+
|
|
317
323
|
class Config(CoreModel.Config):
|
|
318
324
|
@staticmethod
|
|
319
325
|
def schema_extra(schema: Dict[str, Any]):
|
|
@@ -322,8 +328,6 @@ class DiskSpec(CoreModel):
|
|
|
322
328
|
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
323
329
|
)
|
|
324
330
|
|
|
325
|
-
size: Annotated[Range[Memory], Field(description="Disk size")]
|
|
326
|
-
|
|
327
331
|
@classmethod
|
|
328
332
|
def __get_validators__(cls):
|
|
329
333
|
yield cls._parse
|
|
@@ -340,6 +344,24 @@ DEFAULT_DISK = DiskSpec(size=Range[Memory](min=Memory.parse("100GB"), max=None))
|
|
|
340
344
|
|
|
341
345
|
|
|
342
346
|
class ResourcesSpec(CoreModel):
|
|
347
|
+
# TODO: Remove Range[int] in 0.20. Range[int] for backward compatibility only.
|
|
348
|
+
cpu: Annotated[Union[CPUSpec, Range[int]], Field(description="The CPU requirements")] = (
|
|
349
|
+
CPUSpec()
|
|
350
|
+
)
|
|
351
|
+
memory: Annotated[Range[Memory], Field(description="The RAM size (e.g., `8GB`)")] = (
|
|
352
|
+
DEFAULT_MEMORY_SIZE
|
|
353
|
+
)
|
|
354
|
+
shm_size: Annotated[
|
|
355
|
+
Optional[Memory],
|
|
356
|
+
Field(
|
|
357
|
+
description="The size of shared memory (e.g., `8GB`). "
|
|
358
|
+
"If you are using parallel communicating processes (e.g., dataloaders in PyTorch), "
|
|
359
|
+
"you may need to configure this"
|
|
360
|
+
),
|
|
361
|
+
] = None
|
|
362
|
+
gpu: Annotated[Optional[GPUSpec], Field(description="The GPU requirements")] = None
|
|
363
|
+
disk: Annotated[Optional[DiskSpec], Field(description="The disk resources")] = DEFAULT_DISK
|
|
364
|
+
|
|
343
365
|
class Config(CoreModel.Config):
|
|
344
366
|
@staticmethod
|
|
345
367
|
def schema_extra(schema: Dict[str, Any]):
|
|
@@ -364,24 +386,6 @@ class ResourcesSpec(CoreModel):
|
|
|
364
386
|
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
365
387
|
)
|
|
366
388
|
|
|
367
|
-
# TODO: Remove Range[int] in 0.20. Range[int] for backward compatibility only.
|
|
368
|
-
cpu: Annotated[Union[CPUSpec, Range[int]], Field(description="The CPU requirements")] = (
|
|
369
|
-
CPUSpec()
|
|
370
|
-
)
|
|
371
|
-
memory: Annotated[Range[Memory], Field(description="The RAM size (e.g., `8GB`)")] = (
|
|
372
|
-
DEFAULT_MEMORY_SIZE
|
|
373
|
-
)
|
|
374
|
-
shm_size: Annotated[
|
|
375
|
-
Optional[Memory],
|
|
376
|
-
Field(
|
|
377
|
-
description="The size of shared memory (e.g., `8GB`). "
|
|
378
|
-
"If you are using parallel communicating processes (e.g., dataloaders in PyTorch), "
|
|
379
|
-
"you may need to configure this"
|
|
380
|
-
),
|
|
381
|
-
] = None
|
|
382
|
-
gpu: Annotated[Optional[GPUSpec], Field(description="The GPU requirements")] = None
|
|
383
|
-
disk: Annotated[Optional[DiskSpec], Field(description="The disk resources")] = DEFAULT_DISK
|
|
384
|
-
|
|
385
389
|
def pretty_format(self) -> str:
|
|
386
390
|
# TODO: Remove in 0.20. Use self.cpu directly
|
|
387
391
|
cpu = parse_obj_as(CPUSpec, self.cpu)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from datetime import datetime, timedelta
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from typing import Any, Dict, List, Literal, Optional, Type
|
|
4
|
+
from urllib.parse import urlparse
|
|
4
5
|
|
|
5
6
|
from pydantic import UUID4, Field, root_validator
|
|
6
7
|
from typing_extensions import Annotated
|
|
@@ -9,7 +10,7 @@ from dstack._internal.core.models.backends.base import BackendType
|
|
|
9
10
|
from dstack._internal.core.models.common import ApplyAction, CoreModel, NetworkMode, RegistryAuth
|
|
10
11
|
from dstack._internal.core.models.configurations import (
|
|
11
12
|
DEFAULT_PROBE_METHOD,
|
|
12
|
-
|
|
13
|
+
LEGACY_REPO_DIR,
|
|
13
14
|
AnyRunConfiguration,
|
|
14
15
|
HTTPHeaderSpec,
|
|
15
16
|
HTTPMethod,
|
|
@@ -258,6 +259,7 @@ class JobSpec(CoreModel):
|
|
|
258
259
|
retry: Optional[Retry]
|
|
259
260
|
volumes: Optional[List[MountPoint]] = None
|
|
260
261
|
ssh_key: Optional[JobSSHKey] = None
|
|
262
|
+
# `working_dir` is always absolute (if not None) since 0.19.27
|
|
261
263
|
working_dir: Optional[str]
|
|
262
264
|
# `repo_data` is optional for client compatibility with pre-0.19.17 servers and for compatibility
|
|
263
265
|
# with jobs submitted before 0.19.17. All new jobs are expected to have non-None `repo_data`.
|
|
@@ -267,6 +269,8 @@ class JobSpec(CoreModel):
|
|
|
267
269
|
# submitted before 0.19.17. See `_get_repo_code_hash` on how to get the correct `repo_code_hash`
|
|
268
270
|
# TODO: drop this comment when supporting jobs submitted before 0.19.17 is no longer relevant.
|
|
269
271
|
repo_code_hash: Optional[str] = None
|
|
272
|
+
# `repo_dir` was added in 0.19.27. Default value is set for backward compatibility
|
|
273
|
+
repo_dir: str = LEGACY_REPO_DIR
|
|
270
274
|
file_archives: list[FileArchiveMapping] = []
|
|
271
275
|
# None for non-services and pre-0.19.19 services. See `get_service_port`
|
|
272
276
|
service_port: Optional[int] = None
|
|
@@ -408,17 +412,27 @@ class RunSpec(CoreModel):
|
|
|
408
412
|
Optional[str],
|
|
409
413
|
Field(description="The hash of the repo diff. Can be omitted if there is no repo diff."),
|
|
410
414
|
] = None
|
|
415
|
+
repo_dir: Annotated[
|
|
416
|
+
Optional[str],
|
|
417
|
+
Field(
|
|
418
|
+
description=(
|
|
419
|
+
"The repo path inside the container. Relative paths are resolved"
|
|
420
|
+
f" relative to the working directory. Defaults to `{LEGACY_REPO_DIR}`."
|
|
421
|
+
)
|
|
422
|
+
),
|
|
423
|
+
] = None
|
|
411
424
|
file_archives: Annotated[
|
|
412
425
|
list[FileArchiveMapping],
|
|
413
|
-
Field(description="The list of file archive ID to container path mappings"),
|
|
426
|
+
Field(description="The list of file archive ID to container path mappings."),
|
|
414
427
|
] = []
|
|
428
|
+
# Server uses configuration.working_dir instead of this field since 0.19.27, but
|
|
429
|
+
# the field still exists for compatibility with older servers
|
|
415
430
|
working_dir: Annotated[
|
|
416
431
|
Optional[str],
|
|
417
432
|
Field(
|
|
418
433
|
description=(
|
|
419
|
-
"The path to the working directory inside the container."
|
|
420
|
-
|
|
421
|
-
' Defaults to `"."`.'
|
|
434
|
+
"The absolute path to the working directory inside the container."
|
|
435
|
+
" Defaults to the default working directory from the `image`."
|
|
422
436
|
)
|
|
423
437
|
),
|
|
424
438
|
] = None
|
|
@@ -483,6 +497,9 @@ class ServiceSpec(CoreModel):
|
|
|
483
497
|
model: Optional[ServiceModelSpec] = None
|
|
484
498
|
options: Dict[str, Any] = {}
|
|
485
499
|
|
|
500
|
+
def get_domain(self) -> Optional[str]:
|
|
501
|
+
return urlparse(self.url).hostname
|
|
502
|
+
|
|
486
503
|
|
|
487
504
|
class RunStatus(str, Enum):
|
|
488
505
|
PENDING = "pending"
|
|
@@ -502,10 +519,16 @@ class RunStatus(str, Enum):
|
|
|
502
519
|
return self in self.finished_statuses()
|
|
503
520
|
|
|
504
521
|
|
|
522
|
+
class RunFleet(CoreModel):
|
|
523
|
+
id: UUID4
|
|
524
|
+
name: str
|
|
525
|
+
|
|
526
|
+
|
|
505
527
|
class Run(CoreModel):
|
|
506
528
|
id: UUID4
|
|
507
529
|
project_name: str
|
|
508
530
|
user: str
|
|
531
|
+
fleet: Optional[RunFleet] = None
|
|
509
532
|
submitted_at: datetime
|
|
510
533
|
last_processed_at: datetime
|
|
511
534
|
status: RunStatus
|
|
@@ -38,7 +38,10 @@ class ConfigManager:
|
|
|
38
38
|
with open(self.config_filepath, "r") as f:
|
|
39
39
|
config = yaml.safe_load(f)
|
|
40
40
|
self.config = GlobalConfig.parse_obj(config)
|
|
41
|
-
except
|
|
41
|
+
except FileNotFoundError:
|
|
42
|
+
self.config = GlobalConfig()
|
|
43
|
+
except ValidationError:
|
|
44
|
+
logger.error(f"Error in `{self.config_filepath}`", exc_info=True)
|
|
42
45
|
self.config = GlobalConfig()
|
|
43
46
|
|
|
44
47
|
def get_project_config(self, name: Optional[str] = None) -> Optional[ProjectConfig]:
|
|
@@ -65,8 +68,8 @@ class ConfigManager:
|
|
|
65
68
|
if len(self.config.projects) == 1:
|
|
66
69
|
self.config.projects[0].default = True
|
|
67
70
|
|
|
68
|
-
def
|
|
69
|
-
return
|
|
71
|
+
def list_project_configs(self) -> list[ProjectConfig]:
|
|
72
|
+
return self.config.projects
|
|
70
73
|
|
|
71
74
|
def delete_project(self, name: str):
|
|
72
75
|
self.config.projects = [p for p in self.config.projects if p.name != name]
|
|
@@ -37,10 +37,10 @@ def get_termination(
|
|
|
37
37
|
) -> Tuple[TerminationPolicy, int]:
|
|
38
38
|
termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
|
|
39
39
|
termination_idle_time = default_termination_idle_time
|
|
40
|
-
if profile.idle_duration is not None and
|
|
40
|
+
if profile.idle_duration is not None and profile.idle_duration < 0:
|
|
41
41
|
termination_policy = TerminationPolicy.DONT_DESTROY
|
|
42
42
|
elif profile.idle_duration is not None:
|
|
43
43
|
termination_idle_time = profile.idle_duration
|
|
44
44
|
if termination_policy == TerminationPolicy.DONT_DESTROY:
|
|
45
45
|
termination_idle_time = -1
|
|
46
|
-
return termination_policy,
|
|
46
|
+
return termination_policy, termination_idle_time
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from contextlib import suppress
|
|
2
3
|
from pathlib import Path
|
|
4
|
+
from tempfile import NamedTemporaryFile
|
|
3
5
|
from typing import Optional, Union
|
|
4
6
|
|
|
5
|
-
import git
|
|
6
|
-
import requests
|
|
7
|
+
import git.cmd
|
|
7
8
|
import yaml
|
|
8
9
|
from git.exc import GitCommandError
|
|
9
10
|
|
|
@@ -13,11 +14,7 @@ from dstack._internal.core.models.repos import LocalRepo, RemoteRepo, RemoteRepo
|
|
|
13
14
|
from dstack._internal.core.models.repos.remote import GitRepoURL
|
|
14
15
|
from dstack._internal.utils.logging import get_logger
|
|
15
16
|
from dstack._internal.utils.path import PathLike
|
|
16
|
-
from dstack._internal.utils.ssh import
|
|
17
|
-
get_host_config,
|
|
18
|
-
make_ssh_command_for_git,
|
|
19
|
-
try_ssh_key_passphrase,
|
|
20
|
-
)
|
|
17
|
+
from dstack._internal.utils.ssh import get_host_config, make_git_env, try_ssh_key_passphrase
|
|
21
18
|
|
|
22
19
|
logger = get_logger(__name__)
|
|
23
20
|
|
|
@@ -29,117 +26,127 @@ class InvalidRepoCredentialsError(DstackError):
|
|
|
29
26
|
pass
|
|
30
27
|
|
|
31
28
|
|
|
32
|
-
def
|
|
29
|
+
def get_repo_creds_and_default_branch(
|
|
33
30
|
repo_url: str,
|
|
34
31
|
identity_file: Optional[PathLike] = None,
|
|
32
|
+
private_key: Optional[str] = None,
|
|
35
33
|
oauth_token: Optional[str] = None,
|
|
36
|
-
) -> RemoteRepoCreds:
|
|
34
|
+
) -> tuple[RemoteRepoCreds, Optional[str]]:
|
|
37
35
|
url = GitRepoURL.parse(repo_url, get_ssh_config=get_host_config)
|
|
38
36
|
|
|
39
37
|
# no auth
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
38
|
+
with suppress(InvalidRepoCredentialsError):
|
|
39
|
+
return _get_repo_creds_and_default_branch_https(url)
|
|
40
|
+
|
|
41
|
+
# ssh key provided by the user or pulled from the server
|
|
42
|
+
if identity_file is not None or private_key is not None:
|
|
43
|
+
if identity_file is not None:
|
|
44
|
+
private_key = _read_private_key(identity_file)
|
|
45
|
+
return _get_repo_creds_and_default_branch_ssh(url, identity_file, private_key)
|
|
46
|
+
elif private_key is not None:
|
|
47
|
+
with NamedTemporaryFile("w+", 0o600) as f:
|
|
48
|
+
f.write(private_key)
|
|
49
|
+
f.flush()
|
|
50
|
+
return _get_repo_creds_and_default_branch_ssh(url, f.name, private_key)
|
|
51
|
+
else:
|
|
52
|
+
assert False, "should not reach here"
|
|
53
|
+
|
|
54
|
+
# oauth token provided by the user or pulled from the server
|
|
54
55
|
if oauth_token is not None:
|
|
55
|
-
return
|
|
56
|
+
return _get_repo_creds_and_default_branch_https(url, oauth_token)
|
|
56
57
|
|
|
57
58
|
# key from ssh config
|
|
58
59
|
identities = get_host_config(url.original_host).get("identityfile")
|
|
59
60
|
if identities:
|
|
60
|
-
|
|
61
|
+
_identity_file = identities[0]
|
|
62
|
+
with suppress(InvalidRepoCredentialsError):
|
|
63
|
+
_private_key = _read_private_key(_identity_file)
|
|
64
|
+
return _get_repo_creds_and_default_branch_ssh(url, _identity_file, _private_key)
|
|
61
65
|
|
|
62
66
|
# token from gh config
|
|
63
67
|
if os.path.exists(gh_config_path):
|
|
64
68
|
with open(gh_config_path, "r") as f:
|
|
65
69
|
gh_hosts = yaml.load(f, Loader=yaml.FullLoader)
|
|
66
|
-
|
|
67
|
-
if
|
|
68
|
-
|
|
69
|
-
return
|
|
70
|
-
except InvalidRepoCredentialsError:
|
|
71
|
-
pass
|
|
70
|
+
_oauth_token = gh_hosts.get(url.host, {}).get("oauth_token")
|
|
71
|
+
if _oauth_token is not None:
|
|
72
|
+
with suppress(InvalidRepoCredentialsError):
|
|
73
|
+
return _get_repo_creds_and_default_branch_https(url, _oauth_token)
|
|
72
74
|
|
|
73
75
|
# default user key
|
|
74
76
|
if os.path.exists(default_ssh_key):
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
pass
|
|
77
|
+
with suppress(InvalidRepoCredentialsError):
|
|
78
|
+
_private_key = _read_private_key(default_ssh_key)
|
|
79
|
+
return _get_repo_creds_and_default_branch_ssh(url, default_ssh_key, _private_key)
|
|
79
80
|
|
|
80
81
|
raise InvalidRepoCredentialsError(
|
|
81
82
|
"No valid default Git credentials found. Pass valid `--token` or `--git-identity`."
|
|
82
83
|
)
|
|
83
84
|
|
|
84
85
|
|
|
85
|
-
def
|
|
86
|
+
def _get_repo_creds_and_default_branch_ssh(
|
|
87
|
+
url: GitRepoURL, identity_file: PathLike, private_key: str
|
|
88
|
+
) -> tuple[RemoteRepoCreds, Optional[str]]:
|
|
89
|
+
_url = url.as_ssh()
|
|
86
90
|
try:
|
|
87
|
-
|
|
88
|
-
except GitCommandError:
|
|
89
|
-
|
|
90
|
-
raise InvalidRepoCredentialsError(
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
91
|
+
default_branch = _get_repo_default_branch(_url, make_git_env(identity_file=identity_file))
|
|
92
|
+
except GitCommandError as e:
|
|
93
|
+
message = f"Cannot access `{_url}` using the `{identity_file}` private SSH key"
|
|
94
|
+
raise InvalidRepoCredentialsError(message) from e
|
|
95
|
+
creds = RemoteRepoCreds(
|
|
96
|
+
clone_url=_url,
|
|
97
|
+
private_key=private_key,
|
|
98
|
+
oauth_token=None,
|
|
99
|
+
)
|
|
100
|
+
return creds, default_branch
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _get_repo_creds_and_default_branch_https(
|
|
104
|
+
url: GitRepoURL, oauth_token: Optional[str] = None
|
|
105
|
+
) -> tuple[RemoteRepoCreds, Optional[str]]:
|
|
106
|
+
_url = url.as_https()
|
|
107
|
+
try:
|
|
108
|
+
default_branch = _get_repo_default_branch(url.as_https(oauth_token), make_git_env())
|
|
109
|
+
except GitCommandError as e:
|
|
110
|
+
message = f"Cannot access `{_url}`"
|
|
111
|
+
if oauth_token is not None:
|
|
112
|
+
masked_token = len(oauth_token[:-4]) * "*" + oauth_token[-4:]
|
|
113
|
+
message = f"{message} using the `{masked_token}` token"
|
|
114
|
+
raise InvalidRepoCredentialsError(message) from e
|
|
115
|
+
creds = RemoteRepoCreds(
|
|
116
|
+
clone_url=_url,
|
|
96
117
|
private_key=None,
|
|
118
|
+
oauth_token=oauth_token,
|
|
97
119
|
)
|
|
120
|
+
return creds, default_branch
|
|
98
121
|
|
|
99
122
|
|
|
100
|
-
def
|
|
123
|
+
def _get_repo_default_branch(url: str, env: dict[str, str]) -> Optional[str]:
|
|
124
|
+
# output example: "ref: refs/heads/dev\tHEAD\n545344f77c0df78367085952a97fc3a058eb4c65\tHEAD"
|
|
125
|
+
output: str = git.cmd.Git().ls_remote("--symref", url, "HEAD", env=env)
|
|
126
|
+
for line in output.splitlines():
|
|
127
|
+
# line format: `<oid> TAB <ref> LF`
|
|
128
|
+
oid, _, ref = line.partition("\t")
|
|
129
|
+
if oid.startswith("ref:") and ref == "HEAD":
|
|
130
|
+
return oid.rsplit("/", maxsplit=1)[-1]
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _read_private_key(identity_file: PathLike) -> str:
|
|
135
|
+
identity_file = Path(identity_file).expanduser().resolve()
|
|
101
136
|
if not Path(identity_file).exists():
|
|
102
|
-
raise InvalidRepoCredentialsError(f"The {identity_file} private SSH key doesn't exist")
|
|
137
|
+
raise InvalidRepoCredentialsError(f"The `{identity_file}` private SSH key doesn't exist")
|
|
103
138
|
if not os.access(identity_file, os.R_OK):
|
|
104
|
-
raise InvalidRepoCredentialsError(f"
|
|
139
|
+
raise InvalidRepoCredentialsError(f"Cannot access the `{identity_file}` private SSH key")
|
|
105
140
|
if not try_ssh_key_passphrase(identity_file):
|
|
106
141
|
raise InvalidRepoCredentialsError(
|
|
107
142
|
f"Cannot use the `{identity_file}` private SSH key. "
|
|
108
143
|
"Ensure that it is valid and passphrase-free"
|
|
109
144
|
)
|
|
110
|
-
with open(identity_file, "r") as
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
try:
|
|
114
|
-
git.cmd.Git().ls_remote(
|
|
115
|
-
url.as_ssh(), env=dict(GIT_SSH_COMMAND=make_ssh_command_for_git(identity_file))
|
|
116
|
-
)
|
|
117
|
-
except GitCommandError:
|
|
118
|
-
raise InvalidRepoCredentialsError(
|
|
119
|
-
f"Can't access `{url.as_ssh()}` using the `{identity_file}` private SSH key"
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
return RemoteRepoCreds(
|
|
123
|
-
clone_url=url.as_ssh(),
|
|
124
|
-
private_key=private_key,
|
|
125
|
-
oauth_token=None,
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def get_default_branch(remote_url: str) -> Optional[str]:
|
|
130
|
-
"""
|
|
131
|
-
Get the default branch of a remote Git repository.
|
|
132
|
-
"""
|
|
133
|
-
try:
|
|
134
|
-
output = git.cmd.Git().ls_remote("--symref", remote_url, "HEAD")
|
|
135
|
-
for line in output.splitlines():
|
|
136
|
-
if line.startswith("ref:"):
|
|
137
|
-
return line.split()[1].split("/")[-1]
|
|
138
|
-
except Exception as e:
|
|
139
|
-
logger.debug("Failed to get remote repo default branch: %s", repr(e))
|
|
140
|
-
return None
|
|
145
|
+
with open(identity_file, "r") as file:
|
|
146
|
+
return file.read()
|
|
141
147
|
|
|
142
148
|
|
|
149
|
+
# Used for `config.yml` only, remove it with `repos` in `config.yml`
|
|
143
150
|
def load_repo(config: RepoConfig) -> Union[RemoteRepo, LocalRepo]:
|
|
144
151
|
if config.repo_type == "remote":
|
|
145
152
|
return RemoteRepo(repo_id=config.repo_id, local_repo_dir=config.path)
|
|
@@ -74,7 +74,7 @@ class PortsLock:
|
|
|
74
74
|
try:
|
|
75
75
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
76
76
|
if IS_WINDOWS:
|
|
77
|
-
sock.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1)
|
|
77
|
+
sock.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) # type: ignore[attr-defined]
|
|
78
78
|
sock.bind(("", port))
|
|
79
79
|
return sock
|
|
80
80
|
except socket.error as e:
|
|
@@ -21,12 +21,16 @@ class ProxyDependencyInjector(ABC):
|
|
|
21
21
|
def __init__(self) -> None:
|
|
22
22
|
self._service_conn_pool = ServiceConnectionPool()
|
|
23
23
|
|
|
24
|
+
# Abstract AsyncGenerator does not need async def since
|
|
25
|
+
# type checkers infer a different type without yield in body.
|
|
26
|
+
# https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
|
|
27
|
+
|
|
24
28
|
@abstractmethod
|
|
25
|
-
|
|
29
|
+
def get_repo(self) -> AsyncGenerator[BaseProxyRepo, None]:
|
|
26
30
|
pass
|
|
27
31
|
|
|
28
32
|
@abstractmethod
|
|
29
|
-
|
|
33
|
+
def get_auth_provider(self) -> AsyncGenerator[BaseProxyAuthProvider, None]:
|
|
30
34
|
pass
|
|
31
35
|
|
|
32
36
|
async def get_service_connection_pool(self) -> ServiceConnectionPool:
|
dstack/_internal/server/app.py
CHANGED
|
@@ -110,9 +110,11 @@ async def lifespan(app: FastAPI):
|
|
|
110
110
|
_print_dstack_logo()
|
|
111
111
|
if not check_required_ssh_version():
|
|
112
112
|
logger.warning("OpenSSH 8.4+ is required. The dstack server may not work properly")
|
|
113
|
+
server_config_manager = None
|
|
114
|
+
server_config_loaded = False
|
|
113
115
|
if settings.SERVER_CONFIG_ENABLED:
|
|
114
116
|
server_config_manager = ServerConfigManager()
|
|
115
|
-
|
|
117
|
+
server_config_loaded = server_config_manager.load_config()
|
|
116
118
|
# Encryption has to be configured before working with users and projects
|
|
117
119
|
await server_config_manager.apply_encryption()
|
|
118
120
|
async with get_session_ctx() as session:
|
|
@@ -126,11 +128,9 @@ async def lifespan(app: FastAPI):
|
|
|
126
128
|
session=session,
|
|
127
129
|
user=admin,
|
|
128
130
|
)
|
|
129
|
-
if
|
|
130
|
-
server_config_dir =
|
|
131
|
-
|
|
132
|
-
)
|
|
133
|
-
if not config_loaded:
|
|
131
|
+
if server_config_manager is not None:
|
|
132
|
+
server_config_dir = _get_server_config_dir()
|
|
133
|
+
if not server_config_loaded:
|
|
134
134
|
logger.info("Initializing the default configuration...", {"show_path": False})
|
|
135
135
|
await server_config_manager.init_config(session=session)
|
|
136
136
|
logger.info(
|
|
@@ -153,6 +153,7 @@ async def lifespan(app: FastAPI):
|
|
|
153
153
|
)
|
|
154
154
|
if settings.SERVER_S3_BUCKET is not None or settings.SERVER_GCS_BUCKET is not None:
|
|
155
155
|
init_default_storage()
|
|
156
|
+
scheduler = None
|
|
156
157
|
if settings.SERVER_BACKGROUND_PROCESSING_ENABLED:
|
|
157
158
|
scheduler = start_background_tasks()
|
|
158
159
|
else:
|
|
@@ -167,7 +168,7 @@ async def lifespan(app: FastAPI):
|
|
|
167
168
|
for func in _ON_STARTUP_HOOKS:
|
|
168
169
|
await func(app)
|
|
169
170
|
yield
|
|
170
|
-
if
|
|
171
|
+
if scheduler is not None:
|
|
171
172
|
scheduler.shutdown()
|
|
172
173
|
PROBES_SCHEDULER.shutdown(wait=False)
|
|
173
174
|
await gateway_connections_pool.remove_all()
|
|
@@ -371,6 +372,18 @@ def _is_prometheus_request(request: Request) -> bool:
|
|
|
371
372
|
return request.url.path.startswith("/metrics")
|
|
372
373
|
|
|
373
374
|
|
|
375
|
+
def _sentry_traces_sampler(sampling_context: SamplingContext) -> float:
|
|
376
|
+
parent_sampling_decision = sampling_context["parent_sampled"]
|
|
377
|
+
if parent_sampling_decision is not None:
|
|
378
|
+
return float(parent_sampling_decision)
|
|
379
|
+
transaction_context = sampling_context["transaction_context"]
|
|
380
|
+
name = transaction_context.get("name")
|
|
381
|
+
if name is not None:
|
|
382
|
+
if name.startswith("background."):
|
|
383
|
+
return settings.SENTRY_TRACES_BACKGROUND_SAMPLE_RATE
|
|
384
|
+
return settings.SENTRY_TRACES_SAMPLE_RATE
|
|
385
|
+
|
|
386
|
+
|
|
374
387
|
def _print_dstack_logo():
|
|
375
388
|
console.print(
|
|
376
389
|
"""[purple]╱╱╭╮╱╱╭╮╱╱╱╱╱╱╭╮
|
|
@@ -387,13 +400,5 @@ def _print_dstack_logo():
|
|
|
387
400
|
)
|
|
388
401
|
|
|
389
402
|
|
|
390
|
-
def
|
|
391
|
-
|
|
392
|
-
if parent_sampling_decision is not None:
|
|
393
|
-
return float(parent_sampling_decision)
|
|
394
|
-
transaction_context = sampling_context["transaction_context"]
|
|
395
|
-
name = transaction_context.get("name")
|
|
396
|
-
if name is not None:
|
|
397
|
-
if name.startswith("background."):
|
|
398
|
-
return settings.SENTRY_TRACES_BACKGROUND_SAMPLE_RATE
|
|
399
|
-
return settings.SENTRY_TRACES_SAMPLE_RATE
|
|
403
|
+
def _get_server_config_dir() -> str:
|
|
404
|
+
return str(SERVER_CONFIG_FILE_PATH).replace(os.path.expanduser("~"), "~", 1)
|