dstack 0.18.40rc1__py3-none-any.whl → 0.18.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/apply.py +8 -5
- dstack/_internal/cli/services/configurators/base.py +4 -2
- dstack/_internal/cli/services/configurators/fleet.py +21 -9
- dstack/_internal/cli/services/configurators/gateway.py +15 -0
- dstack/_internal/cli/services/configurators/run.py +6 -5
- dstack/_internal/cli/services/configurators/volume.py +15 -0
- dstack/_internal/cli/services/repos.py +3 -3
- dstack/_internal/cli/utils/fleet.py +44 -33
- dstack/_internal/cli/utils/run.py +27 -7
- dstack/_internal/cli/utils/volume.py +30 -9
- dstack/_internal/core/backends/aws/compute.py +94 -53
- dstack/_internal/core/backends/aws/resources.py +22 -12
- dstack/_internal/core/backends/azure/compute.py +2 -0
- dstack/_internal/core/backends/base/compute.py +20 -2
- dstack/_internal/core/backends/gcp/compute.py +32 -24
- dstack/_internal/core/backends/gcp/resources.py +0 -15
- dstack/_internal/core/backends/oci/compute.py +10 -5
- dstack/_internal/core/backends/oci/resources.py +23 -26
- dstack/_internal/core/backends/remote/provisioning.py +65 -27
- dstack/_internal/core/backends/runpod/compute.py +1 -0
- dstack/_internal/core/models/backends/azure.py +3 -1
- dstack/_internal/core/models/configurations.py +24 -1
- dstack/_internal/core/models/fleets.py +46 -0
- dstack/_internal/core/models/instances.py +5 -1
- dstack/_internal/core/models/pools.py +4 -1
- dstack/_internal/core/models/profiles.py +10 -4
- dstack/_internal/core/models/runs.py +23 -3
- dstack/_internal/core/models/volumes.py +26 -0
- dstack/_internal/core/services/ssh/attach.py +92 -53
- dstack/_internal/core/services/ssh/tunnel.py +58 -31
- dstack/_internal/proxy/gateway/routers/registry.py +2 -0
- dstack/_internal/proxy/gateway/schemas/registry.py +2 -0
- dstack/_internal/proxy/gateway/services/registry.py +4 -0
- dstack/_internal/proxy/lib/models.py +3 -0
- dstack/_internal/proxy/lib/services/service_connection.py +8 -1
- dstack/_internal/server/background/tasks/process_instances.py +73 -35
- dstack/_internal/server/background/tasks/process_metrics.py +9 -9
- dstack/_internal/server/background/tasks/process_running_jobs.py +77 -26
- dstack/_internal/server/background/tasks/process_runs.py +2 -12
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +121 -49
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +14 -3
- dstack/_internal/server/background/tasks/process_volumes.py +11 -1
- dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
- dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
- dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
- dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
- dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
- dstack/_internal/server/models.py +27 -23
- dstack/_internal/server/routers/runs.py +1 -0
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/services/backends/configurators/azure.py +34 -8
- dstack/_internal/server/services/config.py +9 -0
- dstack/_internal/server/services/fleets.py +32 -3
- dstack/_internal/server/services/gateways/client.py +9 -1
- dstack/_internal/server/services/jobs/__init__.py +217 -45
- dstack/_internal/server/services/jobs/configurators/base.py +47 -2
- dstack/_internal/server/services/offers.py +96 -10
- dstack/_internal/server/services/pools.py +98 -14
- dstack/_internal/server/services/proxy/repo.py +17 -3
- dstack/_internal/server/services/runner/client.py +9 -6
- dstack/_internal/server/services/runner/ssh.py +33 -5
- dstack/_internal/server/services/runs.py +48 -179
- dstack/_internal/server/services/services/__init__.py +9 -1
- dstack/_internal/server/services/volumes.py +68 -9
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js → main-2ac66bfcbd2e39830b88.js} +30 -31
- dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js.map → main-2ac66bfcbd2e39830b88.js.map} +1 -1
- dstack/_internal/server/statics/{main-fc56d1f4af8e57522a1c.css → main-ad5150a441de98cd8987.css} +1 -1
- dstack/_internal/server/testing/common.py +130 -61
- dstack/_internal/utils/common.py +22 -8
- dstack/_internal/utils/env.py +14 -0
- dstack/_internal/utils/ssh.py +1 -1
- dstack/api/server/_fleets.py +25 -1
- dstack/api/server/_runs.py +23 -2
- dstack/api/server/_volumes.py +12 -1
- dstack/version.py +1 -1
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/METADATA +1 -1
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/RECORD +104 -93
- tests/_internal/cli/services/configurators/test_profile.py +3 -3
- tests/_internal/core/services/ssh/test_tunnel.py +56 -4
- tests/_internal/proxy/gateway/routers/test_registry.py +30 -7
- tests/_internal/server/background/tasks/test_process_instances.py +138 -20
- tests/_internal/server/background/tasks/test_process_metrics.py +12 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +193 -0
- tests/_internal/server/background/tasks/test_process_runs.py +27 -3
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +53 -6
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +135 -17
- tests/_internal/server/routers/test_fleets.py +15 -2
- tests/_internal/server/routers/test_pools.py +6 -0
- tests/_internal/server/routers/test_runs.py +27 -0
- tests/_internal/server/routers/test_volumes.py +9 -2
- tests/_internal/server/services/jobs/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_base.py +72 -0
- tests/_internal/server/services/runner/test_client.py +22 -3
- tests/_internal/server/services/test_offers.py +167 -0
- tests/_internal/server/services/test_pools.py +109 -1
- tests/_internal/server/services/test_runs.py +5 -41
- tests/_internal/utils/test_common.py +21 -0
- tests/_internal/utils/test_env.py +38 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/WHEEL +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/top_level.txt +0 -0
|
@@ -5,6 +5,7 @@ from datetime import datetime, timezone
|
|
|
5
5
|
from typing import Dict, List, Optional, Union
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
|
+
import gpuhunt
|
|
8
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
9
10
|
|
|
10
11
|
from dstack._internal.core.models.backends.base import BackendType
|
|
@@ -13,14 +14,20 @@ from dstack._internal.core.models.configurations import (
|
|
|
13
14
|
AnyRunConfiguration,
|
|
14
15
|
DevEnvironmentConfiguration,
|
|
15
16
|
)
|
|
17
|
+
from dstack._internal.core.models.envs import Env
|
|
16
18
|
from dstack._internal.core.models.fleets import FleetConfiguration, FleetSpec, FleetStatus
|
|
17
19
|
from dstack._internal.core.models.gateways import GatewayStatus
|
|
18
20
|
from dstack._internal.core.models.instances import (
|
|
21
|
+
Disk,
|
|
22
|
+
Gpu,
|
|
23
|
+
InstanceAvailability,
|
|
19
24
|
InstanceConfiguration,
|
|
25
|
+
InstanceOfferWithAvailability,
|
|
20
26
|
InstanceStatus,
|
|
21
27
|
InstanceType,
|
|
22
28
|
RemoteConnectionInfo,
|
|
23
29
|
Resources,
|
|
30
|
+
SSHKey,
|
|
24
31
|
)
|
|
25
32
|
from dstack._internal.core.models.placement import (
|
|
26
33
|
PlacementGroupConfiguration,
|
|
@@ -48,7 +55,7 @@ from dstack._internal.core.models.runs import (
|
|
|
48
55
|
from dstack._internal.core.models.users import GlobalRole
|
|
49
56
|
from dstack._internal.core.models.volumes import (
|
|
50
57
|
Volume,
|
|
51
|
-
|
|
58
|
+
VolumeAttachment,
|
|
52
59
|
VolumeConfiguration,
|
|
53
60
|
VolumeProvisioningData,
|
|
54
61
|
VolumeStatus,
|
|
@@ -69,6 +76,7 @@ from dstack._internal.server.models import (
|
|
|
69
76
|
RepoModel,
|
|
70
77
|
RunModel,
|
|
71
78
|
UserModel,
|
|
79
|
+
VolumeAttachmentModel,
|
|
72
80
|
VolumeModel,
|
|
73
81
|
)
|
|
74
82
|
from dstack._internal.server.services.jobs import get_job_specs_from_run_spec
|
|
@@ -311,17 +319,30 @@ async def create_job(
|
|
|
311
319
|
return job
|
|
312
320
|
|
|
313
321
|
|
|
314
|
-
def get_job_provisioning_data(
|
|
322
|
+
def get_job_provisioning_data(
|
|
323
|
+
dockerized: bool = False,
|
|
324
|
+
backend: BackendType = BackendType.AWS,
|
|
325
|
+
region: str = "us-east-1",
|
|
326
|
+
gpu_count: int = 0,
|
|
327
|
+
cpu_count: int = 1,
|
|
328
|
+
memory_gib: float = 0.5,
|
|
329
|
+
spot: bool = False,
|
|
330
|
+
hostname: str = "127.0.0.4",
|
|
331
|
+
internal_ip: Optional[str] = "127.0.0.4",
|
|
332
|
+
) -> JobProvisioningData:
|
|
333
|
+
gpus = [Gpu(name="T4", memory_mib=16384, vendor=gpuhunt.AcceleratorVendor.NVIDIA)] * gpu_count
|
|
315
334
|
return JobProvisioningData(
|
|
316
|
-
backend=
|
|
335
|
+
backend=backend,
|
|
317
336
|
instance_type=InstanceType(
|
|
318
337
|
name="instance",
|
|
319
|
-
resources=Resources(
|
|
338
|
+
resources=Resources(
|
|
339
|
+
cpus=cpu_count, memory_mib=int(memory_gib * 1024), spot=spot, gpus=gpus
|
|
340
|
+
),
|
|
320
341
|
),
|
|
321
342
|
instance_id="instance_id",
|
|
322
|
-
hostname=
|
|
323
|
-
internal_ip=
|
|
324
|
-
region=
|
|
343
|
+
hostname=hostname,
|
|
344
|
+
internal_ip=internal_ip,
|
|
345
|
+
region=region,
|
|
325
346
|
price=10.5,
|
|
326
347
|
username="ubuntu",
|
|
327
348
|
ssh_port=22,
|
|
@@ -337,6 +358,8 @@ def get_job_runtime_data(
|
|
|
337
358
|
gpu: Optional[int] = None,
|
|
338
359
|
memory: Optional[float] = None,
|
|
339
360
|
ports: Optional[dict[int, int]] = None,
|
|
361
|
+
offer: Optional[InstanceOfferWithAvailability] = None,
|
|
362
|
+
volume_names: Optional[list[str]] = None,
|
|
340
363
|
) -> JobRuntimeData:
|
|
341
364
|
return JobRuntimeData(
|
|
342
365
|
network_mode=NetworkMode(network_mode),
|
|
@@ -344,6 +367,8 @@ def get_job_runtime_data(
|
|
|
344
367
|
gpu=gpu,
|
|
345
368
|
memory=Memory(memory) if memory is not None else None,
|
|
346
369
|
ports=ports,
|
|
370
|
+
offer=offer,
|
|
371
|
+
volume_names=volume_names,
|
|
347
372
|
)
|
|
348
373
|
|
|
349
374
|
|
|
@@ -481,56 +506,26 @@ async def create_instance(
|
|
|
481
506
|
termination_idle_time: int = DEFAULT_POOL_TERMINATION_IDLE_TIME,
|
|
482
507
|
region: str = "eu-west",
|
|
483
508
|
remote_connection_info: Optional[RemoteConnectionInfo] = None,
|
|
509
|
+
offer: Optional[InstanceOfferWithAvailability] = None,
|
|
484
510
|
job_provisioning_data: Optional[JobProvisioningData] = None,
|
|
511
|
+
total_blocks: Optional[int] = 1,
|
|
512
|
+
busy_blocks: int = 0,
|
|
485
513
|
name: str = "test_instance",
|
|
486
514
|
volumes: Optional[List[VolumeModel]] = None,
|
|
487
515
|
) -> InstanceModel:
|
|
488
516
|
if instance_id is None:
|
|
489
517
|
instance_id = uuid.uuid4()
|
|
490
518
|
if job_provisioning_data is None:
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
"description": "",
|
|
502
|
-
},
|
|
503
|
-
},
|
|
504
|
-
"instance_id": "running_instance.id",
|
|
505
|
-
"ssh_proxy": None,
|
|
506
|
-
"hostname": "running_instance.ip",
|
|
507
|
-
"region": region,
|
|
508
|
-
"price": 0.1,
|
|
509
|
-
"username": "root",
|
|
510
|
-
"ssh_port": 22,
|
|
511
|
-
"dockerized": True,
|
|
512
|
-
"backend_data": None,
|
|
513
|
-
}
|
|
514
|
-
else:
|
|
515
|
-
job_provisioning_data_dict = job_provisioning_data.dict()
|
|
516
|
-
offer = {
|
|
517
|
-
"backend": backend.value,
|
|
518
|
-
"instance": {
|
|
519
|
-
"name": "instance",
|
|
520
|
-
"resources": {
|
|
521
|
-
"cpus": 2,
|
|
522
|
-
"memory_mib": 12000,
|
|
523
|
-
"gpus": [],
|
|
524
|
-
"spot": spot,
|
|
525
|
-
"disk": {"size_mib": 102400},
|
|
526
|
-
"description": "",
|
|
527
|
-
},
|
|
528
|
-
},
|
|
529
|
-
"region": region,
|
|
530
|
-
"price": 1,
|
|
531
|
-
"availability": "available",
|
|
532
|
-
}
|
|
533
|
-
|
|
519
|
+
job_provisioning_data = get_job_provisioning_data(
|
|
520
|
+
dockerized=True,
|
|
521
|
+
backend=backend,
|
|
522
|
+
region=region,
|
|
523
|
+
spot=spot,
|
|
524
|
+
hostname="running_instance.ip",
|
|
525
|
+
internal_ip=None,
|
|
526
|
+
)
|
|
527
|
+
if offer is None:
|
|
528
|
+
offer = get_instance_offer_with_availability(backend=backend, region=region, spot=spot)
|
|
534
529
|
if profile is None:
|
|
535
530
|
profile = Profile(name="test_name")
|
|
536
531
|
|
|
@@ -548,6 +543,9 @@ async def create_instance(
|
|
|
548
543
|
|
|
549
544
|
if volumes is None:
|
|
550
545
|
volumes = []
|
|
546
|
+
volume_attachments = []
|
|
547
|
+
for volume in volumes:
|
|
548
|
+
volume_attachments.append(VolumeAttachmentModel(volume=volume))
|
|
551
549
|
|
|
552
550
|
im = InstanceModel(
|
|
553
551
|
id=instance_id,
|
|
@@ -561,8 +559,8 @@ async def create_instance(
|
|
|
561
559
|
created_at=created_at,
|
|
562
560
|
started_at=created_at,
|
|
563
561
|
finished_at=finished_at,
|
|
564
|
-
job_provisioning_data=json
|
|
565
|
-
offer=json
|
|
562
|
+
job_provisioning_data=job_provisioning_data.json(),
|
|
563
|
+
offer=offer.json(),
|
|
566
564
|
price=1,
|
|
567
565
|
region=region,
|
|
568
566
|
backend=backend,
|
|
@@ -572,14 +570,87 @@ async def create_instance(
|
|
|
572
570
|
requirements=requirements.json(),
|
|
573
571
|
instance_configuration=instance_configuration.json(),
|
|
574
572
|
remote_connection_info=remote_connection_info.json() if remote_connection_info else None,
|
|
575
|
-
|
|
576
|
-
|
|
573
|
+
volume_attachments=volume_attachments,
|
|
574
|
+
total_blocks=total_blocks,
|
|
575
|
+
busy_blocks=busy_blocks,
|
|
577
576
|
)
|
|
577
|
+
if job:
|
|
578
|
+
im.jobs.append(job)
|
|
578
579
|
session.add(im)
|
|
579
580
|
await session.commit()
|
|
580
581
|
return im
|
|
581
582
|
|
|
582
583
|
|
|
584
|
+
def get_instance_offer_with_availability(
    backend: BackendType = BackendType.AWS,
    region: str = "eu-west",
    gpu_count: int = 0,
    cpu_count: int = 2,
    memory_gib: float = 12,
    disk_gib: float = 100.0,
    spot: bool = False,
    blocks: int = 1,
    total_blocks: int = 1,
    availability_zones: Optional[List[str]] = None,
) -> InstanceOfferWithAvailability:
    """Build an ``InstanceOfferWithAvailability`` from simple scalar inputs.

    Args:
        backend: Backend the offer belongs to.
        region: Region of the offer.
        gpu_count: Number of "T4" GPUs (16384 MiB each) to put in the resources.
        cpu_count: Number of CPUs in the resources.
        memory_gib: Memory in GiB; stored as whole MiB.
        disk_gib: Disk size in GiB; stored as whole MiB.
        spot: Whether the resources are marked as spot.
        blocks: Passed through as the offer's ``blocks``.
        total_blocks: Passed through as the offer's ``total_blocks``.
        availability_zones: Passed through as the offer's ``availability_zones``.

    Returns:
        An offer named "instance" with price 1 and availability ``AVAILABLE``.
    """
    # Create one Gpu object per slot: `[obj] * n` would repeat a single shared
    # instance, so mutating one list element would silently change all of them.
    gpus = [
        Gpu(name="T4", memory_mib=16384, vendor=gpuhunt.AcceleratorVendor.NVIDIA)
        for _ in range(gpu_count)
    ]
    resources = Resources(
        cpus=cpu_count,
        memory_mib=int(memory_gib * 1024),
        gpus=gpus,
        spot=spot,
        disk=Disk(size_mib=int(disk_gib * 1024)),
        description="",
    )
    return InstanceOfferWithAvailability(
        backend=backend,
        instance=InstanceType(name="instance", resources=resources),
        region=region,
        price=1,
        availability=InstanceAvailability.AVAILABLE,
        availability_zones=availability_zones,
        blocks=blocks,
        total_blocks=total_blocks,
    )
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def get_remote_connection_info(
|
|
620
|
+
host: str = "10.0.0.10",
|
|
621
|
+
port: int = 22,
|
|
622
|
+
ssh_user: str = "ubuntu",
|
|
623
|
+
ssh_keys: Optional[list[SSHKey]] = None,
|
|
624
|
+
env: Optional[Union[Env, dict]] = None,
|
|
625
|
+
):
|
|
626
|
+
if ssh_keys is None:
|
|
627
|
+
ssh_keys = [
|
|
628
|
+
SSHKey(
|
|
629
|
+
public="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIO6mJxVbNtm0zXgMLvByrhXJCmJRveSrJxLB5/OzcyCk",
|
|
630
|
+
private="""
|
|
631
|
+
-----BEGIN OPENSSH PRIVATE KEY-----
|
|
632
|
+
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
|
|
633
|
+
QyNTUxOQAAACDupicVWzbZtM14DC7wcq4VyQpiUb3kqycSwefzs3MgpAAAAJCiWa5Volmu
|
|
634
|
+
VQAAAAtzc2gtZWQyNTUxOQAAACDupicVWzbZtM14DC7wcq4VyQpiUb3kqycSwefzs3MgpA
|
|
635
|
+
AAAEAncHi4AhS6XdMp5Gzd+IMse/4ekyQ54UngByf0Sp0uH+6mJxVbNtm0zXgMLvByrhXJ
|
|
636
|
+
CmJRveSrJxLB5/OzcyCkAAAACWRlZkBkZWZwYwECAwQ=
|
|
637
|
+
-----END OPENSSH PRIVATE KEY-----
|
|
638
|
+
""",
|
|
639
|
+
)
|
|
640
|
+
]
|
|
641
|
+
if env is None:
|
|
642
|
+
env = Env()
|
|
643
|
+
elif isinstance(env, dict):
|
|
644
|
+
env = Env.parse_obj(env)
|
|
645
|
+
return RemoteConnectionInfo(
|
|
646
|
+
host=host,
|
|
647
|
+
port=port,
|
|
648
|
+
ssh_user=ssh_user,
|
|
649
|
+
ssh_keys=ssh_keys,
|
|
650
|
+
env=env,
|
|
651
|
+
)
|
|
652
|
+
|
|
653
|
+
|
|
583
654
|
async def create_volume(
|
|
584
655
|
session: AsyncSession,
|
|
585
656
|
project: ProjectModel,
|
|
@@ -604,7 +675,7 @@ async def create_volume(
|
|
|
604
675
|
volume_provisioning_data=volume_provisioning_data.json()
|
|
605
676
|
if volume_provisioning_data
|
|
606
677
|
else None,
|
|
607
|
-
|
|
678
|
+
attachments=[],
|
|
608
679
|
deleted_at=deleted_at,
|
|
609
680
|
deleted=True if deleted_at else False,
|
|
610
681
|
)
|
|
@@ -626,16 +697,14 @@ def get_volume(
|
|
|
626
697
|
deleted: bool = False,
|
|
627
698
|
volume_id: Optional[str] = None,
|
|
628
699
|
provisioning_data: Optional[VolumeProvisioningData] = None,
|
|
629
|
-
|
|
630
|
-
device_name: Optional[str] = None,
|
|
700
|
+
attachments: Optional[List[VolumeAttachment]] = None,
|
|
631
701
|
) -> Volume:
|
|
632
702
|
if id_ is None:
|
|
633
703
|
id_ = uuid.uuid4()
|
|
634
704
|
if configuration is None:
|
|
635
705
|
configuration = get_volume_configuration()
|
|
636
|
-
if
|
|
637
|
-
|
|
638
|
-
attachment_data = VolumeAttachmentData(device_name=device_name)
|
|
706
|
+
if attachments is None:
|
|
707
|
+
attachments = []
|
|
639
708
|
return Volume(
|
|
640
709
|
id=id_,
|
|
641
710
|
name=name,
|
|
@@ -649,7 +718,7 @@ def get_volume(
|
|
|
649
718
|
deleted=deleted,
|
|
650
719
|
volume_id=volume_id,
|
|
651
720
|
provisioning_data=provisioning_data,
|
|
652
|
-
|
|
721
|
+
attachments=attachments,
|
|
653
722
|
)
|
|
654
723
|
|
|
655
724
|
|
dstack/_internal/utils/common.py
CHANGED
|
@@ -157,24 +157,38 @@ def parse_pretty_duration(duration: str) -> int:
|
|
|
157
157
|
return amount * multiplier
|
|
158
158
|
|
|
159
159
|
|
|
160
|
+
DURATION_UNITS_DESC = [
|
|
161
|
+
("w", 7 * 24 * 3600),
|
|
162
|
+
("d", 24 * 3600),
|
|
163
|
+
("h", 3600),
|
|
164
|
+
("m", 60),
|
|
165
|
+
("s", 1),
|
|
166
|
+
]
|
|
167
|
+
|
|
168
|
+
|
|
160
169
|
def format_pretty_duration(seconds: int) -> str:
|
|
161
170
|
if seconds == 0:
|
|
162
171
|
return "0s"
|
|
163
172
|
if seconds < 0:
|
|
164
173
|
raise ValueError("Seconds cannot be negative")
|
|
165
|
-
|
|
166
|
-
("w", 7 * 24 * 3600),
|
|
167
|
-
("d", 24 * 3600),
|
|
168
|
-
("h", 3600),
|
|
169
|
-
("m", 60),
|
|
170
|
-
("s", 1),
|
|
171
|
-
]
|
|
172
|
-
for unit, multiplier in units:
|
|
174
|
+
for unit, multiplier in DURATION_UNITS_DESC:
|
|
173
175
|
if seconds % multiplier == 0:
|
|
174
176
|
return f"{seconds // multiplier}{unit}"
|
|
175
177
|
return f"{seconds}s" # Fallback to seconds if no larger unit fits perfectly
|
|
176
178
|
|
|
177
179
|
|
|
180
|
+
def format_duration_multiunit(seconds: int) -> str:
    """Render a duration as space-separated units taken from DURATION_UNITS_DESC.

    Examples: 90 -> "1m 30s", 4545 -> "1h 15m 45s", 0 -> "0s".
    Units whose count is zero are left out of the result.

    Raises:
        ValueError: if ``seconds`` is negative.
    """
    if seconds < 0:
        raise ValueError("Seconds cannot be negative")
    remaining = seconds
    parts = []
    for suffix, size in DURATION_UNITS_DESC:
        # Peel off as many whole units of this size as fit; carry the rest on.
        count, remaining = divmod(remaining, size)
        if count:
            parts.append(f"{count}{suffix}")
    if not parts:
        return "0s"
    return " ".join(parts)
|
|
190
|
+
|
|
191
|
+
|
|
178
192
|
def sizeof_fmt(num, suffix="B"):
|
|
179
193
|
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
|
|
180
194
|
if abs(num) < 1024.0:
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_bool(name: str, default: bool = False) -> bool:
    """Read the environment variable ``name`` as a boolean.

    Args:
        name: Name of the environment variable.
        default: Value returned when the variable is not set.

    Returns:
        ``False`` for "0", "false" or "off"; ``True`` for "1", "true" or "on".
        Matching is case-insensitive and ignores surrounding whitespace.
        ``default`` is returned when the variable is not set at all.

    Raises:
        ValueError: if the variable is set to any other value (including "").
    """
    try:
        raw = os.environ[name]
    except KeyError:
        return default
    # Tolerate stray whitespace (e.g. "true " from a shell script or .env file)
    # in addition to case differences.
    value = raw.strip().lower()
    if value in ("0", "false", "off"):
        return False
    if value in ("1", "true", "on"):
        return True
    raise ValueError(f"Invalid bool value: {name}={value}")
|
dstack/_internal/utils/ssh.py
CHANGED
|
@@ -159,7 +159,7 @@ def get_ssh_config(path: PathLike, host: str) -> Optional[Dict[str, str]]:
|
|
|
159
159
|
return None
|
|
160
160
|
|
|
161
161
|
|
|
162
|
-
def update_ssh_config(path: PathLike, host: str, options: Dict[str, Union[str, FilePath]]):
|
|
162
|
+
def update_ssh_config(path: PathLike, host: str, options: Dict[str, Union[str, int, FilePath]]):
|
|
163
163
|
Path(path).parent.mkdir(parents=True, exist_ok=True)
|
|
164
164
|
with FileLock(str(path) + ".lock"):
|
|
165
165
|
copy_mode = True
|
dstack/api/server/_fleets.py
CHANGED
|
@@ -62,16 +62,29 @@ def _get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[_ExcludeDict]:
|
|
|
62
62
|
spec_excludes: _ExcludeDict = {}
|
|
63
63
|
configuration_excludes: _ExcludeDict = {}
|
|
64
64
|
profile_excludes: set[str] = set()
|
|
65
|
+
ssh_config_excludes: _ExcludeDict = {}
|
|
66
|
+
ssh_hosts_excludes: set[str] = set()
|
|
65
67
|
|
|
66
68
|
# TODO: Can be removed in 0.19
|
|
67
69
|
if fleet_spec.configuration_path is None:
|
|
68
70
|
spec_excludes["configuration_path"] = True
|
|
69
71
|
if fleet_spec.configuration.ssh_config is not None:
|
|
72
|
+
if fleet_spec.configuration.ssh_config.proxy_jump is None:
|
|
73
|
+
ssh_config_excludes["proxy_jump"] = True
|
|
74
|
+
if all(
|
|
75
|
+
isinstance(h, str) or h.proxy_jump is None
|
|
76
|
+
for h in fleet_spec.configuration.ssh_config.hosts
|
|
77
|
+
):
|
|
78
|
+
ssh_hosts_excludes.add("proxy_jump")
|
|
70
79
|
if all(
|
|
71
80
|
isinstance(h, str) or h.internal_ip is None
|
|
72
81
|
for h in fleet_spec.configuration.ssh_config.hosts
|
|
73
82
|
):
|
|
74
|
-
|
|
83
|
+
ssh_hosts_excludes.add("internal_ip")
|
|
84
|
+
if all(
|
|
85
|
+
isinstance(h, str) or h.blocks == 1 for h in fleet_spec.configuration.ssh_config.hosts
|
|
86
|
+
):
|
|
87
|
+
ssh_hosts_excludes.add("blocks")
|
|
75
88
|
# client >= 0.18.30 / server <= 0.18.29 compatibility tweak
|
|
76
89
|
if fleet_spec.configuration.reservation is None:
|
|
77
90
|
configuration_excludes["reservation"] = True
|
|
@@ -84,7 +97,18 @@ def _get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[_ExcludeDict]:
|
|
|
84
97
|
# client >= 0.18.38 / server <= 0.18.37 compatibility tweak
|
|
85
98
|
if fleet_spec.profile is not None and fleet_spec.profile.stop_duration is None:
|
|
86
99
|
profile_excludes.add("stop_duration")
|
|
100
|
+
# client >= 0.18.41 / server <= 0.18.40 compatibility tweak
|
|
101
|
+
if fleet_spec.configuration.availability_zones is None:
|
|
102
|
+
configuration_excludes["availability_zones"] = True
|
|
103
|
+
if fleet_spec.profile is not None and fleet_spec.profile.availability_zones is None:
|
|
104
|
+
profile_excludes.add("availability_zones")
|
|
105
|
+
if fleet_spec.configuration.blocks == 1:
|
|
106
|
+
configuration_excludes["blocks"] = True
|
|
87
107
|
|
|
108
|
+
if ssh_hosts_excludes:
|
|
109
|
+
ssh_config_excludes["hosts"] = {"__all__": ssh_hosts_excludes}
|
|
110
|
+
if ssh_config_excludes:
|
|
111
|
+
configuration_excludes["ssh_config"] = ssh_config_excludes
|
|
88
112
|
if configuration_excludes:
|
|
89
113
|
spec_excludes["configuration"] = configuration_excludes
|
|
90
114
|
if profile_excludes:
|
dstack/api/server/_runs.py
CHANGED
|
@@ -7,6 +7,7 @@ from pydantic import parse_obj_as
|
|
|
7
7
|
from dstack._internal.core.models.common import is_core_model_instance
|
|
8
8
|
from dstack._internal.core.models.configurations import (
|
|
9
9
|
STRIP_PREFIX_DEFAULT,
|
|
10
|
+
DevEnvironmentConfiguration,
|
|
10
11
|
ServiceConfiguration,
|
|
11
12
|
)
|
|
12
13
|
from dstack._internal.core.models.pools import Instance
|
|
@@ -82,7 +83,10 @@ class RunsAPIClient(APIClientGroup):
|
|
|
82
83
|
) -> Run:
|
|
83
84
|
plan_input: ApplyRunPlanInput = ApplyRunPlanInput.__response__.parse_obj(plan)
|
|
84
85
|
body = ApplyRunPlanRequest(plan=plan_input, force=force)
|
|
85
|
-
resp = self._request(
|
|
86
|
+
resp = self._request(
|
|
87
|
+
f"/api/project/{project_name}/runs/apply",
|
|
88
|
+
body=body.json(exclude=_get_apply_plan_excludes(plan_input)),
|
|
89
|
+
)
|
|
86
90
|
return parse_obj_as(Run.__response__, resp.json())
|
|
87
91
|
|
|
88
92
|
def submit(self, project_name: str, run_spec: RunSpec) -> Run:
|
|
@@ -121,8 +125,15 @@ class RunsAPIClient(APIClientGroup):
|
|
|
121
125
|
return parse_obj_as(Instance.__response__, resp.json())
|
|
122
126
|
|
|
123
127
|
|
|
128
|
+
def _get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[dict]:
    """Build the ``exclude`` mapping used when serializing an apply-plan request.

    The mapping produced by ``_get_run_spec_excludes`` is keyed by run spec
    fields (e.g. "configuration"), and the run spec is the ``run_spec``
    attribute of the plan. It therefore has to be nested under
    ``"plan" -> "run_spec"``; attaching it directly under ``"plan"`` would
    name fields that belong to the run spec, not to the plan.

    Args:
        plan: The plan that is sent as the ``plan`` field of the request body.

    Returns:
        ``{"plan": {"run_spec": <run spec excludes>}}``, or ``None`` when the
        run spec has nothing to exclude.
    """
    run_spec_excludes = _get_run_spec_excludes(plan.run_spec)
    if run_spec_excludes is None:
        return None
    return {"plan": {"run_spec": run_spec_excludes}}
|
|
133
|
+
|
|
134
|
+
|
|
124
135
|
def _get_run_spec_excludes(run_spec: RunSpec) -> Optional[dict]:
|
|
125
|
-
spec_excludes: dict[str,
|
|
136
|
+
spec_excludes: dict[str, Any] = {}
|
|
126
137
|
configuration_excludes: dict[str, Any] = {}
|
|
127
138
|
profile_excludes: set[str] = set()
|
|
128
139
|
configuration = run_spec.configuration
|
|
@@ -164,6 +175,16 @@ def _get_run_spec_excludes(run_spec: RunSpec) -> Optional[dict]:
|
|
|
164
175
|
for v in configuration.volumes
|
|
165
176
|
):
|
|
166
177
|
configuration_excludes["volumes"] = {"__all__": {"optional"}}
|
|
178
|
+
# client >= 0.18.41 / server <= 0.18.40 compatibility tweak
|
|
179
|
+
if configuration.availability_zones is None:
|
|
180
|
+
configuration_excludes["availability_zones"] = True
|
|
181
|
+
if profile is not None and profile.availability_zones is None:
|
|
182
|
+
profile_excludes.add("availability_zones")
|
|
183
|
+
if (
|
|
184
|
+
is_core_model_instance(configuration, DevEnvironmentConfiguration)
|
|
185
|
+
and configuration.inactivity_duration is None
|
|
186
|
+
):
|
|
187
|
+
configuration_excludes["inactivity_duration"] = True
|
|
167
188
|
|
|
168
189
|
if configuration_excludes:
|
|
169
190
|
spec_excludes["configuration"] = configuration_excludes
|
dstack/api/server/_volumes.py
CHANGED
|
@@ -27,9 +27,20 @@ class VolumesAPIClient(APIClientGroup):
|
|
|
27
27
|
configuration: VolumeConfiguration,
|
|
28
28
|
) -> Volume:
|
|
29
29
|
body = CreateVolumeRequest(configuration=configuration)
|
|
30
|
-
resp = self._request(
|
|
30
|
+
resp = self._request(
|
|
31
|
+
f"/api/project/{project_name}/volumes/create",
|
|
32
|
+
body=body.json(exclude=_get_volume_configuration_excludes(configuration)),
|
|
33
|
+
)
|
|
31
34
|
return parse_obj_as(Volume.__response__, resp.json())
|
|
32
35
|
|
|
33
36
|
def delete(self, project_name: str, names: List[str]) -> None:
|
|
34
37
|
body = DeleteVolumesRequest(names=names)
|
|
35
38
|
self._request(f"/api/project/{project_name}/volumes/delete", body=body.json())
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_volume_configuration_excludes(configuration: VolumeConfiguration) -> dict:
    """Build the ``exclude`` mapping for serializing a volume creation request.

    Always returns a dict with a single "configuration" key; its value lists
    the configuration fields to omit and is empty when nothing is omitted.
    """
    # client >= 0.18.41 / server <= 0.18.40 compatibility tweak
    omitted = {"availability_zone": True} if configuration.availability_zone is None else {}
    return {"configuration": omitted}
|
dstack/version.py
CHANGED