dstack 0.18.42__py3-none-any.whl → 0.18.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/__init__.py +2 -1
- dstack/_internal/cli/commands/apply.py +4 -2
- dstack/_internal/cli/commands/attach.py +21 -1
- dstack/_internal/cli/commands/completion.py +20 -0
- dstack/_internal/cli/commands/delete.py +3 -1
- dstack/_internal/cli/commands/fleet.py +2 -1
- dstack/_internal/cli/commands/gateway.py +7 -2
- dstack/_internal/cli/commands/logs.py +3 -2
- dstack/_internal/cli/commands/stats.py +2 -1
- dstack/_internal/cli/commands/stop.py +2 -1
- dstack/_internal/cli/commands/volume.py +2 -1
- dstack/_internal/cli/main.py +6 -0
- dstack/_internal/cli/services/completion.py +86 -0
- dstack/_internal/cli/services/configurators/run.py +11 -17
- dstack/_internal/cli/utils/fleet.py +5 -1
- dstack/_internal/cli/utils/run.py +11 -0
- dstack/_internal/core/backends/aws/compute.py +23 -10
- dstack/_internal/core/backends/aws/resources.py +3 -3
- dstack/_internal/core/backends/azure/compute.py +15 -9
- dstack/_internal/core/backends/azure/resources.py +2 -0
- dstack/_internal/core/backends/base/compute.py +102 -2
- dstack/_internal/core/backends/base/offers.py +7 -1
- dstack/_internal/core/backends/cudo/compute.py +8 -4
- dstack/_internal/core/backends/datacrunch/compute.py +10 -4
- dstack/_internal/core/backends/gcp/auth.py +19 -13
- dstack/_internal/core/backends/gcp/compute.py +26 -20
- dstack/_internal/core/backends/gcp/resources.py +3 -10
- dstack/_internal/core/backends/kubernetes/compute.py +4 -3
- dstack/_internal/core/backends/lambdalabs/compute.py +9 -3
- dstack/_internal/core/backends/nebius/compute.py +2 -2
- dstack/_internal/core/backends/oci/compute.py +10 -4
- dstack/_internal/core/backends/runpod/compute.py +32 -7
- dstack/_internal/core/backends/runpod/config.py +8 -0
- dstack/_internal/core/backends/tensordock/compute.py +14 -3
- dstack/_internal/core/backends/vastai/compute.py +12 -2
- dstack/_internal/core/backends/vultr/api_client.py +3 -3
- dstack/_internal/core/backends/vultr/compute.py +9 -3
- dstack/_internal/core/models/backends/aws.py +2 -0
- dstack/_internal/core/models/backends/base.py +1 -0
- dstack/_internal/core/models/backends/runpod.py +2 -0
- dstack/_internal/core/models/configurations.py +2 -2
- dstack/_internal/core/models/profiles.py +46 -1
- dstack/_internal/core/models/runs.py +4 -0
- dstack/_internal/core/services/__init__.py +5 -1
- dstack/_internal/core/services/configs/__init__.py +3 -0
- dstack/_internal/server/app.py +11 -1
- dstack/_internal/server/background/__init__.py +10 -0
- dstack/_internal/server/background/tasks/common.py +22 -0
- dstack/_internal/server/background/tasks/process_instances.py +11 -18
- dstack/_internal/server/background/tasks/process_placement_groups.py +1 -0
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
- dstack/_internal/server/background/tasks/process_running_jobs.py +74 -34
- dstack/_internal/server/background/tasks/process_runs.py +1 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +4 -1
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +1 -7
- dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
- dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
- dstack/_internal/server/models.py +11 -0
- dstack/_internal/server/routers/logs.py +3 -0
- dstack/_internal/server/routers/metrics.py +21 -2
- dstack/_internal/server/routers/prometheus.py +36 -0
- dstack/_internal/server/security/permissions.py +1 -1
- dstack/_internal/server/services/backends/configurators/aws.py +31 -1
- dstack/_internal/server/services/backends/configurators/gcp.py +8 -15
- dstack/_internal/server/services/backends/configurators/runpod.py +3 -33
- dstack/_internal/server/services/config.py +24 -4
- dstack/_internal/server/services/fleets.py +1 -0
- dstack/_internal/server/services/gateways/__init__.py +1 -0
- dstack/_internal/server/services/jobs/__init__.py +12 -9
- dstack/_internal/server/services/jobs/configurators/base.py +9 -1
- dstack/_internal/server/services/jobs/configurators/dev.py +1 -3
- dstack/_internal/server/services/jobs/configurators/task.py +1 -3
- dstack/_internal/server/services/logs/__init__.py +78 -0
- dstack/_internal/server/services/{logs.py → logs/aws.py} +12 -207
- dstack/_internal/server/services/logs/base.py +47 -0
- dstack/_internal/server/services/logs/filelog.py +110 -0
- dstack/_internal/server/services/logs/gcp.py +165 -0
- dstack/_internal/server/services/metrics.py +103 -70
- dstack/_internal/server/services/pools.py +16 -17
- dstack/_internal/server/services/prometheus.py +87 -0
- dstack/_internal/server/services/proxy/routers/service_proxy.py +14 -7
- dstack/_internal/server/services/runner/client.py +14 -3
- dstack/_internal/server/services/runs.py +43 -15
- dstack/_internal/server/services/volumes.py +1 -0
- dstack/_internal/server/settings.py +6 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js → main-4eb116b97819badd1e2c.js} +131 -78
- dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js.map → main-4eb116b97819badd1e2c.js.map} +1 -1
- dstack/_internal/server/statics/{main-ad5150a441de98cd8987.css → main-da9f8c06a69c20dac23e.css} +1 -1
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/testing/common.py +50 -8
- dstack/api/_public/runs.py +4 -1
- dstack/api/server/_fleets.py +2 -0
- dstack/api/server/_runs.py +4 -0
- dstack/api/utils.py +3 -0
- dstack/version.py +2 -2
- {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/METADATA +13 -3
- {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/RECORD +115 -97
- tests/_internal/core/backends/base/__init__.py +0 -0
- tests/_internal/core/backends/base/test_compute.py +56 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +189 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +126 -1
- tests/_internal/server/conftest.py +4 -5
- tests/_internal/server/routers/test_backends.py +1 -0
- tests/_internal/server/routers/test_fleets.py +2 -0
- tests/_internal/server/routers/test_logs.py +1 -1
- tests/_internal/server/routers/test_metrics.py +15 -0
- tests/_internal/server/routers/test_prometheus.py +244 -0
- tests/_internal/server/routers/test_runs.py +81 -58
- tests/_internal/server/services/test_logs.py +3 -3
- tests/_internal/server/services/test_metrics.py +163 -0
- {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/WHEEL +0 -0
- {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/top_level.txt +0 -0
|
@@ -5,6 +5,7 @@ from typing import List, Optional
|
|
|
5
5
|
|
|
6
6
|
from rich_argparse import RichHelpFormatter
|
|
7
7
|
|
|
8
|
+
from dstack._internal.cli.services.completion import ProjectNameCompleter
|
|
8
9
|
from dstack._internal.cli.utils.common import configure_logging
|
|
9
10
|
from dstack.api import Client
|
|
10
11
|
|
|
@@ -61,7 +62,7 @@ class APIBaseCommand(BaseCommand):
|
|
|
61
62
|
help="The name of the project. Defaults to [code]$DSTACK_PROJECT[/]",
|
|
62
63
|
metavar="NAME",
|
|
63
64
|
default=os.getenv("DSTACK_PROJECT"),
|
|
64
|
-
)
|
|
65
|
+
).completer = ProjectNameCompleter()
|
|
65
66
|
|
|
66
67
|
def _command(self, args: argparse.Namespace):
|
|
67
68
|
configure_logging()
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
from argcomplete import FilesCompleter
|
|
5
|
+
|
|
4
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
5
7
|
from dstack._internal.cli.services.configurators import (
|
|
6
8
|
get_apply_configurator_class,
|
|
@@ -42,7 +44,7 @@ class ApplyCommand(APIBaseCommand):
|
|
|
42
44
|
metavar="FILE",
|
|
43
45
|
help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
|
|
44
46
|
dest="configuration_file",
|
|
45
|
-
)
|
|
47
|
+
).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
|
|
46
48
|
self._parser.add_argument(
|
|
47
49
|
"-y",
|
|
48
50
|
"--yes",
|
|
@@ -57,7 +59,7 @@ class ApplyCommand(APIBaseCommand):
|
|
|
57
59
|
self._parser.add_argument(
|
|
58
60
|
"-d",
|
|
59
61
|
"--detach",
|
|
60
|
-
help="Exit immediately after
|
|
62
|
+
help="Exit immediately after submitting configuration",
|
|
61
63
|
action="store_true",
|
|
62
64
|
)
|
|
63
65
|
repo_group = self._parser.add_argument_group("Repo Options")
|
|
@@ -6,6 +6,11 @@ from typing import Optional
|
|
|
6
6
|
|
|
7
7
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
8
8
|
from dstack._internal.cli.services.args import port_mapping
|
|
9
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
10
|
+
from dstack._internal.cli.services.configurators.run import (
|
|
11
|
+
get_run_exit_code,
|
|
12
|
+
print_finished_message,
|
|
13
|
+
)
|
|
9
14
|
from dstack._internal.cli.utils.common import console
|
|
10
15
|
from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT
|
|
11
16
|
from dstack._internal.core.errors import CLIError
|
|
@@ -57,7 +62,7 @@ class AttachCommand(APIBaseCommand):
|
|
|
57
62
|
type=int,
|
|
58
63
|
default=0,
|
|
59
64
|
)
|
|
60
|
-
self._parser.add_argument("run_name")
|
|
65
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter()
|
|
61
66
|
|
|
62
67
|
def _command(self, args: argparse.Namespace):
|
|
63
68
|
super()._command(args)
|
|
@@ -99,6 +104,21 @@ class AttachCommand(APIBaseCommand):
|
|
|
99
104
|
pass
|
|
100
105
|
finally:
|
|
101
106
|
run.detach()
|
|
107
|
+
# TODO: Handle run resubmissions similar to dstack apply
|
|
108
|
+
|
|
109
|
+
# After reading the logs, the run may not be marked as finished immediately.
|
|
110
|
+
# Give the run some time to transition to a finished state before exiting.
|
|
111
|
+
for _ in range(30):
|
|
112
|
+
run.refresh()
|
|
113
|
+
if run.status.is_finished():
|
|
114
|
+
print_finished_message(run)
|
|
115
|
+
exit(get_run_exit_code(run))
|
|
116
|
+
time.sleep(1)
|
|
117
|
+
console.print(
|
|
118
|
+
"[error]Lost run connection. Timed out waiting for run final status."
|
|
119
|
+
" Check `dstack ps` to see if it's done or failed."
|
|
120
|
+
)
|
|
121
|
+
exit(1)
|
|
102
122
|
|
|
103
123
|
|
|
104
124
|
_IGNORED_PORTS = [DSTACK_RUNNER_HTTP_PORT]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import argcomplete
|
|
2
|
+
|
|
3
|
+
from dstack._internal.cli.commands import BaseCommand
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CompletionCommand(BaseCommand):
|
|
7
|
+
NAME = "completion"
|
|
8
|
+
DESCRIPTION = "Generate shell completion scripts"
|
|
9
|
+
|
|
10
|
+
def _register(self):
|
|
11
|
+
super()._register()
|
|
12
|
+
self._parser.add_argument(
|
|
13
|
+
"shell",
|
|
14
|
+
help="The shell to generate the completion script for",
|
|
15
|
+
choices=["bash", "zsh"],
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
def _command(self, args):
|
|
19
|
+
super()._command(args)
|
|
20
|
+
print(argcomplete.shellcode(["dstack"], shell=args.shell))
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
from argcomplete import FilesCompleter
|
|
5
|
+
|
|
4
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
5
7
|
from dstack._internal.cli.services.configurators import (
|
|
6
8
|
get_apply_configurator_class,
|
|
@@ -22,7 +24,7 @@ class DeleteCommand(APIBaseCommand):
|
|
|
22
24
|
metavar="FILE",
|
|
23
25
|
help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
|
|
24
26
|
dest="configuration_file",
|
|
25
|
-
)
|
|
27
|
+
).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
|
|
26
28
|
self._parser.add_argument(
|
|
27
29
|
"-y",
|
|
28
30
|
"--yes",
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
from rich.live import Live
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
+
from dstack._internal.cli.services.completion import FleetNameCompleter
|
|
7
8
|
from dstack._internal.cli.utils.common import (
|
|
8
9
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
9
10
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -47,7 +48,7 @@ class FleetCommand(APIBaseCommand):
|
|
|
47
48
|
delete_parser.add_argument(
|
|
48
49
|
"name",
|
|
49
50
|
help="The name of the fleet",
|
|
50
|
-
)
|
|
51
|
+
).completer = FleetNameCompleter()
|
|
51
52
|
delete_parser.add_argument(
|
|
52
53
|
"-i",
|
|
53
54
|
"--instance",
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
from rich.live import Live
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
+
from dstack._internal.cli.services.completion import GatewayNameCompleter
|
|
7
8
|
from dstack._internal.cli.utils.common import (
|
|
8
9
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
9
10
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -59,7 +60,9 @@ class GatewayCommand(APIBaseCommand):
|
|
|
59
60
|
"delete", help="Delete a gateway", formatter_class=self._parser.formatter_class
|
|
60
61
|
)
|
|
61
62
|
delete_parser.set_defaults(subfunc=self._delete)
|
|
62
|
-
delete_parser.add_argument(
|
|
63
|
+
delete_parser.add_argument(
|
|
64
|
+
"name", help="The name of the gateway"
|
|
65
|
+
).completer = GatewayNameCompleter()
|
|
63
66
|
delete_parser.add_argument(
|
|
64
67
|
"-y", "--yes", action="store_true", help="Don't ask for confirmation"
|
|
65
68
|
)
|
|
@@ -68,7 +71,9 @@ class GatewayCommand(APIBaseCommand):
|
|
|
68
71
|
"update", help="Update a gateway", formatter_class=self._parser.formatter_class
|
|
69
72
|
)
|
|
70
73
|
update_parser.set_defaults(subfunc=self._update)
|
|
71
|
-
update_parser.add_argument(
|
|
74
|
+
update_parser.add_argument(
|
|
75
|
+
"name", help="The name of the gateway"
|
|
76
|
+
).completer = GatewayNameCompleter()
|
|
72
77
|
update_parser.add_argument(
|
|
73
78
|
"--set-default", action="store_true", help="Set it the default gateway for the project"
|
|
74
79
|
)
|
|
@@ -3,6 +3,7 @@ import sys
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
6
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
6
7
|
from dstack._internal.core.errors import CLIError
|
|
7
8
|
from dstack._internal.utils.logging import get_logger
|
|
8
9
|
|
|
@@ -33,7 +34,7 @@ class LogsCommand(APIBaseCommand):
|
|
|
33
34
|
)
|
|
34
35
|
self._parser.add_argument(
|
|
35
36
|
"--replica",
|
|
36
|
-
help="The
|
|
37
|
+
help="The replica number. Defaults to 0.",
|
|
37
38
|
type=int,
|
|
38
39
|
default=0,
|
|
39
40
|
)
|
|
@@ -43,7 +44,7 @@ class LogsCommand(APIBaseCommand):
|
|
|
43
44
|
type=int,
|
|
44
45
|
default=0,
|
|
45
46
|
)
|
|
46
|
-
self._parser.add_argument("run_name")
|
|
47
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter(all=True)
|
|
47
48
|
|
|
48
49
|
def _command(self, args: argparse.Namespace):
|
|
49
50
|
super()._command(args)
|
|
@@ -7,6 +7,7 @@ from rich.live import Live
|
|
|
7
7
|
from rich.table import Table
|
|
8
8
|
|
|
9
9
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
10
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
10
11
|
from dstack._internal.cli.utils.common import (
|
|
11
12
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
12
13
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -25,7 +26,7 @@ class StatsCommand(APIBaseCommand):
|
|
|
25
26
|
|
|
26
27
|
def _register(self):
|
|
27
28
|
super()._register()
|
|
28
|
-
self._parser.add_argument("run_name")
|
|
29
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter()
|
|
29
30
|
self._parser.add_argument(
|
|
30
31
|
"-w",
|
|
31
32
|
"--watch",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
3
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
4
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
4
5
|
from dstack._internal.cli.utils.common import confirm_ask
|
|
5
6
|
from dstack._internal.core.errors import CLIError
|
|
6
7
|
|
|
@@ -13,7 +14,7 @@ class StopCommand(APIBaseCommand):
|
|
|
13
14
|
super()._register()
|
|
14
15
|
self._parser.add_argument("-x", "--abort", action="store_true")
|
|
15
16
|
self._parser.add_argument("-y", "--yes", action="store_true")
|
|
16
|
-
self._parser.add_argument("run_name")
|
|
17
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter()
|
|
17
18
|
|
|
18
19
|
def _command(self, args: argparse.Namespace):
|
|
19
20
|
super()._command(args)
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
from rich.live import Live
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
+
from dstack._internal.cli.services.completion import VolumeNameCompleter
|
|
7
8
|
from dstack._internal.cli.utils.common import (
|
|
8
9
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
9
10
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -47,7 +48,7 @@ class VolumeCommand(APIBaseCommand):
|
|
|
47
48
|
delete_parser.add_argument(
|
|
48
49
|
"name",
|
|
49
50
|
help="The name of the volume",
|
|
50
|
-
)
|
|
51
|
+
).completer = VolumeNameCompleter()
|
|
51
52
|
delete_parser.add_argument(
|
|
52
53
|
"-y", "--yes", help="Don't ask for confirmation", action="store_true"
|
|
53
54
|
)
|
dstack/_internal/cli/main.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
|
+
import argcomplete
|
|
3
4
|
from rich.markup import escape
|
|
4
5
|
from rich_argparse import RichHelpFormatter
|
|
5
6
|
|
|
6
7
|
from dstack._internal.cli.commands.apply import ApplyCommand
|
|
7
8
|
from dstack._internal.cli.commands.attach import AttachCommand
|
|
9
|
+
from dstack._internal.cli.commands.completion import CompletionCommand
|
|
8
10
|
from dstack._internal.cli.commands.config import ConfigCommand
|
|
9
11
|
from dstack._internal.cli.commands.delete import DeleteCommand
|
|
10
12
|
from dstack._internal.cli.commands.fleet import FleetCommand
|
|
@@ -72,9 +74,13 @@ def main():
|
|
|
72
74
|
StatsCommand.register(subparsers)
|
|
73
75
|
StopCommand.register(subparsers)
|
|
74
76
|
VolumeCommand.register(subparsers)
|
|
77
|
+
CompletionCommand.register(subparsers)
|
|
78
|
+
|
|
79
|
+
argcomplete.autocomplete(parser, always_complete_options=False)
|
|
75
80
|
|
|
76
81
|
args, unknown_args = parser.parse_known_args()
|
|
77
82
|
args.unknown = unknown_args
|
|
83
|
+
|
|
78
84
|
try:
|
|
79
85
|
check_for_updates()
|
|
80
86
|
get_ssh_client_info()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Iterable, List, Optional
|
|
5
|
+
|
|
6
|
+
import argcomplete
|
|
7
|
+
from argcomplete.completers import BaseCompleter
|
|
8
|
+
|
|
9
|
+
from dstack._internal.core.errors import ConfigurationError
|
|
10
|
+
from dstack._internal.core.services.configs import ConfigManager
|
|
11
|
+
from dstack.api import Client
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseAPINameCompleter(BaseCompleter, ABC):
|
|
15
|
+
"""
|
|
16
|
+
Base class for name completers that fetch resource names via the API.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self):
|
|
20
|
+
super().__init__()
|
|
21
|
+
|
|
22
|
+
def get_api(self, parsed_args: argparse.Namespace) -> Optional[Client]:
|
|
23
|
+
argcomplete.debug(f"{self.__class__.__name__}: Retrieving API client")
|
|
24
|
+
project = getattr(parsed_args, "project", os.getenv("DSTACK_PROJECT"))
|
|
25
|
+
try:
|
|
26
|
+
return Client.from_config(project_name=project)
|
|
27
|
+
except ConfigurationError as e:
|
|
28
|
+
argcomplete.debug(f"{self.__class__.__name__}: Error initializing API client: {e}")
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
|
|
32
|
+
api = self.get_api(parsed_args)
|
|
33
|
+
if api is None:
|
|
34
|
+
return []
|
|
35
|
+
|
|
36
|
+
argcomplete.debug(f"{self.__class__.__name__}: Fetching completions")
|
|
37
|
+
try:
|
|
38
|
+
resource_names = self.fetch_resource_names(api)
|
|
39
|
+
return [name for name in resource_names if name.startswith(prefix)]
|
|
40
|
+
except Exception as e:
|
|
41
|
+
argcomplete.debug(
|
|
42
|
+
f"{self.__class__.__name__}: Error fetching resource completions: {e}"
|
|
43
|
+
)
|
|
44
|
+
return []
|
|
45
|
+
|
|
46
|
+
@abstractmethod
|
|
47
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
48
|
+
"""
|
|
49
|
+
Returns an iterable of resource names.
|
|
50
|
+
"""
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class RunNameCompleter(BaseAPINameCompleter):
|
|
55
|
+
def __init__(self, all: bool = False):
|
|
56
|
+
super().__init__()
|
|
57
|
+
self.all = all
|
|
58
|
+
|
|
59
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
60
|
+
return [r.name for r in api.runs.list(self.all)]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class FleetNameCompleter(BaseAPINameCompleter):
|
|
64
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
65
|
+
return [r.name for r in api.client.fleets.list(api.project)]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class VolumeNameCompleter(BaseAPINameCompleter):
|
|
69
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
70
|
+
return [r.name for r in api.client.volumes.list(api.project)]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GatewayNameCompleter(BaseAPINameCompleter):
|
|
74
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
75
|
+
return [r.name for r in api.client.gateways.list(api.project)]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ProjectNameCompleter(BaseCompleter):
|
|
79
|
+
"""
|
|
80
|
+
Completer for local project names.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
|
|
84
|
+
argcomplete.debug(f"{self.__class__.__name__}: Listing projects from ConfigManager")
|
|
85
|
+
projects = ConfigManager().list_projects()
|
|
86
|
+
return [p for p in projects if p.startswith(prefix)]
|
|
@@ -34,7 +34,6 @@ from dstack._internal.core.models.configurations import (
|
|
|
34
34
|
BaseRunConfigurationWithPorts,
|
|
35
35
|
DevEnvironmentConfiguration,
|
|
36
36
|
PortMapping,
|
|
37
|
-
PythonVersion,
|
|
38
37
|
RunConfigurationType,
|
|
39
38
|
ServiceConfiguration,
|
|
40
39
|
TaskConfiguration,
|
|
@@ -73,12 +72,6 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
|
|
|
73
72
|
):
|
|
74
73
|
self.apply_args(conf, configurator_args, unknown_args)
|
|
75
74
|
self.validate_gpu_vendor_and_image(conf)
|
|
76
|
-
if conf.python == PythonVersion.PY38:
|
|
77
|
-
logger.warning(
|
|
78
|
-
"Specifying [code]python: 3.8[/] in run configurations is deprecated"
|
|
79
|
-
" and will be forbidden in a future [code]dstack[/] release."
|
|
80
|
-
" Please upgrade your configuration to a newer Python version."
|
|
81
|
-
)
|
|
82
75
|
if repo is None:
|
|
83
76
|
repo = self.api.repos.load(Path.cwd())
|
|
84
77
|
config_manager = ConfigManager()
|
|
@@ -102,6 +95,7 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
|
|
|
102
95
|
reservation=profile.reservation,
|
|
103
96
|
spot_policy=profile.spot_policy,
|
|
104
97
|
retry_policy=profile.retry_policy,
|
|
98
|
+
utilization_policy=profile.utilization_policy,
|
|
105
99
|
max_duration=profile.max_duration,
|
|
106
100
|
stop_duration=profile.stop_duration,
|
|
107
101
|
max_price=profile.max_price,
|
|
@@ -238,8 +232,8 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
|
|
|
238
232
|
reattach = True
|
|
239
233
|
break
|
|
240
234
|
if run.status.is_finished():
|
|
241
|
-
|
|
242
|
-
exit(
|
|
235
|
+
print_finished_message(run)
|
|
236
|
+
exit(get_run_exit_code(run))
|
|
243
237
|
time.sleep(1)
|
|
244
238
|
if not reattach:
|
|
245
239
|
console.print(
|
|
@@ -439,7 +433,7 @@ class RunWithPortsConfigurator(BaseRunConfigurator):
|
|
|
439
433
|
):
|
|
440
434
|
super().apply_args(conf, args, unknown)
|
|
441
435
|
if args.ports:
|
|
442
|
-
conf.ports = list(
|
|
436
|
+
conf.ports = list(_merge_ports(conf.ports, args.ports).values())
|
|
443
437
|
|
|
444
438
|
|
|
445
439
|
class TaskConfigurator(RunWithPortsConfigurator):
|
|
@@ -475,17 +469,17 @@ class ServiceConfigurator(BaseRunConfigurator):
|
|
|
475
469
|
self.interpolate_run_args(conf.commands, unknown)
|
|
476
470
|
|
|
477
471
|
|
|
478
|
-
def
|
|
479
|
-
|
|
480
|
-
|
|
472
|
+
def _merge_ports(conf: List[PortMapping], args: List[PortMapping]) -> Dict[int, PortMapping]:
|
|
473
|
+
_unique_ports_constraint([pm.container_port for pm in conf])
|
|
474
|
+
_unique_ports_constraint([pm.container_port for pm in args])
|
|
481
475
|
ports = {pm.container_port: pm for pm in conf}
|
|
482
476
|
for pm in args: # override conf
|
|
483
477
|
ports[pm.container_port] = pm
|
|
484
|
-
|
|
478
|
+
_unique_ports_constraint([pm.local_port for pm in ports.values() if pm.local_port is not None])
|
|
485
479
|
return ports
|
|
486
480
|
|
|
487
481
|
|
|
488
|
-
def
|
|
482
|
+
def _unique_ports_constraint(ports: List[int]):
|
|
489
483
|
used_ports = set()
|
|
490
484
|
for i in ports:
|
|
491
485
|
if i in used_ports:
|
|
@@ -514,7 +508,7 @@ def _print_service_urls(run: Run) -> None:
|
|
|
514
508
|
console.print()
|
|
515
509
|
|
|
516
510
|
|
|
517
|
-
def
|
|
511
|
+
def print_finished_message(run: Run):
|
|
518
512
|
if run.status == RunStatus.DONE:
|
|
519
513
|
console.print("[code]Done[/]")
|
|
520
514
|
return
|
|
@@ -542,7 +536,7 @@ def _print_finished_message(run: Run):
|
|
|
542
536
|
console.print(f"[error]{message}[/]")
|
|
543
537
|
|
|
544
538
|
|
|
545
|
-
def
|
|
539
|
+
def get_run_exit_code(run: Run) -> int:
|
|
546
540
|
if run.status == RunStatus.DONE:
|
|
547
541
|
return 0
|
|
548
542
|
return 1
|
|
@@ -45,7 +45,11 @@ def get_fleets_table(
|
|
|
45
45
|
status = instance.status.value
|
|
46
46
|
total_blocks = instance.total_blocks
|
|
47
47
|
busy_blocks = instance.busy_blocks
|
|
48
|
-
if
|
|
48
|
+
if (
|
|
49
|
+
instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
|
|
50
|
+
and total_blocks is not None
|
|
51
|
+
and total_blocks > 1
|
|
52
|
+
):
|
|
49
53
|
status = f"{busy_blocks}/{total_blocks} {InstanceStatus.BUSY.value}"
|
|
50
54
|
if (
|
|
51
55
|
instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
|
|
@@ -4,6 +4,8 @@ from rich.markup import escape
|
|
|
4
4
|
from rich.table import Table
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.utils.common import NO_OFFERS_WARNING, add_row_from_dict, console
|
|
7
|
+
from dstack._internal.core.models.common import is_core_model_instance
|
|
8
|
+
from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
|
|
7
9
|
from dstack._internal.core.models.instances import InstanceAvailability
|
|
8
10
|
from dstack._internal.core.models.profiles import (
|
|
9
11
|
DEFAULT_RUN_TERMINATION_IDLE_TIME,
|
|
@@ -38,6 +40,13 @@ def print_run_plan(run_plan: RunPlan, offers_limit: int = 3):
|
|
|
38
40
|
if job_plan.job_spec.max_duration
|
|
39
41
|
else "-"
|
|
40
42
|
)
|
|
43
|
+
inactivity_duration = None
|
|
44
|
+
if is_core_model_instance(run_plan.run_spec.configuration, DevEnvironmentConfiguration):
|
|
45
|
+
inactivity_duration = "-"
|
|
46
|
+
if isinstance(run_plan.run_spec.configuration.inactivity_duration, int):
|
|
47
|
+
inactivity_duration = format_pretty_duration(
|
|
48
|
+
run_plan.run_spec.configuration.inactivity_duration
|
|
49
|
+
)
|
|
41
50
|
if job_plan.job_spec.retry is None:
|
|
42
51
|
retry = "-"
|
|
43
52
|
else:
|
|
@@ -72,6 +81,8 @@ def print_run_plan(run_plan: RunPlan, offers_limit: int = 3):
|
|
|
72
81
|
props.add_row(th("Resources"), pretty_req)
|
|
73
82
|
props.add_row(th("Max price"), max_price)
|
|
74
83
|
props.add_row(th("Max duration"), max_duration)
|
|
84
|
+
if inactivity_duration is not None: # None means n/a
|
|
85
|
+
props.add_row(th("Inactivity duration"), inactivity_duration)
|
|
75
86
|
props.add_row(th("Spot policy"), spot_policy)
|
|
76
87
|
props.add_row(th("Retry policy"), retry)
|
|
77
88
|
props.add_row(th("Creation policy"), creation_policy)
|
|
@@ -11,8 +11,11 @@ from dstack._internal import settings
|
|
|
11
11
|
from dstack._internal.core.backends.aws.config import AWSConfig
|
|
12
12
|
from dstack._internal.core.backends.base.compute import (
|
|
13
13
|
Compute,
|
|
14
|
+
generate_unique_gateway_instance_name,
|
|
15
|
+
generate_unique_instance_name,
|
|
16
|
+
generate_unique_volume_name,
|
|
14
17
|
get_gateway_user_data,
|
|
15
|
-
|
|
18
|
+
get_job_instance_name,
|
|
16
19
|
get_user_data,
|
|
17
20
|
merge_tags,
|
|
18
21
|
)
|
|
@@ -152,10 +155,12 @@ class AWSCompute(Compute):
|
|
|
152
155
|
if zones is not None and len(zones) == 0:
|
|
153
156
|
raise NoCapacityError("No eligible availability zones")
|
|
154
157
|
|
|
158
|
+
instance_name = generate_unique_instance_name(instance_config)
|
|
155
159
|
tags = {
|
|
156
|
-
"Name":
|
|
160
|
+
"Name": instance_name,
|
|
157
161
|
"owner": "dstack",
|
|
158
162
|
"dstack_project": project_name,
|
|
163
|
+
"dstack_name": instance_config.instance_name,
|
|
159
164
|
"dstack_user": instance_config.user,
|
|
160
165
|
}
|
|
161
166
|
tags = merge_tags(tags=tags, backend_tags=self.config.tags)
|
|
@@ -214,7 +219,7 @@ class AWSCompute(Compute):
|
|
|
214
219
|
disk_size=disk_size,
|
|
215
220
|
image_id=image_id,
|
|
216
221
|
instance_type=instance_offer.instance.name,
|
|
217
|
-
|
|
222
|
+
iam_instance_profile=self.config.iam_instance_profile,
|
|
218
223
|
user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
|
|
219
224
|
tags=aws_resources.make_tags(tags),
|
|
220
225
|
security_group_id=aws_resources.create_security_group(
|
|
@@ -259,6 +264,9 @@ class AWSCompute(Compute):
|
|
|
259
264
|
)
|
|
260
265
|
except botocore.exceptions.ClientError as e:
|
|
261
266
|
logger.warning("Got botocore.exceptions.ClientError: %s", e)
|
|
267
|
+
if e.response["Error"]["Code"] == "InvalidParameterValue":
|
|
268
|
+
msg = e.response["Error"].get("Message", "")
|
|
269
|
+
raise ComputeError(f"Invalid AWS request: {msg}")
|
|
262
270
|
continue
|
|
263
271
|
raise NoCapacityError()
|
|
264
272
|
|
|
@@ -274,7 +282,7 @@ class AWSCompute(Compute):
|
|
|
274
282
|
# TODO: run_job is the same for vm-based backends, refactor
|
|
275
283
|
instance_config = InstanceConfiguration(
|
|
276
284
|
project_name=run.project_name,
|
|
277
|
-
instance_name=
|
|
285
|
+
instance_name=get_job_instance_name(run, job), # TODO: generate name
|
|
278
286
|
ssh_keys=[
|
|
279
287
|
SSHKey(public=project_ssh_public_key.strip()),
|
|
280
288
|
],
|
|
@@ -342,10 +350,12 @@ class AWSCompute(Compute):
|
|
|
342
350
|
ec2_resource = self.session.resource("ec2", region_name=configuration.region)
|
|
343
351
|
ec2_client = self.session.client("ec2", region_name=configuration.region)
|
|
344
352
|
|
|
353
|
+
instance_name = generate_unique_gateway_instance_name(configuration)
|
|
345
354
|
tags = {
|
|
346
|
-
"Name":
|
|
355
|
+
"Name": instance_name,
|
|
347
356
|
"owner": "dstack",
|
|
348
357
|
"dstack_project": configuration.project_name,
|
|
358
|
+
"dstack_name": configuration.instance_name,
|
|
349
359
|
}
|
|
350
360
|
if settings.DSTACK_VERSION is not None:
|
|
351
361
|
tags["dstack_version"] = settings.DSTACK_VERSION
|
|
@@ -373,7 +383,7 @@ class AWSCompute(Compute):
|
|
|
373
383
|
disk_size=10,
|
|
374
384
|
image_id=aws_resources.get_gateway_image_id(ec2_client),
|
|
375
385
|
instance_type="t2.micro",
|
|
376
|
-
|
|
386
|
+
iam_instance_profile=None,
|
|
377
387
|
user_data=get_gateway_user_data(configuration.ssh_key_pub),
|
|
378
388
|
tags=tags,
|
|
379
389
|
security_group_id=security_group_id,
|
|
@@ -403,7 +413,7 @@ class AWSCompute(Compute):
|
|
|
403
413
|
|
|
404
414
|
logger.debug("Creating ALB for gateway %s...", configuration.instance_name)
|
|
405
415
|
response = elb_client.create_load_balancer(
|
|
406
|
-
Name=f"{
|
|
416
|
+
Name=f"{instance_name}-lb",
|
|
407
417
|
Subnets=subnets_ids,
|
|
408
418
|
SecurityGroups=[security_group_id],
|
|
409
419
|
Scheme="internet-facing" if configuration.public_ip else "internal",
|
|
@@ -418,7 +428,7 @@ class AWSCompute(Compute):
|
|
|
418
428
|
|
|
419
429
|
logger.debug("Creating Target Group for gateway %s...", configuration.instance_name)
|
|
420
430
|
response = elb_client.create_target_group(
|
|
421
|
-
Name=f"{
|
|
431
|
+
Name=f"{instance_name}-tg",
|
|
422
432
|
Protocol="HTTP",
|
|
423
433
|
Port=80,
|
|
424
434
|
VpcId=vpc_id,
|
|
@@ -496,6 +506,7 @@ class AWSCompute(Compute):
|
|
|
496
506
|
"Failed to terminate all gateway %s resources. backend_data parsing error.",
|
|
497
507
|
configuration.instance_name,
|
|
498
508
|
)
|
|
509
|
+
return
|
|
499
510
|
|
|
500
511
|
elb_client = self.session.client("elbv2", region_name=configuration.region)
|
|
501
512
|
|
|
@@ -535,11 +546,13 @@ class AWSCompute(Compute):
|
|
|
535
546
|
def create_volume(self, volume: Volume) -> VolumeProvisioningData:
|
|
536
547
|
ec2_client = self.session.client("ec2", region_name=volume.configuration.region)
|
|
537
548
|
|
|
549
|
+
volume_name = generate_unique_volume_name(volume)
|
|
538
550
|
tags = {
|
|
539
|
-
"Name":
|
|
551
|
+
"Name": volume_name,
|
|
540
552
|
"owner": "dstack",
|
|
541
|
-
"dstack_user": volume.user,
|
|
542
553
|
"dstack_project": volume.project_name,
|
|
554
|
+
"dstack_name": volume.name,
|
|
555
|
+
"dstack_user": volume.user,
|
|
543
556
|
}
|
|
544
557
|
tags = merge_tags(tags=tags, backend_tags=self.config.tags)
|
|
545
558
|
|
|
@@ -131,7 +131,7 @@ def create_instances_struct(
|
|
|
131
131
|
disk_size: int,
|
|
132
132
|
image_id: str,
|
|
133
133
|
instance_type: str,
|
|
134
|
-
|
|
134
|
+
iam_instance_profile: Optional[str],
|
|
135
135
|
user_data: str,
|
|
136
136
|
tags: List[Dict[str, str]],
|
|
137
137
|
security_group_id: str,
|
|
@@ -166,8 +166,8 @@ def create_instances_struct(
|
|
|
166
166
|
},
|
|
167
167
|
],
|
|
168
168
|
)
|
|
169
|
-
if
|
|
170
|
-
struct["IamInstanceProfile"] = {"
|
|
169
|
+
if iam_instance_profile:
|
|
170
|
+
struct["IamInstanceProfile"] = {"Name": iam_instance_profile}
|
|
171
171
|
if spot:
|
|
172
172
|
struct["InstanceMarketOptions"] = {
|
|
173
173
|
"MarketType": "spot",
|