dstack 0.18.41__py3-none-any.whl → 0.18.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/__init__.py +2 -1
- dstack/_internal/cli/commands/apply.py +4 -2
- dstack/_internal/cli/commands/attach.py +21 -1
- dstack/_internal/cli/commands/completion.py +20 -0
- dstack/_internal/cli/commands/delete.py +3 -1
- dstack/_internal/cli/commands/fleet.py +2 -1
- dstack/_internal/cli/commands/gateway.py +7 -2
- dstack/_internal/cli/commands/logs.py +3 -2
- dstack/_internal/cli/commands/stats.py +2 -1
- dstack/_internal/cli/commands/stop.py +2 -1
- dstack/_internal/cli/commands/volume.py +2 -1
- dstack/_internal/cli/main.py +6 -0
- dstack/_internal/cli/services/completion.py +86 -0
- dstack/_internal/cli/services/configurators/run.py +10 -17
- dstack/_internal/cli/utils/fleet.py +5 -1
- dstack/_internal/cli/utils/volume.py +9 -0
- dstack/_internal/core/backends/aws/compute.py +24 -11
- dstack/_internal/core/backends/aws/resources.py +3 -3
- dstack/_internal/core/backends/azure/compute.py +14 -8
- dstack/_internal/core/backends/azure/resources.py +2 -0
- dstack/_internal/core/backends/base/compute.py +102 -2
- dstack/_internal/core/backends/base/offers.py +7 -1
- dstack/_internal/core/backends/cudo/compute.py +8 -4
- dstack/_internal/core/backends/datacrunch/compute.py +10 -4
- dstack/_internal/core/backends/gcp/auth.py +19 -13
- dstack/_internal/core/backends/gcp/compute.py +27 -20
- dstack/_internal/core/backends/gcp/resources.py +3 -10
- dstack/_internal/core/backends/kubernetes/compute.py +4 -3
- dstack/_internal/core/backends/lambdalabs/compute.py +9 -3
- dstack/_internal/core/backends/nebius/compute.py +2 -2
- dstack/_internal/core/backends/oci/compute.py +10 -4
- dstack/_internal/core/backends/runpod/compute.py +11 -4
- dstack/_internal/core/backends/tensordock/compute.py +14 -3
- dstack/_internal/core/backends/vastai/compute.py +12 -2
- dstack/_internal/core/backends/vultr/api_client.py +3 -3
- dstack/_internal/core/backends/vultr/compute.py +9 -3
- dstack/_internal/core/models/backends/aws.py +2 -0
- dstack/_internal/core/models/backends/base.py +1 -0
- dstack/_internal/core/models/configurations.py +0 -1
- dstack/_internal/core/models/runs.py +3 -3
- dstack/_internal/core/models/volumes.py +23 -0
- dstack/_internal/core/services/__init__.py +5 -1
- dstack/_internal/core/services/configs/__init__.py +3 -0
- dstack/_internal/server/background/tasks/common.py +22 -0
- dstack/_internal/server/background/tasks/process_instances.py +13 -21
- dstack/_internal/server/background/tasks/process_running_jobs.py +13 -16
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +12 -7
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +7 -2
- dstack/_internal/server/background/tasks/process_volumes.py +11 -1
- dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
- dstack/_internal/server/models.py +17 -19
- dstack/_internal/server/routers/logs.py +3 -0
- dstack/_internal/server/services/backends/configurators/aws.py +31 -1
- dstack/_internal/server/services/backends/configurators/gcp.py +8 -15
- dstack/_internal/server/services/config.py +11 -1
- dstack/_internal/server/services/fleets.py +5 -1
- dstack/_internal/server/services/jobs/__init__.py +14 -11
- dstack/_internal/server/services/jobs/configurators/dev.py +1 -3
- dstack/_internal/server/services/jobs/configurators/task.py +1 -3
- dstack/_internal/server/services/logs/__init__.py +78 -0
- dstack/_internal/server/services/{logs.py → logs/aws.py} +12 -207
- dstack/_internal/server/services/logs/base.py +47 -0
- dstack/_internal/server/services/logs/filelog.py +110 -0
- dstack/_internal/server/services/logs/gcp.py +165 -0
- dstack/_internal/server/services/offers.py +7 -7
- dstack/_internal/server/services/pools.py +19 -20
- dstack/_internal/server/services/proxy/routers/service_proxy.py +14 -7
- dstack/_internal/server/services/runner/client.py +8 -5
- dstack/_internal/server/services/volumes.py +68 -9
- dstack/_internal/server/settings.py +3 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-ad5150a441de98cd8987.css → main-7510e71dfa9749a4e70e.css} +1 -1
- dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js → main-fe8fd9db55df8d10e648.js} +66 -66
- dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js.map → main-fe8fd9db55df8d10e648.js.map} +1 -1
- dstack/_internal/server/testing/common.py +46 -17
- dstack/api/_public/runs.py +1 -1
- dstack/version.py +2 -2
- {dstack-0.18.41.dist-info → dstack-0.18.43.dist-info}/METADATA +4 -3
- {dstack-0.18.41.dist-info → dstack-0.18.43.dist-info}/RECORD +97 -86
- tests/_internal/core/backends/base/__init__.py +0 -0
- tests/_internal/core/backends/base/test_compute.py +56 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +2 -1
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +5 -3
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +11 -6
- tests/_internal/server/conftest.py +4 -5
- tests/_internal/server/routers/test_backends.py +1 -0
- tests/_internal/server/routers/test_logs.py +1 -1
- tests/_internal/server/routers/test_runs.py +2 -2
- tests/_internal/server/routers/test_volumes.py +9 -2
- tests/_internal/server/services/runner/test_client.py +22 -3
- tests/_internal/server/services/test_logs.py +3 -3
- tests/_internal/server/services/test_offers.py +167 -0
- tests/_internal/server/services/test_pools.py +105 -1
- {dstack-0.18.41.dist-info → dstack-0.18.43.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.41.dist-info → dstack-0.18.43.dist-info}/WHEEL +0 -0
- {dstack-0.18.41.dist-info → dstack-0.18.43.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.41.dist-info → dstack-0.18.43.dist-info}/top_level.txt +0 -0
|
@@ -5,6 +5,7 @@ from typing import List, Optional
|
|
|
5
5
|
|
|
6
6
|
from rich_argparse import RichHelpFormatter
|
|
7
7
|
|
|
8
|
+
from dstack._internal.cli.services.completion import ProjectNameCompleter
|
|
8
9
|
from dstack._internal.cli.utils.common import configure_logging
|
|
9
10
|
from dstack.api import Client
|
|
10
11
|
|
|
@@ -61,7 +62,7 @@ class APIBaseCommand(BaseCommand):
|
|
|
61
62
|
help="The name of the project. Defaults to [code]$DSTACK_PROJECT[/]",
|
|
62
63
|
metavar="NAME",
|
|
63
64
|
default=os.getenv("DSTACK_PROJECT"),
|
|
64
|
-
)
|
|
65
|
+
).completer = ProjectNameCompleter()
|
|
65
66
|
|
|
66
67
|
def _command(self, args: argparse.Namespace):
|
|
67
68
|
configure_logging()
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
from argcomplete import FilesCompleter
|
|
5
|
+
|
|
4
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
5
7
|
from dstack._internal.cli.services.configurators import (
|
|
6
8
|
get_apply_configurator_class,
|
|
@@ -42,7 +44,7 @@ class ApplyCommand(APIBaseCommand):
|
|
|
42
44
|
metavar="FILE",
|
|
43
45
|
help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
|
|
44
46
|
dest="configuration_file",
|
|
45
|
-
)
|
|
47
|
+
).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
|
|
46
48
|
self._parser.add_argument(
|
|
47
49
|
"-y",
|
|
48
50
|
"--yes",
|
|
@@ -57,7 +59,7 @@ class ApplyCommand(APIBaseCommand):
|
|
|
57
59
|
self._parser.add_argument(
|
|
58
60
|
"-d",
|
|
59
61
|
"--detach",
|
|
60
|
-
help="Exit immediately after
|
|
62
|
+
help="Exit immediately after submitting configuration",
|
|
61
63
|
action="store_true",
|
|
62
64
|
)
|
|
63
65
|
repo_group = self._parser.add_argument_group("Repo Options")
|
|
@@ -6,6 +6,11 @@ from typing import Optional
|
|
|
6
6
|
|
|
7
7
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
8
8
|
from dstack._internal.cli.services.args import port_mapping
|
|
9
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
10
|
+
from dstack._internal.cli.services.configurators.run import (
|
|
11
|
+
get_run_exit_code,
|
|
12
|
+
print_finished_message,
|
|
13
|
+
)
|
|
9
14
|
from dstack._internal.cli.utils.common import console
|
|
10
15
|
from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT
|
|
11
16
|
from dstack._internal.core.errors import CLIError
|
|
@@ -57,7 +62,7 @@ class AttachCommand(APIBaseCommand):
|
|
|
57
62
|
type=int,
|
|
58
63
|
default=0,
|
|
59
64
|
)
|
|
60
|
-
self._parser.add_argument("run_name")
|
|
65
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter()
|
|
61
66
|
|
|
62
67
|
def _command(self, args: argparse.Namespace):
|
|
63
68
|
super()._command(args)
|
|
@@ -99,6 +104,21 @@ class AttachCommand(APIBaseCommand):
|
|
|
99
104
|
pass
|
|
100
105
|
finally:
|
|
101
106
|
run.detach()
|
|
107
|
+
# TODO: Handle run resubmissions similar to dstack apply
|
|
108
|
+
|
|
109
|
+
# After reading the logs, the run may not be marked as finished immediately.
|
|
110
|
+
# Give the run some time to transition to a finished state before exiting.
|
|
111
|
+
for _ in range(30):
|
|
112
|
+
run.refresh()
|
|
113
|
+
if run.status.is_finished():
|
|
114
|
+
print_finished_message(run)
|
|
115
|
+
exit(get_run_exit_code(run))
|
|
116
|
+
time.sleep(1)
|
|
117
|
+
console.print(
|
|
118
|
+
"[error]Lost run connection. Timed out waiting for run final status."
|
|
119
|
+
" Check `dstack ps` to see if it's done or failed."
|
|
120
|
+
)
|
|
121
|
+
exit(1)
|
|
102
122
|
|
|
103
123
|
|
|
104
124
|
_IGNORED_PORTS = [DSTACK_RUNNER_HTTP_PORT]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import argcomplete
|
|
2
|
+
|
|
3
|
+
from dstack._internal.cli.commands import BaseCommand
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CompletionCommand(BaseCommand):
|
|
7
|
+
NAME = "completion"
|
|
8
|
+
DESCRIPTION = "Generate shell completion scripts"
|
|
9
|
+
|
|
10
|
+
def _register(self):
|
|
11
|
+
super()._register()
|
|
12
|
+
self._parser.add_argument(
|
|
13
|
+
"shell",
|
|
14
|
+
help="The shell to generate the completion script for",
|
|
15
|
+
choices=["bash", "zsh"],
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
def _command(self, args):
|
|
19
|
+
super()._command(args)
|
|
20
|
+
print(argcomplete.shellcode(["dstack"], shell=args.shell))
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
from argcomplete import FilesCompleter
|
|
5
|
+
|
|
4
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
5
7
|
from dstack._internal.cli.services.configurators import (
|
|
6
8
|
get_apply_configurator_class,
|
|
@@ -22,7 +24,7 @@ class DeleteCommand(APIBaseCommand):
|
|
|
22
24
|
metavar="FILE",
|
|
23
25
|
help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
|
|
24
26
|
dest="configuration_file",
|
|
25
|
-
)
|
|
27
|
+
).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
|
|
26
28
|
self._parser.add_argument(
|
|
27
29
|
"-y",
|
|
28
30
|
"--yes",
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
from rich.live import Live
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
+
from dstack._internal.cli.services.completion import FleetNameCompleter
|
|
7
8
|
from dstack._internal.cli.utils.common import (
|
|
8
9
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
9
10
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -47,7 +48,7 @@ class FleetCommand(APIBaseCommand):
|
|
|
47
48
|
delete_parser.add_argument(
|
|
48
49
|
"name",
|
|
49
50
|
help="The name of the fleet",
|
|
50
|
-
)
|
|
51
|
+
).completer = FleetNameCompleter()
|
|
51
52
|
delete_parser.add_argument(
|
|
52
53
|
"-i",
|
|
53
54
|
"--instance",
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
from rich.live import Live
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
+
from dstack._internal.cli.services.completion import GatewayNameCompleter
|
|
7
8
|
from dstack._internal.cli.utils.common import (
|
|
8
9
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
9
10
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -59,7 +60,9 @@ class GatewayCommand(APIBaseCommand):
|
|
|
59
60
|
"delete", help="Delete a gateway", formatter_class=self._parser.formatter_class
|
|
60
61
|
)
|
|
61
62
|
delete_parser.set_defaults(subfunc=self._delete)
|
|
62
|
-
delete_parser.add_argument(
|
|
63
|
+
delete_parser.add_argument(
|
|
64
|
+
"name", help="The name of the gateway"
|
|
65
|
+
).completer = GatewayNameCompleter()
|
|
63
66
|
delete_parser.add_argument(
|
|
64
67
|
"-y", "--yes", action="store_true", help="Don't ask for confirmation"
|
|
65
68
|
)
|
|
@@ -68,7 +71,9 @@ class GatewayCommand(APIBaseCommand):
|
|
|
68
71
|
"update", help="Update a gateway", formatter_class=self._parser.formatter_class
|
|
69
72
|
)
|
|
70
73
|
update_parser.set_defaults(subfunc=self._update)
|
|
71
|
-
update_parser.add_argument(
|
|
74
|
+
update_parser.add_argument(
|
|
75
|
+
"name", help="The name of the gateway"
|
|
76
|
+
).completer = GatewayNameCompleter()
|
|
72
77
|
update_parser.add_argument(
|
|
73
78
|
"--set-default", action="store_true", help="Set it the default gateway for the project"
|
|
74
79
|
)
|
|
@@ -3,6 +3,7 @@ import sys
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
6
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
6
7
|
from dstack._internal.core.errors import CLIError
|
|
7
8
|
from dstack._internal.utils.logging import get_logger
|
|
8
9
|
|
|
@@ -33,7 +34,7 @@ class LogsCommand(APIBaseCommand):
|
|
|
33
34
|
)
|
|
34
35
|
self._parser.add_argument(
|
|
35
36
|
"--replica",
|
|
36
|
-
help="The
|
|
37
|
+
help="The replica number. Defaults to 0.",
|
|
37
38
|
type=int,
|
|
38
39
|
default=0,
|
|
39
40
|
)
|
|
@@ -43,7 +44,7 @@ class LogsCommand(APIBaseCommand):
|
|
|
43
44
|
type=int,
|
|
44
45
|
default=0,
|
|
45
46
|
)
|
|
46
|
-
self._parser.add_argument("run_name")
|
|
47
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter(all=True)
|
|
47
48
|
|
|
48
49
|
def _command(self, args: argparse.Namespace):
|
|
49
50
|
super()._command(args)
|
|
@@ -7,6 +7,7 @@ from rich.live import Live
|
|
|
7
7
|
from rich.table import Table
|
|
8
8
|
|
|
9
9
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
10
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
10
11
|
from dstack._internal.cli.utils.common import (
|
|
11
12
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
12
13
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -25,7 +26,7 @@ class StatsCommand(APIBaseCommand):
|
|
|
25
26
|
|
|
26
27
|
def _register(self):
|
|
27
28
|
super()._register()
|
|
28
|
-
self._parser.add_argument("run_name")
|
|
29
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter()
|
|
29
30
|
self._parser.add_argument(
|
|
30
31
|
"-w",
|
|
31
32
|
"--watch",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
3
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
4
|
+
from dstack._internal.cli.services.completion import RunNameCompleter
|
|
4
5
|
from dstack._internal.cli.utils.common import confirm_ask
|
|
5
6
|
from dstack._internal.core.errors import CLIError
|
|
6
7
|
|
|
@@ -13,7 +14,7 @@ class StopCommand(APIBaseCommand):
|
|
|
13
14
|
super()._register()
|
|
14
15
|
self._parser.add_argument("-x", "--abort", action="store_true")
|
|
15
16
|
self._parser.add_argument("-y", "--yes", action="store_true")
|
|
16
|
-
self._parser.add_argument("run_name")
|
|
17
|
+
self._parser.add_argument("run_name").completer = RunNameCompleter()
|
|
17
18
|
|
|
18
19
|
def _command(self, args: argparse.Namespace):
|
|
19
20
|
super()._command(args)
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
from rich.live import Live
|
|
5
5
|
|
|
6
6
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
+
from dstack._internal.cli.services.completion import VolumeNameCompleter
|
|
7
8
|
from dstack._internal.cli.utils.common import (
|
|
8
9
|
LIVE_TABLE_PROVISION_INTERVAL_SECS,
|
|
9
10
|
LIVE_TABLE_REFRESH_RATE_PER_SEC,
|
|
@@ -47,7 +48,7 @@ class VolumeCommand(APIBaseCommand):
|
|
|
47
48
|
delete_parser.add_argument(
|
|
48
49
|
"name",
|
|
49
50
|
help="The name of the volume",
|
|
50
|
-
)
|
|
51
|
+
).completer = VolumeNameCompleter()
|
|
51
52
|
delete_parser.add_argument(
|
|
52
53
|
"-y", "--yes", help="Don't ask for confirmation", action="store_true"
|
|
53
54
|
)
|
dstack/_internal/cli/main.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
|
+
import argcomplete
|
|
3
4
|
from rich.markup import escape
|
|
4
5
|
from rich_argparse import RichHelpFormatter
|
|
5
6
|
|
|
6
7
|
from dstack._internal.cli.commands.apply import ApplyCommand
|
|
7
8
|
from dstack._internal.cli.commands.attach import AttachCommand
|
|
9
|
+
from dstack._internal.cli.commands.completion import CompletionCommand
|
|
8
10
|
from dstack._internal.cli.commands.config import ConfigCommand
|
|
9
11
|
from dstack._internal.cli.commands.delete import DeleteCommand
|
|
10
12
|
from dstack._internal.cli.commands.fleet import FleetCommand
|
|
@@ -72,9 +74,13 @@ def main():
|
|
|
72
74
|
StatsCommand.register(subparsers)
|
|
73
75
|
StopCommand.register(subparsers)
|
|
74
76
|
VolumeCommand.register(subparsers)
|
|
77
|
+
CompletionCommand.register(subparsers)
|
|
78
|
+
|
|
79
|
+
argcomplete.autocomplete(parser, always_complete_options=False)
|
|
75
80
|
|
|
76
81
|
args, unknown_args = parser.parse_known_args()
|
|
77
82
|
args.unknown = unknown_args
|
|
83
|
+
|
|
78
84
|
try:
|
|
79
85
|
check_for_updates()
|
|
80
86
|
get_ssh_client_info()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Iterable, List, Optional
|
|
5
|
+
|
|
6
|
+
import argcomplete
|
|
7
|
+
from argcomplete.completers import BaseCompleter
|
|
8
|
+
|
|
9
|
+
from dstack._internal.core.errors import ConfigurationError
|
|
10
|
+
from dstack._internal.core.services.configs import ConfigManager
|
|
11
|
+
from dstack.api import Client
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseAPINameCompleter(BaseCompleter, ABC):
|
|
15
|
+
"""
|
|
16
|
+
Base class for name completers that fetch resource names via the API.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self):
|
|
20
|
+
super().__init__()
|
|
21
|
+
|
|
22
|
+
def get_api(self, parsed_args: argparse.Namespace) -> Optional[Client]:
|
|
23
|
+
argcomplete.debug(f"{self.__class__.__name__}: Retrieving API client")
|
|
24
|
+
project = getattr(parsed_args, "project", os.getenv("DSTACK_PROJECT"))
|
|
25
|
+
try:
|
|
26
|
+
return Client.from_config(project_name=project)
|
|
27
|
+
except ConfigurationError as e:
|
|
28
|
+
argcomplete.debug(f"{self.__class__.__name__}: Error initializing API client: {e}")
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
|
|
32
|
+
api = self.get_api(parsed_args)
|
|
33
|
+
if api is None:
|
|
34
|
+
return []
|
|
35
|
+
|
|
36
|
+
argcomplete.debug(f"{self.__class__.__name__}: Fetching completions")
|
|
37
|
+
try:
|
|
38
|
+
resource_names = self.fetch_resource_names(api)
|
|
39
|
+
return [name for name in resource_names if name.startswith(prefix)]
|
|
40
|
+
except Exception as e:
|
|
41
|
+
argcomplete.debug(
|
|
42
|
+
f"{self.__class__.__name__}: Error fetching resource completions: {e}"
|
|
43
|
+
)
|
|
44
|
+
return []
|
|
45
|
+
|
|
46
|
+
@abstractmethod
|
|
47
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
48
|
+
"""
|
|
49
|
+
Returns an iterable of resource names.
|
|
50
|
+
"""
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class RunNameCompleter(BaseAPINameCompleter):
|
|
55
|
+
def __init__(self, all: bool = False):
|
|
56
|
+
super().__init__()
|
|
57
|
+
self.all = all
|
|
58
|
+
|
|
59
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
60
|
+
return [r.name for r in api.runs.list(self.all)]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class FleetNameCompleter(BaseAPINameCompleter):
|
|
64
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
65
|
+
return [r.name for r in api.client.fleets.list(api.project)]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class VolumeNameCompleter(BaseAPINameCompleter):
|
|
69
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
70
|
+
return [r.name for r in api.client.volumes.list(api.project)]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GatewayNameCompleter(BaseAPINameCompleter):
|
|
74
|
+
def fetch_resource_names(self, api: Client) -> Iterable[str]:
|
|
75
|
+
return [r.name for r in api.client.gateways.list(api.project)]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ProjectNameCompleter(BaseCompleter):
|
|
79
|
+
"""
|
|
80
|
+
Completer for local project names.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
|
|
84
|
+
argcomplete.debug(f"{self.__class__.__name__}: Listing projects from ConfigManager")
|
|
85
|
+
projects = ConfigManager().list_projects()
|
|
86
|
+
return [p for p in projects if p.startswith(prefix)]
|
|
@@ -34,7 +34,6 @@ from dstack._internal.core.models.configurations import (
|
|
|
34
34
|
BaseRunConfigurationWithPorts,
|
|
35
35
|
DevEnvironmentConfiguration,
|
|
36
36
|
PortMapping,
|
|
37
|
-
PythonVersion,
|
|
38
37
|
RunConfigurationType,
|
|
39
38
|
ServiceConfiguration,
|
|
40
39
|
TaskConfiguration,
|
|
@@ -73,12 +72,6 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
|
|
|
73
72
|
):
|
|
74
73
|
self.apply_args(conf, configurator_args, unknown_args)
|
|
75
74
|
self.validate_gpu_vendor_and_image(conf)
|
|
76
|
-
if conf.python == PythonVersion.PY38:
|
|
77
|
-
logger.warning(
|
|
78
|
-
"Specifying [code]python: 3.8[/] in run configurations is deprecated"
|
|
79
|
-
" and will be forbidden in a future [code]dstack[/] release."
|
|
80
|
-
" Please upgrade your configuration to a newer Python version."
|
|
81
|
-
)
|
|
82
75
|
if repo is None:
|
|
83
76
|
repo = self.api.repos.load(Path.cwd())
|
|
84
77
|
config_manager = ConfigManager()
|
|
@@ -238,8 +231,8 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
|
|
|
238
231
|
reattach = True
|
|
239
232
|
break
|
|
240
233
|
if run.status.is_finished():
|
|
241
|
-
|
|
242
|
-
exit(
|
|
234
|
+
print_finished_message(run)
|
|
235
|
+
exit(get_run_exit_code(run))
|
|
243
236
|
time.sleep(1)
|
|
244
237
|
if not reattach:
|
|
245
238
|
console.print(
|
|
@@ -439,7 +432,7 @@ class RunWithPortsConfigurator(BaseRunConfigurator):
|
|
|
439
432
|
):
|
|
440
433
|
super().apply_args(conf, args, unknown)
|
|
441
434
|
if args.ports:
|
|
442
|
-
conf.ports = list(
|
|
435
|
+
conf.ports = list(_merge_ports(conf.ports, args.ports).values())
|
|
443
436
|
|
|
444
437
|
|
|
445
438
|
class TaskConfigurator(RunWithPortsConfigurator):
|
|
@@ -475,17 +468,17 @@ class ServiceConfigurator(BaseRunConfigurator):
|
|
|
475
468
|
self.interpolate_run_args(conf.commands, unknown)
|
|
476
469
|
|
|
477
470
|
|
|
478
|
-
def
|
|
479
|
-
|
|
480
|
-
|
|
471
|
+
def _merge_ports(conf: List[PortMapping], args: List[PortMapping]) -> Dict[int, PortMapping]:
|
|
472
|
+
_unique_ports_constraint([pm.container_port for pm in conf])
|
|
473
|
+
_unique_ports_constraint([pm.container_port for pm in args])
|
|
481
474
|
ports = {pm.container_port: pm for pm in conf}
|
|
482
475
|
for pm in args: # override conf
|
|
483
476
|
ports[pm.container_port] = pm
|
|
484
|
-
|
|
477
|
+
_unique_ports_constraint([pm.local_port for pm in ports.values() if pm.local_port is not None])
|
|
485
478
|
return ports
|
|
486
479
|
|
|
487
480
|
|
|
488
|
-
def
|
|
481
|
+
def _unique_ports_constraint(ports: List[int]):
|
|
489
482
|
used_ports = set()
|
|
490
483
|
for i in ports:
|
|
491
484
|
if i in used_ports:
|
|
@@ -514,7 +507,7 @@ def _print_service_urls(run: Run) -> None:
|
|
|
514
507
|
console.print()
|
|
515
508
|
|
|
516
509
|
|
|
517
|
-
def
|
|
510
|
+
def print_finished_message(run: Run):
|
|
518
511
|
if run.status == RunStatus.DONE:
|
|
519
512
|
console.print("[code]Done[/]")
|
|
520
513
|
return
|
|
@@ -542,7 +535,7 @@ def _print_finished_message(run: Run):
|
|
|
542
535
|
console.print(f"[error]{message}[/]")
|
|
543
536
|
|
|
544
537
|
|
|
545
|
-
def
|
|
538
|
+
def get_run_exit_code(run: Run) -> int:
|
|
546
539
|
if run.status == RunStatus.DONE:
|
|
547
540
|
return 0
|
|
548
541
|
return 1
|
|
@@ -45,7 +45,11 @@ def get_fleets_table(
|
|
|
45
45
|
status = instance.status.value
|
|
46
46
|
total_blocks = instance.total_blocks
|
|
47
47
|
busy_blocks = instance.busy_blocks
|
|
48
|
-
if
|
|
48
|
+
if (
|
|
49
|
+
instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
|
|
50
|
+
and total_blocks is not None
|
|
51
|
+
and total_blocks > 1
|
|
52
|
+
):
|
|
49
53
|
status = f"{busy_blocks}/{total_blocks} {InstanceStatus.BUSY.value}"
|
|
50
54
|
if (
|
|
51
55
|
instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
|
|
@@ -22,6 +22,8 @@ def get_volumes_table(
|
|
|
22
22
|
if verbose:
|
|
23
23
|
table.add_column("REGION")
|
|
24
24
|
table.add_column("STATUS")
|
|
25
|
+
if verbose:
|
|
26
|
+
table.add_column("ATTACHED")
|
|
25
27
|
table.add_column("CREATED")
|
|
26
28
|
if verbose:
|
|
27
29
|
table.add_column("ERROR")
|
|
@@ -37,11 +39,18 @@ def get_volumes_table(
|
|
|
37
39
|
and volume.provisioning_data.availability_zone is not None
|
|
38
40
|
):
|
|
39
41
|
region += f" ({volume.provisioning_data.availability_zone})"
|
|
42
|
+
attached = "-"
|
|
43
|
+
if volume.attachments is not None:
|
|
44
|
+
attached = ", ".join(
|
|
45
|
+
{va.instance.fleet_name for va in volume.attachments if va.instance.fleet_name}
|
|
46
|
+
)
|
|
47
|
+
attached = attached or "-"
|
|
40
48
|
row = {
|
|
41
49
|
"NAME": volume.name,
|
|
42
50
|
"BACKEND": backend,
|
|
43
51
|
"REGION": region,
|
|
44
52
|
"STATUS": volume.status,
|
|
53
|
+
"ATTACHED": attached,
|
|
45
54
|
"CREATED": format_date(volume.created_at),
|
|
46
55
|
"ERROR": volume.status_message,
|
|
47
56
|
}
|
|
@@ -11,8 +11,11 @@ from dstack._internal import settings
|
|
|
11
11
|
from dstack._internal.core.backends.aws.config import AWSConfig
|
|
12
12
|
from dstack._internal.core.backends.base.compute import (
|
|
13
13
|
Compute,
|
|
14
|
+
generate_unique_gateway_instance_name,
|
|
15
|
+
generate_unique_instance_name,
|
|
16
|
+
generate_unique_volume_name,
|
|
14
17
|
get_gateway_user_data,
|
|
15
|
-
|
|
18
|
+
get_job_instance_name,
|
|
16
19
|
get_user_data,
|
|
17
20
|
merge_tags,
|
|
18
21
|
)
|
|
@@ -152,10 +155,12 @@ class AWSCompute(Compute):
|
|
|
152
155
|
if zones is not None and len(zones) == 0:
|
|
153
156
|
raise NoCapacityError("No eligible availability zones")
|
|
154
157
|
|
|
158
|
+
instance_name = generate_unique_instance_name(instance_config)
|
|
155
159
|
tags = {
|
|
156
|
-
"Name":
|
|
160
|
+
"Name": instance_name,
|
|
157
161
|
"owner": "dstack",
|
|
158
162
|
"dstack_project": project_name,
|
|
163
|
+
"dstack_name": instance_config.instance_name,
|
|
159
164
|
"dstack_user": instance_config.user,
|
|
160
165
|
}
|
|
161
166
|
tags = merge_tags(tags=tags, backend_tags=self.config.tags)
|
|
@@ -214,7 +219,7 @@ class AWSCompute(Compute):
|
|
|
214
219
|
disk_size=disk_size,
|
|
215
220
|
image_id=image_id,
|
|
216
221
|
instance_type=instance_offer.instance.name,
|
|
217
|
-
|
|
222
|
+
iam_instance_profile=self.config.iam_instance_profile,
|
|
218
223
|
user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
|
|
219
224
|
tags=aws_resources.make_tags(tags),
|
|
220
225
|
security_group_id=aws_resources.create_security_group(
|
|
@@ -259,6 +264,9 @@ class AWSCompute(Compute):
|
|
|
259
264
|
)
|
|
260
265
|
except botocore.exceptions.ClientError as e:
|
|
261
266
|
logger.warning("Got botocore.exceptions.ClientError: %s", e)
|
|
267
|
+
if e.response["Error"]["Code"] == "InvalidParameterValue":
|
|
268
|
+
msg = e.response["Error"].get("Message", "")
|
|
269
|
+
raise ComputeError(f"Invalid AWS request: {msg}")
|
|
262
270
|
continue
|
|
263
271
|
raise NoCapacityError()
|
|
264
272
|
|
|
@@ -274,7 +282,7 @@ class AWSCompute(Compute):
|
|
|
274
282
|
# TODO: run_job is the same for vm-based backends, refactor
|
|
275
283
|
instance_config = InstanceConfiguration(
|
|
276
284
|
project_name=run.project_name,
|
|
277
|
-
instance_name=
|
|
285
|
+
instance_name=get_job_instance_name(run, job), # TODO: generate name
|
|
278
286
|
ssh_keys=[
|
|
279
287
|
SSHKey(public=project_ssh_public_key.strip()),
|
|
280
288
|
],
|
|
@@ -342,10 +350,12 @@ class AWSCompute(Compute):
|
|
|
342
350
|
ec2_resource = self.session.resource("ec2", region_name=configuration.region)
|
|
343
351
|
ec2_client = self.session.client("ec2", region_name=configuration.region)
|
|
344
352
|
|
|
353
|
+
instance_name = generate_unique_gateway_instance_name(configuration)
|
|
345
354
|
tags = {
|
|
346
|
-
"Name":
|
|
355
|
+
"Name": instance_name,
|
|
347
356
|
"owner": "dstack",
|
|
348
357
|
"dstack_project": configuration.project_name,
|
|
358
|
+
"dstack_name": configuration.instance_name,
|
|
349
359
|
}
|
|
350
360
|
if settings.DSTACK_VERSION is not None:
|
|
351
361
|
tags["dstack_version"] = settings.DSTACK_VERSION
|
|
@@ -373,7 +383,7 @@ class AWSCompute(Compute):
|
|
|
373
383
|
disk_size=10,
|
|
374
384
|
image_id=aws_resources.get_gateway_image_id(ec2_client),
|
|
375
385
|
instance_type="t2.micro",
|
|
376
|
-
|
|
386
|
+
iam_instance_profile=None,
|
|
377
387
|
user_data=get_gateway_user_data(configuration.ssh_key_pub),
|
|
378
388
|
tags=tags,
|
|
379
389
|
security_group_id=security_group_id,
|
|
@@ -403,7 +413,7 @@ class AWSCompute(Compute):
|
|
|
403
413
|
|
|
404
414
|
logger.debug("Creating ALB for gateway %s...", configuration.instance_name)
|
|
405
415
|
response = elb_client.create_load_balancer(
|
|
406
|
-
Name=f"{
|
|
416
|
+
Name=f"{instance_name}-lb",
|
|
407
417
|
Subnets=subnets_ids,
|
|
408
418
|
SecurityGroups=[security_group_id],
|
|
409
419
|
Scheme="internet-facing" if configuration.public_ip else "internal",
|
|
@@ -418,7 +428,7 @@ class AWSCompute(Compute):
|
|
|
418
428
|
|
|
419
429
|
logger.debug("Creating Target Group for gateway %s...", configuration.instance_name)
|
|
420
430
|
response = elb_client.create_target_group(
|
|
421
|
-
Name=f"{
|
|
431
|
+
Name=f"{instance_name}-tg",
|
|
422
432
|
Protocol="HTTP",
|
|
423
433
|
Port=80,
|
|
424
434
|
VpcId=vpc_id,
|
|
@@ -535,11 +545,13 @@ class AWSCompute(Compute):
|
|
|
535
545
|
def create_volume(self, volume: Volume) -> VolumeProvisioningData:
|
|
536
546
|
ec2_client = self.session.client("ec2", region_name=volume.configuration.region)
|
|
537
547
|
|
|
548
|
+
volume_name = generate_unique_volume_name(volume)
|
|
538
549
|
tags = {
|
|
539
|
-
"Name":
|
|
550
|
+
"Name": volume_name,
|
|
540
551
|
"owner": "dstack",
|
|
541
|
-
"dstack_user": volume.user,
|
|
542
552
|
"dstack_project": volume.project_name,
|
|
553
|
+
"dstack_name": volume.name,
|
|
554
|
+
"dstack_user": volume.user,
|
|
543
555
|
}
|
|
544
556
|
tags = merge_tags(tags=tags, backend_tags=self.config.tags)
|
|
545
557
|
|
|
@@ -635,11 +647,12 @@ class AWSCompute(Compute):
|
|
|
635
647
|
ec2_client = self.session.client("ec2", region_name=volume.configuration.region)
|
|
636
648
|
|
|
637
649
|
logger.debug("Detaching EBS volume %s from instance %s", volume.volume_id, instance_id)
|
|
650
|
+
attachment_data = get_or_error(volume.get_attachment_data_for_instance(instance_id))
|
|
638
651
|
try:
|
|
639
652
|
ec2_client.detach_volume(
|
|
640
653
|
VolumeId=volume.volume_id,
|
|
641
654
|
InstanceId=instance_id,
|
|
642
|
-
Device=
|
|
655
|
+
Device=attachment_data.device_name,
|
|
643
656
|
Force=force,
|
|
644
657
|
)
|
|
645
658
|
except botocore.exceptions.ClientError as e:
|
|
@@ -131,7 +131,7 @@ def create_instances_struct(
|
|
|
131
131
|
disk_size: int,
|
|
132
132
|
image_id: str,
|
|
133
133
|
instance_type: str,
|
|
134
|
-
|
|
134
|
+
iam_instance_profile: Optional[str],
|
|
135
135
|
user_data: str,
|
|
136
136
|
tags: List[Dict[str, str]],
|
|
137
137
|
security_group_id: str,
|
|
@@ -166,8 +166,8 @@ def create_instances_struct(
|
|
|
166
166
|
},
|
|
167
167
|
],
|
|
168
168
|
)
|
|
169
|
-
if
|
|
170
|
-
struct["IamInstanceProfile"] = {"
|
|
169
|
+
if iam_instance_profile:
|
|
170
|
+
struct["IamInstanceProfile"] = {"Name": iam_instance_profile}
|
|
171
171
|
if spot:
|
|
172
172
|
struct["InstanceMarketOptions"] = {
|
|
173
173
|
"MarketType": "spot",
|