dstack 0.18.42__py3-none-any.whl → 0.18.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. dstack/_internal/cli/commands/__init__.py +2 -1
  2. dstack/_internal/cli/commands/apply.py +4 -2
  3. dstack/_internal/cli/commands/attach.py +21 -1
  4. dstack/_internal/cli/commands/completion.py +20 -0
  5. dstack/_internal/cli/commands/delete.py +3 -1
  6. dstack/_internal/cli/commands/fleet.py +2 -1
  7. dstack/_internal/cli/commands/gateway.py +7 -2
  8. dstack/_internal/cli/commands/logs.py +3 -2
  9. dstack/_internal/cli/commands/stats.py +2 -1
  10. dstack/_internal/cli/commands/stop.py +2 -1
  11. dstack/_internal/cli/commands/volume.py +2 -1
  12. dstack/_internal/cli/main.py +6 -0
  13. dstack/_internal/cli/services/completion.py +86 -0
  14. dstack/_internal/cli/services/configurators/run.py +10 -17
  15. dstack/_internal/cli/utils/fleet.py +5 -1
  16. dstack/_internal/core/backends/aws/compute.py +22 -10
  17. dstack/_internal/core/backends/aws/resources.py +3 -3
  18. dstack/_internal/core/backends/azure/compute.py +14 -8
  19. dstack/_internal/core/backends/azure/resources.py +2 -0
  20. dstack/_internal/core/backends/base/compute.py +102 -2
  21. dstack/_internal/core/backends/base/offers.py +7 -1
  22. dstack/_internal/core/backends/cudo/compute.py +8 -4
  23. dstack/_internal/core/backends/datacrunch/compute.py +10 -4
  24. dstack/_internal/core/backends/gcp/auth.py +19 -13
  25. dstack/_internal/core/backends/gcp/compute.py +25 -19
  26. dstack/_internal/core/backends/gcp/resources.py +3 -10
  27. dstack/_internal/core/backends/kubernetes/compute.py +4 -3
  28. dstack/_internal/core/backends/lambdalabs/compute.py +9 -3
  29. dstack/_internal/core/backends/nebius/compute.py +2 -2
  30. dstack/_internal/core/backends/oci/compute.py +10 -4
  31. dstack/_internal/core/backends/runpod/compute.py +11 -4
  32. dstack/_internal/core/backends/tensordock/compute.py +14 -3
  33. dstack/_internal/core/backends/vastai/compute.py +12 -2
  34. dstack/_internal/core/backends/vultr/api_client.py +3 -3
  35. dstack/_internal/core/backends/vultr/compute.py +9 -3
  36. dstack/_internal/core/models/backends/aws.py +2 -0
  37. dstack/_internal/core/models/backends/base.py +1 -0
  38. dstack/_internal/core/models/configurations.py +0 -1
  39. dstack/_internal/core/services/__init__.py +5 -1
  40. dstack/_internal/core/services/configs/__init__.py +3 -0
  41. dstack/_internal/server/background/tasks/common.py +22 -0
  42. dstack/_internal/server/background/tasks/process_instances.py +11 -18
  43. dstack/_internal/server/background/tasks/process_running_jobs.py +9 -16
  44. dstack/_internal/server/background/tasks/process_terminating_jobs.py +1 -7
  45. dstack/_internal/server/routers/logs.py +3 -0
  46. dstack/_internal/server/services/backends/configurators/aws.py +31 -1
  47. dstack/_internal/server/services/backends/configurators/gcp.py +8 -15
  48. dstack/_internal/server/services/config.py +11 -1
  49. dstack/_internal/server/services/jobs/__init__.py +12 -9
  50. dstack/_internal/server/services/jobs/configurators/dev.py +1 -3
  51. dstack/_internal/server/services/jobs/configurators/task.py +1 -3
  52. dstack/_internal/server/services/logs/__init__.py +78 -0
  53. dstack/_internal/server/services/{logs.py → logs/aws.py} +12 -207
  54. dstack/_internal/server/services/logs/base.py +47 -0
  55. dstack/_internal/server/services/logs/filelog.py +110 -0
  56. dstack/_internal/server/services/logs/gcp.py +165 -0
  57. dstack/_internal/server/services/pools.py +16 -17
  58. dstack/_internal/server/services/proxy/routers/service_proxy.py +14 -7
  59. dstack/_internal/server/settings.py +3 -0
  60. dstack/_internal/server/statics/index.html +1 -1
  61. dstack/_internal/server/statics/{main-ad5150a441de98cd8987.css → main-7510e71dfa9749a4e70e.css} +1 -1
  62. dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js → main-fe8fd9db55df8d10e648.js} +66 -66
  63. dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js.map → main-fe8fd9db55df8d10e648.js.map} +1 -1
  64. dstack/_internal/server/testing/common.py +33 -8
  65. dstack/api/_public/runs.py +1 -1
  66. dstack/version.py +2 -2
  67. {dstack-0.18.42.dist-info → dstack-0.18.43.dist-info}/METADATA +4 -3
  68. {dstack-0.18.42.dist-info → dstack-0.18.43.dist-info}/RECORD +80 -71
  69. tests/_internal/core/backends/base/__init__.py +0 -0
  70. tests/_internal/core/backends/base/test_compute.py +56 -0
  71. tests/_internal/server/background/tasks/test_process_running_jobs.py +1 -1
  72. tests/_internal/server/conftest.py +4 -5
  73. tests/_internal/server/routers/test_backends.py +1 -0
  74. tests/_internal/server/routers/test_logs.py +1 -1
  75. tests/_internal/server/routers/test_runs.py +2 -2
  76. tests/_internal/server/services/test_logs.py +3 -3
  77. {dstack-0.18.42.dist-info → dstack-0.18.43.dist-info}/LICENSE.md +0 -0
  78. {dstack-0.18.42.dist-info → dstack-0.18.43.dist-info}/WHEEL +0 -0
  79. {dstack-0.18.42.dist-info → dstack-0.18.43.dist-info}/entry_points.txt +0 -0
  80. {dstack-0.18.42.dist-info → dstack-0.18.43.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ from typing import List, Optional
5
5
 
6
6
  from rich_argparse import RichHelpFormatter
7
7
 
8
+ from dstack._internal.cli.services.completion import ProjectNameCompleter
8
9
  from dstack._internal.cli.utils.common import configure_logging
9
10
  from dstack.api import Client
10
11
 
@@ -61,7 +62,7 @@ class APIBaseCommand(BaseCommand):
61
62
  help="The name of the project. Defaults to [code]$DSTACK_PROJECT[/]",
62
63
  metavar="NAME",
63
64
  default=os.getenv("DSTACK_PROJECT"),
64
- )
65
+ ).completer = ProjectNameCompleter()
65
66
 
66
67
  def _command(self, args: argparse.Namespace):
67
68
  configure_logging()
@@ -1,6 +1,8 @@
1
1
  import argparse
2
2
  from pathlib import Path
3
3
 
4
+ from argcomplete import FilesCompleter
5
+
4
6
  from dstack._internal.cli.commands import APIBaseCommand
5
7
  from dstack._internal.cli.services.configurators import (
6
8
  get_apply_configurator_class,
@@ -42,7 +44,7 @@ class ApplyCommand(APIBaseCommand):
42
44
  metavar="FILE",
43
45
  help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
44
46
  dest="configuration_file",
45
- )
47
+ ).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
46
48
  self._parser.add_argument(
47
49
  "-y",
48
50
  "--yes",
@@ -57,7 +59,7 @@ class ApplyCommand(APIBaseCommand):
57
59
  self._parser.add_argument(
58
60
  "-d",
59
61
  "--detach",
60
- help="Exit immediately after sumbitting configuration",
62
+ help="Exit immediately after submitting configuration",
61
63
  action="store_true",
62
64
  )
63
65
  repo_group = self._parser.add_argument_group("Repo Options")
@@ -6,6 +6,11 @@ from typing import Optional
6
6
 
7
7
  from dstack._internal.cli.commands import APIBaseCommand
8
8
  from dstack._internal.cli.services.args import port_mapping
9
+ from dstack._internal.cli.services.completion import RunNameCompleter
10
+ from dstack._internal.cli.services.configurators.run import (
11
+ get_run_exit_code,
12
+ print_finished_message,
13
+ )
9
14
  from dstack._internal.cli.utils.common import console
10
15
  from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT
11
16
  from dstack._internal.core.errors import CLIError
@@ -57,7 +62,7 @@ class AttachCommand(APIBaseCommand):
57
62
  type=int,
58
63
  default=0,
59
64
  )
60
- self._parser.add_argument("run_name")
65
+ self._parser.add_argument("run_name").completer = RunNameCompleter()
61
66
 
62
67
  def _command(self, args: argparse.Namespace):
63
68
  super()._command(args)
@@ -99,6 +104,21 @@ class AttachCommand(APIBaseCommand):
99
104
  pass
100
105
  finally:
101
106
  run.detach()
107
+ # TODO: Handle run resubmissions similar to dstack apply
108
+
109
+ # After reading the logs, the run may not be marked as finished immediately.
110
+ # Give the run some time to transition to a finished state before exiting.
111
+ for _ in range(30):
112
+ run.refresh()
113
+ if run.status.is_finished():
114
+ print_finished_message(run)
115
+ exit(get_run_exit_code(run))
116
+ time.sleep(1)
117
+ console.print(
118
+ "[error]Lost run connection. Timed out waiting for run final status."
119
+ " Check `dstack ps` to see if it's done or failed."
120
+ )
121
+ exit(1)
102
122
 
103
123
 
104
124
  _IGNORED_PORTS = [DSTACK_RUNNER_HTTP_PORT]
@@ -0,0 +1,20 @@
1
+ import argcomplete
2
+
3
+ from dstack._internal.cli.commands import BaseCommand
4
+
5
+
6
+ class CompletionCommand(BaseCommand):
7
+ NAME = "completion"
8
+ DESCRIPTION = "Generate shell completion scripts"
9
+
10
+ def _register(self):
11
+ super()._register()
12
+ self._parser.add_argument(
13
+ "shell",
14
+ help="The shell to generate the completion script for",
15
+ choices=["bash", "zsh"],
16
+ )
17
+
18
+ def _command(self, args):
19
+ super()._command(args)
20
+ print(argcomplete.shellcode(["dstack"], shell=args.shell))
@@ -1,6 +1,8 @@
1
1
  import argparse
2
2
  from pathlib import Path
3
3
 
4
+ from argcomplete import FilesCompleter
5
+
4
6
  from dstack._internal.cli.commands import APIBaseCommand
5
7
  from dstack._internal.cli.services.configurators import (
6
8
  get_apply_configurator_class,
@@ -22,7 +24,7 @@ class DeleteCommand(APIBaseCommand):
22
24
  metavar="FILE",
23
25
  help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
24
26
  dest="configuration_file",
25
- )
27
+ ).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
26
28
  self._parser.add_argument(
27
29
  "-y",
28
30
  "--yes",
@@ -4,6 +4,7 @@ import time
4
4
  from rich.live import Live
5
5
 
6
6
  from dstack._internal.cli.commands import APIBaseCommand
7
+ from dstack._internal.cli.services.completion import FleetNameCompleter
7
8
  from dstack._internal.cli.utils.common import (
8
9
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
9
10
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -47,7 +48,7 @@ class FleetCommand(APIBaseCommand):
47
48
  delete_parser.add_argument(
48
49
  "name",
49
50
  help="The name of the fleet",
50
- )
51
+ ).completer = FleetNameCompleter()
51
52
  delete_parser.add_argument(
52
53
  "-i",
53
54
  "--instance",
@@ -4,6 +4,7 @@ import time
4
4
  from rich.live import Live
5
5
 
6
6
  from dstack._internal.cli.commands import APIBaseCommand
7
+ from dstack._internal.cli.services.completion import GatewayNameCompleter
7
8
  from dstack._internal.cli.utils.common import (
8
9
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
9
10
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -59,7 +60,9 @@ class GatewayCommand(APIBaseCommand):
59
60
  "delete", help="Delete a gateway", formatter_class=self._parser.formatter_class
60
61
  )
61
62
  delete_parser.set_defaults(subfunc=self._delete)
62
- delete_parser.add_argument("name", help="The name of the gateway")
63
+ delete_parser.add_argument(
64
+ "name", help="The name of the gateway"
65
+ ).completer = GatewayNameCompleter()
63
66
  delete_parser.add_argument(
64
67
  "-y", "--yes", action="store_true", help="Don't ask for confirmation"
65
68
  )
@@ -68,7 +71,9 @@ class GatewayCommand(APIBaseCommand):
68
71
  "update", help="Update a gateway", formatter_class=self._parser.formatter_class
69
72
  )
70
73
  update_parser.set_defaults(subfunc=self._update)
71
- update_parser.add_argument("name", help="The name of the gateway")
74
+ update_parser.add_argument(
75
+ "name", help="The name of the gateway"
76
+ ).completer = GatewayNameCompleter()
72
77
  update_parser.add_argument(
73
78
  "--set-default", action="store_true", help="Set it the default gateway for the project"
74
79
  )
@@ -3,6 +3,7 @@ import sys
3
3
  from pathlib import Path
4
4
 
5
5
  from dstack._internal.cli.commands import APIBaseCommand
6
+ from dstack._internal.cli.services.completion import RunNameCompleter
6
7
  from dstack._internal.core.errors import CLIError
7
8
  from dstack._internal.utils.logging import get_logger
8
9
 
@@ -33,7 +34,7 @@ class LogsCommand(APIBaseCommand):
33
34
  )
34
35
  self._parser.add_argument(
35
36
  "--replica",
36
- help="The relica number. Defaults to 0.",
37
+ help="The replica number. Defaults to 0.",
37
38
  type=int,
38
39
  default=0,
39
40
  )
@@ -43,7 +44,7 @@ class LogsCommand(APIBaseCommand):
43
44
  type=int,
44
45
  default=0,
45
46
  )
46
- self._parser.add_argument("run_name")
47
+ self._parser.add_argument("run_name").completer = RunNameCompleter(all=True)
47
48
 
48
49
  def _command(self, args: argparse.Namespace):
49
50
  super()._command(args)
@@ -7,6 +7,7 @@ from rich.live import Live
7
7
  from rich.table import Table
8
8
 
9
9
  from dstack._internal.cli.commands import APIBaseCommand
10
+ from dstack._internal.cli.services.completion import RunNameCompleter
10
11
  from dstack._internal.cli.utils.common import (
11
12
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
12
13
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -25,7 +26,7 @@ class StatsCommand(APIBaseCommand):
25
26
 
26
27
  def _register(self):
27
28
  super()._register()
28
- self._parser.add_argument("run_name")
29
+ self._parser.add_argument("run_name").completer = RunNameCompleter()
29
30
  self._parser.add_argument(
30
31
  "-w",
31
32
  "--watch",
@@ -1,6 +1,7 @@
1
1
  import argparse
2
2
 
3
3
  from dstack._internal.cli.commands import APIBaseCommand
4
+ from dstack._internal.cli.services.completion import RunNameCompleter
4
5
  from dstack._internal.cli.utils.common import confirm_ask
5
6
  from dstack._internal.core.errors import CLIError
6
7
 
@@ -13,7 +14,7 @@ class StopCommand(APIBaseCommand):
13
14
  super()._register()
14
15
  self._parser.add_argument("-x", "--abort", action="store_true")
15
16
  self._parser.add_argument("-y", "--yes", action="store_true")
16
- self._parser.add_argument("run_name")
17
+ self._parser.add_argument("run_name").completer = RunNameCompleter()
17
18
 
18
19
  def _command(self, args: argparse.Namespace):
19
20
  super()._command(args)
@@ -4,6 +4,7 @@ import time
4
4
  from rich.live import Live
5
5
 
6
6
  from dstack._internal.cli.commands import APIBaseCommand
7
+ from dstack._internal.cli.services.completion import VolumeNameCompleter
7
8
  from dstack._internal.cli.utils.common import (
8
9
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
9
10
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -47,7 +48,7 @@ class VolumeCommand(APIBaseCommand):
47
48
  delete_parser.add_argument(
48
49
  "name",
49
50
  help="The name of the volume",
50
- )
51
+ ).completer = VolumeNameCompleter()
51
52
  delete_parser.add_argument(
52
53
  "-y", "--yes", help="Don't ask for confirmation", action="store_true"
53
54
  )
@@ -1,10 +1,12 @@
1
1
  import argparse
2
2
 
3
+ import argcomplete
3
4
  from rich.markup import escape
4
5
  from rich_argparse import RichHelpFormatter
5
6
 
6
7
  from dstack._internal.cli.commands.apply import ApplyCommand
7
8
  from dstack._internal.cli.commands.attach import AttachCommand
9
+ from dstack._internal.cli.commands.completion import CompletionCommand
8
10
  from dstack._internal.cli.commands.config import ConfigCommand
9
11
  from dstack._internal.cli.commands.delete import DeleteCommand
10
12
  from dstack._internal.cli.commands.fleet import FleetCommand
@@ -72,9 +74,13 @@ def main():
72
74
  StatsCommand.register(subparsers)
73
75
  StopCommand.register(subparsers)
74
76
  VolumeCommand.register(subparsers)
77
+ CompletionCommand.register(subparsers)
78
+
79
+ argcomplete.autocomplete(parser, always_complete_options=False)
75
80
 
76
81
  args, unknown_args = parser.parse_known_args()
77
82
  args.unknown = unknown_args
83
+
78
84
  try:
79
85
  check_for_updates()
80
86
  get_ssh_client_info()
@@ -0,0 +1,86 @@
1
+ import argparse
2
+ import os
3
+ from abc import ABC, abstractmethod
4
+ from typing import Iterable, List, Optional
5
+
6
+ import argcomplete
7
+ from argcomplete.completers import BaseCompleter
8
+
9
+ from dstack._internal.core.errors import ConfigurationError
10
+ from dstack._internal.core.services.configs import ConfigManager
11
+ from dstack.api import Client
12
+
13
+
14
+ class BaseAPINameCompleter(BaseCompleter, ABC):
15
+ """
16
+ Base class for name completers that fetch resource names via the API.
17
+ """
18
+
19
+ def __init__(self):
20
+ super().__init__()
21
+
22
+ def get_api(self, parsed_args: argparse.Namespace) -> Optional[Client]:
23
+ argcomplete.debug(f"{self.__class__.__name__}: Retrieving API client")
24
+ project = getattr(parsed_args, "project", os.getenv("DSTACK_PROJECT"))
25
+ try:
26
+ return Client.from_config(project_name=project)
27
+ except ConfigurationError as e:
28
+ argcomplete.debug(f"{self.__class__.__name__}: Error initializing API client: {e}")
29
+ return None
30
+
31
+ def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
32
+ api = self.get_api(parsed_args)
33
+ if api is None:
34
+ return []
35
+
36
+ argcomplete.debug(f"{self.__class__.__name__}: Fetching completions")
37
+ try:
38
+ resource_names = self.fetch_resource_names(api)
39
+ return [name for name in resource_names if name.startswith(prefix)]
40
+ except Exception as e:
41
+ argcomplete.debug(
42
+ f"{self.__class__.__name__}: Error fetching resource completions: {e}"
43
+ )
44
+ return []
45
+
46
+ @abstractmethod
47
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
48
+ """
49
+ Returns an iterable of resource names.
50
+ """
51
+ pass
52
+
53
+
54
+ class RunNameCompleter(BaseAPINameCompleter):
55
+ def __init__(self, all: bool = False):
56
+ super().__init__()
57
+ self.all = all
58
+
59
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
60
+ return [r.name for r in api.runs.list(self.all)]
61
+
62
+
63
+ class FleetNameCompleter(BaseAPINameCompleter):
64
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
65
+ return [r.name for r in api.client.fleets.list(api.project)]
66
+
67
+
68
+ class VolumeNameCompleter(BaseAPINameCompleter):
69
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
70
+ return [r.name for r in api.client.volumes.list(api.project)]
71
+
72
+
73
+ class GatewayNameCompleter(BaseAPINameCompleter):
74
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
75
+ return [r.name for r in api.client.gateways.list(api.project)]
76
+
77
+
78
+ class ProjectNameCompleter(BaseCompleter):
79
+ """
80
+ Completer for local project names.
81
+ """
82
+
83
+ def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
84
+ argcomplete.debug(f"{self.__class__.__name__}: Listing projects from ConfigManager")
85
+ projects = ConfigManager().list_projects()
86
+ return [p for p in projects if p.startswith(prefix)]
@@ -34,7 +34,6 @@ from dstack._internal.core.models.configurations import (
34
34
  BaseRunConfigurationWithPorts,
35
35
  DevEnvironmentConfiguration,
36
36
  PortMapping,
37
- PythonVersion,
38
37
  RunConfigurationType,
39
38
  ServiceConfiguration,
40
39
  TaskConfiguration,
@@ -73,12 +72,6 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
73
72
  ):
74
73
  self.apply_args(conf, configurator_args, unknown_args)
75
74
  self.validate_gpu_vendor_and_image(conf)
76
- if conf.python == PythonVersion.PY38:
77
- logger.warning(
78
- "Specifying [code]python: 3.8[/] in run configurations is deprecated"
79
- " and will be forbidden in a future [code]dstack[/] release."
80
- " Please upgrade your configuration to a newer Python version."
81
- )
82
75
  if repo is None:
83
76
  repo = self.api.repos.load(Path.cwd())
84
77
  config_manager = ConfigManager()
@@ -238,8 +231,8 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
238
231
  reattach = True
239
232
  break
240
233
  if run.status.is_finished():
241
- _print_finished_message(run)
242
- exit(_get_run_exit_code(run))
234
+ print_finished_message(run)
235
+ exit(get_run_exit_code(run))
243
236
  time.sleep(1)
244
237
  if not reattach:
245
238
  console.print(
@@ -439,7 +432,7 @@ class RunWithPortsConfigurator(BaseRunConfigurator):
439
432
  ):
440
433
  super().apply_args(conf, args, unknown)
441
434
  if args.ports:
442
- conf.ports = list(merge_ports(conf.ports, args.ports).values())
435
+ conf.ports = list(_merge_ports(conf.ports, args.ports).values())
443
436
 
444
437
 
445
438
  class TaskConfigurator(RunWithPortsConfigurator):
@@ -475,17 +468,17 @@ class ServiceConfigurator(BaseRunConfigurator):
475
468
  self.interpolate_run_args(conf.commands, unknown)
476
469
 
477
470
 
478
- def merge_ports(conf: List[PortMapping], args: List[PortMapping]) -> Dict[int, PortMapping]:
479
- unique_ports_constraint([pm.container_port for pm in conf])
480
- unique_ports_constraint([pm.container_port for pm in args])
471
+ def _merge_ports(conf: List[PortMapping], args: List[PortMapping]) -> Dict[int, PortMapping]:
472
+ _unique_ports_constraint([pm.container_port for pm in conf])
473
+ _unique_ports_constraint([pm.container_port for pm in args])
481
474
  ports = {pm.container_port: pm for pm in conf}
482
475
  for pm in args: # override conf
483
476
  ports[pm.container_port] = pm
484
- unique_ports_constraint([pm.local_port for pm in ports.values() if pm.local_port is not None])
477
+ _unique_ports_constraint([pm.local_port for pm in ports.values() if pm.local_port is not None])
485
478
  return ports
486
479
 
487
480
 
488
- def unique_ports_constraint(ports: List[int]):
481
+ def _unique_ports_constraint(ports: List[int]):
489
482
  used_ports = set()
490
483
  for i in ports:
491
484
  if i in used_ports:
@@ -514,7 +507,7 @@ def _print_service_urls(run: Run) -> None:
514
507
  console.print()
515
508
 
516
509
 
517
- def _print_finished_message(run: Run):
510
+ def print_finished_message(run: Run):
518
511
  if run.status == RunStatus.DONE:
519
512
  console.print("[code]Done[/]")
520
513
  return
@@ -542,7 +535,7 @@ def _print_finished_message(run: Run):
542
535
  console.print(f"[error]{message}[/]")
543
536
 
544
537
 
545
- def _get_run_exit_code(run: Run) -> int:
538
+ def get_run_exit_code(run: Run) -> int:
546
539
  if run.status == RunStatus.DONE:
547
540
  return 0
548
541
  return 1
@@ -45,7 +45,11 @@ def get_fleets_table(
45
45
  status = instance.status.value
46
46
  total_blocks = instance.total_blocks
47
47
  busy_blocks = instance.busy_blocks
48
- if total_blocks is not None and total_blocks > 1:
48
+ if (
49
+ instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
50
+ and total_blocks is not None
51
+ and total_blocks > 1
52
+ ):
49
53
  status = f"{busy_blocks}/{total_blocks} {InstanceStatus.BUSY.value}"
50
54
  if (
51
55
  instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
@@ -11,8 +11,11 @@ from dstack._internal import settings
11
11
  from dstack._internal.core.backends.aws.config import AWSConfig
12
12
  from dstack._internal.core.backends.base.compute import (
13
13
  Compute,
14
+ generate_unique_gateway_instance_name,
15
+ generate_unique_instance_name,
16
+ generate_unique_volume_name,
14
17
  get_gateway_user_data,
15
- get_instance_name,
18
+ get_job_instance_name,
16
19
  get_user_data,
17
20
  merge_tags,
18
21
  )
@@ -152,10 +155,12 @@ class AWSCompute(Compute):
152
155
  if zones is not None and len(zones) == 0:
153
156
  raise NoCapacityError("No eligible availability zones")
154
157
 
158
+ instance_name = generate_unique_instance_name(instance_config)
155
159
  tags = {
156
- "Name": instance_config.instance_name,
160
+ "Name": instance_name,
157
161
  "owner": "dstack",
158
162
  "dstack_project": project_name,
163
+ "dstack_name": instance_config.instance_name,
159
164
  "dstack_user": instance_config.user,
160
165
  }
161
166
  tags = merge_tags(tags=tags, backend_tags=self.config.tags)
@@ -214,7 +219,7 @@ class AWSCompute(Compute):
214
219
  disk_size=disk_size,
215
220
  image_id=image_id,
216
221
  instance_type=instance_offer.instance.name,
217
- iam_instance_profile_arn=None,
222
+ iam_instance_profile=self.config.iam_instance_profile,
218
223
  user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
219
224
  tags=aws_resources.make_tags(tags),
220
225
  security_group_id=aws_resources.create_security_group(
@@ -259,6 +264,9 @@ class AWSCompute(Compute):
259
264
  )
260
265
  except botocore.exceptions.ClientError as e:
261
266
  logger.warning("Got botocore.exceptions.ClientError: %s", e)
267
+ if e.response["Error"]["Code"] == "InvalidParameterValue":
268
+ msg = e.response["Error"].get("Message", "")
269
+ raise ComputeError(f"Invalid AWS request: {msg}")
262
270
  continue
263
271
  raise NoCapacityError()
264
272
 
@@ -274,7 +282,7 @@ class AWSCompute(Compute):
274
282
  # TODO: run_job is the same for vm-based backends, refactor
275
283
  instance_config = InstanceConfiguration(
276
284
  project_name=run.project_name,
277
- instance_name=get_instance_name(run, job), # TODO: generate name
285
+ instance_name=get_job_instance_name(run, job), # TODO: generate name
278
286
  ssh_keys=[
279
287
  SSHKey(public=project_ssh_public_key.strip()),
280
288
  ],
@@ -342,10 +350,12 @@ class AWSCompute(Compute):
342
350
  ec2_resource = self.session.resource("ec2", region_name=configuration.region)
343
351
  ec2_client = self.session.client("ec2", region_name=configuration.region)
344
352
 
353
+ instance_name = generate_unique_gateway_instance_name(configuration)
345
354
  tags = {
346
- "Name": configuration.instance_name,
355
+ "Name": instance_name,
347
356
  "owner": "dstack",
348
357
  "dstack_project": configuration.project_name,
358
+ "dstack_name": configuration.instance_name,
349
359
  }
350
360
  if settings.DSTACK_VERSION is not None:
351
361
  tags["dstack_version"] = settings.DSTACK_VERSION
@@ -373,7 +383,7 @@ class AWSCompute(Compute):
373
383
  disk_size=10,
374
384
  image_id=aws_resources.get_gateway_image_id(ec2_client),
375
385
  instance_type="t2.micro",
376
- iam_instance_profile_arn=None,
386
+ iam_instance_profile=None,
377
387
  user_data=get_gateway_user_data(configuration.ssh_key_pub),
378
388
  tags=tags,
379
389
  security_group_id=security_group_id,
@@ -403,7 +413,7 @@ class AWSCompute(Compute):
403
413
 
404
414
  logger.debug("Creating ALB for gateway %s...", configuration.instance_name)
405
415
  response = elb_client.create_load_balancer(
406
- Name=f"{configuration.instance_name}-lb",
416
+ Name=f"{instance_name}-lb",
407
417
  Subnets=subnets_ids,
408
418
  SecurityGroups=[security_group_id],
409
419
  Scheme="internet-facing" if configuration.public_ip else "internal",
@@ -418,7 +428,7 @@ class AWSCompute(Compute):
418
428
 
419
429
  logger.debug("Creating Target Group for gateway %s...", configuration.instance_name)
420
430
  response = elb_client.create_target_group(
421
- Name=f"{configuration.instance_name}-tg",
431
+ Name=f"{instance_name}-tg",
422
432
  Protocol="HTTP",
423
433
  Port=80,
424
434
  VpcId=vpc_id,
@@ -535,11 +545,13 @@ class AWSCompute(Compute):
535
545
  def create_volume(self, volume: Volume) -> VolumeProvisioningData:
536
546
  ec2_client = self.session.client("ec2", region_name=volume.configuration.region)
537
547
 
548
+ volume_name = generate_unique_volume_name(volume)
538
549
  tags = {
539
- "Name": volume.configuration.name,
550
+ "Name": volume_name,
540
551
  "owner": "dstack",
541
- "dstack_user": volume.user,
542
552
  "dstack_project": volume.project_name,
553
+ "dstack_name": volume.name,
554
+ "dstack_user": volume.user,
543
555
  }
544
556
  tags = merge_tags(tags=tags, backend_tags=self.config.tags)
545
557
 
@@ -131,7 +131,7 @@ def create_instances_struct(
131
131
  disk_size: int,
132
132
  image_id: str,
133
133
  instance_type: str,
134
- iam_instance_profile_arn: Optional[str],
134
+ iam_instance_profile: Optional[str],
135
135
  user_data: str,
136
136
  tags: List[Dict[str, str]],
137
137
  security_group_id: str,
@@ -166,8 +166,8 @@ def create_instances_struct(
166
166
  },
167
167
  ],
168
168
  )
169
- if iam_instance_profile_arn:
170
- struct["IamInstanceProfile"] = {"Arn": iam_instance_profile_arn}
169
+ if iam_instance_profile:
170
+ struct["IamInstanceProfile"] = {"Name": iam_instance_profile}
171
171
  if spot:
172
172
  struct["InstanceMarketOptions"] = {
173
173
  "MarketType": "spot",
@@ -39,8 +39,10 @@ from dstack._internal.core.backends.azure import utils as azure_utils
39
39
  from dstack._internal.core.backends.azure.config import AzureConfig
40
40
  from dstack._internal.core.backends.base.compute import (
41
41
  Compute,
42
+ generate_unique_gateway_instance_name,
43
+ generate_unique_instance_name,
42
44
  get_gateway_user_data,
43
- get_instance_name,
45
+ get_job_instance_name,
44
46
  get_user_data,
45
47
  merge_tags,
46
48
  )
@@ -103,6 +105,9 @@ class AzureCompute(Compute):
103
105
  instance_offer: InstanceOfferWithAvailability,
104
106
  instance_config: InstanceConfiguration,
105
107
  ) -> JobProvisioningData:
108
+ instance_name = generate_unique_instance_name(
109
+ instance_config, max_length=azure_resources.MAX_RESOURCE_NAME_LEN
110
+ )
106
111
  location = instance_offer.region
107
112
  logger.info(
108
113
  "Requesting %s %s instance in %s...",
@@ -129,6 +134,7 @@ class AzureCompute(Compute):
129
134
  tags = {
130
135
  "owner": "dstack",
131
136
  "dstack_project": instance_config.project_name,
137
+ "dstack_name": instance_config.instance_name,
132
138
  "dstack_user": instance_config.user,
133
139
  }
134
140
  tags = merge_tags(tags=tags, backend_tags=self.config.tags)
@@ -150,9 +156,7 @@ class AzureCompute(Compute):
150
156
  variant=VMImageVariant.from_instance_type(instance_offer.instance),
151
157
  ),
152
158
  vm_size=instance_offer.instance.name,
153
- # instance_name includes region because Azure may create an instance resource
154
- # even when provisioning fails.
155
- instance_name=f"{instance_config.instance_name}-{instance_offer.region}",
159
+ instance_name=instance_name,
156
160
  user_data=get_user_data(authorized_keys=ssh_pub_keys),
157
161
  ssh_pub_keys=ssh_pub_keys,
158
162
  spot=instance_offer.instance.resources.spot,
@@ -197,7 +201,7 @@ class AzureCompute(Compute):
197
201
  ) -> JobProvisioningData:
198
202
  instance_config = InstanceConfiguration(
199
203
  project_name=run.project_name,
200
- instance_name=get_instance_name(run, job), # TODO: generate name
204
+ instance_name=get_job_instance_name(run, job), # TODO: generate name
201
205
  ssh_keys=[
202
206
  SSHKey(public=project_ssh_public_key.strip()),
203
207
  ],
@@ -223,7 +227,9 @@ class AzureCompute(Compute):
223
227
  configuration.instance_name,
224
228
  configuration.region,
225
229
  )
226
-
230
+ instance_name = generate_unique_gateway_instance_name(
231
+ configuration, max_length=azure_resources.MAX_RESOURCE_NAME_LEN
232
+ )
227
233
  network_resource_group, network, subnet = get_resource_group_network_subnet_or_error(
228
234
  network_client=self._network_client,
229
235
  resource_group=self.config.resource_group,
@@ -237,9 +243,9 @@ class AzureCompute(Compute):
237
243
  )
238
244
 
239
245
  tags = {
240
- "Name": configuration.instance_name,
241
246
  "owner": "dstack",
242
247
  "dstack_project": configuration.project_name,
248
+ "dstack_name": configuration.instance_name,
243
249
  }
244
250
  if settings.DSTACK_VERSION is not None:
245
251
  tags["dstack_version"] = settings.DSTACK_VERSION
@@ -256,7 +262,7 @@ class AzureCompute(Compute):
256
262
  managed_identity=None,
257
263
  image_reference=_get_gateway_image_ref(),
258
264
  vm_size="Standard_B1ms",
259
- instance_name=configuration.instance_name,
265
+ instance_name=instance_name,
260
266
  user_data=get_gateway_user_data(configuration.ssh_key_pub),
261
267
  ssh_pub_keys=[configuration.ssh_key_pub],
262
268
  spot=False,