dstack 0.18.42__py3-none-any.whl → 0.18.44__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. dstack/_internal/cli/commands/__init__.py +2 -1
  2. dstack/_internal/cli/commands/apply.py +4 -2
  3. dstack/_internal/cli/commands/attach.py +21 -1
  4. dstack/_internal/cli/commands/completion.py +20 -0
  5. dstack/_internal/cli/commands/delete.py +3 -1
  6. dstack/_internal/cli/commands/fleet.py +2 -1
  7. dstack/_internal/cli/commands/gateway.py +7 -2
  8. dstack/_internal/cli/commands/logs.py +3 -2
  9. dstack/_internal/cli/commands/stats.py +2 -1
  10. dstack/_internal/cli/commands/stop.py +2 -1
  11. dstack/_internal/cli/commands/volume.py +2 -1
  12. dstack/_internal/cli/main.py +6 -0
  13. dstack/_internal/cli/services/completion.py +86 -0
  14. dstack/_internal/cli/services/configurators/run.py +11 -17
  15. dstack/_internal/cli/utils/fleet.py +5 -1
  16. dstack/_internal/cli/utils/run.py +11 -0
  17. dstack/_internal/core/backends/aws/compute.py +23 -10
  18. dstack/_internal/core/backends/aws/resources.py +3 -3
  19. dstack/_internal/core/backends/azure/compute.py +15 -9
  20. dstack/_internal/core/backends/azure/resources.py +2 -0
  21. dstack/_internal/core/backends/base/compute.py +102 -2
  22. dstack/_internal/core/backends/base/offers.py +7 -1
  23. dstack/_internal/core/backends/cudo/compute.py +8 -4
  24. dstack/_internal/core/backends/datacrunch/compute.py +10 -4
  25. dstack/_internal/core/backends/gcp/auth.py +19 -13
  26. dstack/_internal/core/backends/gcp/compute.py +26 -20
  27. dstack/_internal/core/backends/gcp/resources.py +3 -10
  28. dstack/_internal/core/backends/kubernetes/compute.py +4 -3
  29. dstack/_internal/core/backends/lambdalabs/compute.py +9 -3
  30. dstack/_internal/core/backends/nebius/compute.py +2 -2
  31. dstack/_internal/core/backends/oci/compute.py +10 -4
  32. dstack/_internal/core/backends/runpod/compute.py +32 -7
  33. dstack/_internal/core/backends/runpod/config.py +8 -0
  34. dstack/_internal/core/backends/tensordock/compute.py +14 -3
  35. dstack/_internal/core/backends/vastai/compute.py +12 -2
  36. dstack/_internal/core/backends/vultr/api_client.py +3 -3
  37. dstack/_internal/core/backends/vultr/compute.py +9 -3
  38. dstack/_internal/core/models/backends/aws.py +2 -0
  39. dstack/_internal/core/models/backends/base.py +1 -0
  40. dstack/_internal/core/models/backends/runpod.py +2 -0
  41. dstack/_internal/core/models/configurations.py +2 -2
  42. dstack/_internal/core/models/profiles.py +46 -1
  43. dstack/_internal/core/models/runs.py +4 -0
  44. dstack/_internal/core/services/__init__.py +5 -1
  45. dstack/_internal/core/services/configs/__init__.py +3 -0
  46. dstack/_internal/server/app.py +11 -1
  47. dstack/_internal/server/background/__init__.py +10 -0
  48. dstack/_internal/server/background/tasks/common.py +22 -0
  49. dstack/_internal/server/background/tasks/process_instances.py +11 -18
  50. dstack/_internal/server/background/tasks/process_placement_groups.py +1 -0
  51. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
  52. dstack/_internal/server/background/tasks/process_running_jobs.py +74 -34
  53. dstack/_internal/server/background/tasks/process_runs.py +1 -0
  54. dstack/_internal/server/background/tasks/process_submitted_jobs.py +4 -1
  55. dstack/_internal/server/background/tasks/process_terminating_jobs.py +1 -7
  56. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  57. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  58. dstack/_internal/server/models.py +11 -0
  59. dstack/_internal/server/routers/logs.py +3 -0
  60. dstack/_internal/server/routers/metrics.py +21 -2
  61. dstack/_internal/server/routers/prometheus.py +36 -0
  62. dstack/_internal/server/security/permissions.py +1 -1
  63. dstack/_internal/server/services/backends/configurators/aws.py +31 -1
  64. dstack/_internal/server/services/backends/configurators/gcp.py +8 -15
  65. dstack/_internal/server/services/backends/configurators/runpod.py +3 -33
  66. dstack/_internal/server/services/config.py +24 -4
  67. dstack/_internal/server/services/fleets.py +1 -0
  68. dstack/_internal/server/services/gateways/__init__.py +1 -0
  69. dstack/_internal/server/services/jobs/__init__.py +12 -9
  70. dstack/_internal/server/services/jobs/configurators/base.py +9 -1
  71. dstack/_internal/server/services/jobs/configurators/dev.py +1 -3
  72. dstack/_internal/server/services/jobs/configurators/task.py +1 -3
  73. dstack/_internal/server/services/logs/__init__.py +78 -0
  74. dstack/_internal/server/services/{logs.py → logs/aws.py} +12 -207
  75. dstack/_internal/server/services/logs/base.py +47 -0
  76. dstack/_internal/server/services/logs/filelog.py +110 -0
  77. dstack/_internal/server/services/logs/gcp.py +165 -0
  78. dstack/_internal/server/services/metrics.py +103 -70
  79. dstack/_internal/server/services/pools.py +16 -17
  80. dstack/_internal/server/services/prometheus.py +87 -0
  81. dstack/_internal/server/services/proxy/routers/service_proxy.py +14 -7
  82. dstack/_internal/server/services/runner/client.py +14 -3
  83. dstack/_internal/server/services/runs.py +43 -15
  84. dstack/_internal/server/services/volumes.py +1 -0
  85. dstack/_internal/server/settings.py +6 -0
  86. dstack/_internal/server/statics/index.html +1 -1
  87. dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js → main-4eb116b97819badd1e2c.js} +131 -78
  88. dstack/_internal/server/statics/{main-2ac66bfcbd2e39830b88.js.map → main-4eb116b97819badd1e2c.js.map} +1 -1
  89. dstack/_internal/server/statics/{main-ad5150a441de98cd8987.css → main-da9f8c06a69c20dac23e.css} +1 -1
  90. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  91. dstack/_internal/server/testing/common.py +50 -8
  92. dstack/api/_public/runs.py +4 -1
  93. dstack/api/server/_fleets.py +2 -0
  94. dstack/api/server/_runs.py +4 -0
  95. dstack/api/utils.py +3 -0
  96. dstack/version.py +2 -2
  97. {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/METADATA +13 -3
  98. {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/RECORD +115 -97
  99. tests/_internal/core/backends/base/__init__.py +0 -0
  100. tests/_internal/core/backends/base/test_compute.py +56 -0
  101. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +189 -0
  102. tests/_internal/server/background/tasks/test_process_running_jobs.py +126 -1
  103. tests/_internal/server/conftest.py +4 -5
  104. tests/_internal/server/routers/test_backends.py +1 -0
  105. tests/_internal/server/routers/test_fleets.py +2 -0
  106. tests/_internal/server/routers/test_logs.py +1 -1
  107. tests/_internal/server/routers/test_metrics.py +15 -0
  108. tests/_internal/server/routers/test_prometheus.py +244 -0
  109. tests/_internal/server/routers/test_runs.py +81 -58
  110. tests/_internal/server/services/test_logs.py +3 -3
  111. tests/_internal/server/services/test_metrics.py +163 -0
  112. {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/LICENSE.md +0 -0
  113. {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/WHEEL +0 -0
  114. {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/entry_points.txt +0 -0
  115. {dstack-0.18.42.dist-info → dstack-0.18.44.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ from typing import List, Optional
5
5
 
6
6
  from rich_argparse import RichHelpFormatter
7
7
 
8
+ from dstack._internal.cli.services.completion import ProjectNameCompleter
8
9
  from dstack._internal.cli.utils.common import configure_logging
9
10
  from dstack.api import Client
10
11
 
@@ -61,7 +62,7 @@ class APIBaseCommand(BaseCommand):
61
62
  help="The name of the project. Defaults to [code]$DSTACK_PROJECT[/]",
62
63
  metavar="NAME",
63
64
  default=os.getenv("DSTACK_PROJECT"),
64
- )
65
+ ).completer = ProjectNameCompleter()
65
66
 
66
67
  def _command(self, args: argparse.Namespace):
67
68
  configure_logging()
@@ -1,6 +1,8 @@
1
1
  import argparse
2
2
  from pathlib import Path
3
3
 
4
+ from argcomplete import FilesCompleter
5
+
4
6
  from dstack._internal.cli.commands import APIBaseCommand
5
7
  from dstack._internal.cli.services.configurators import (
6
8
  get_apply_configurator_class,
@@ -42,7 +44,7 @@ class ApplyCommand(APIBaseCommand):
42
44
  metavar="FILE",
43
45
  help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
44
46
  dest="configuration_file",
45
- )
47
+ ).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
46
48
  self._parser.add_argument(
47
49
  "-y",
48
50
  "--yes",
@@ -57,7 +59,7 @@ class ApplyCommand(APIBaseCommand):
57
59
  self._parser.add_argument(
58
60
  "-d",
59
61
  "--detach",
60
- help="Exit immediately after sumbitting configuration",
62
+ help="Exit immediately after submitting configuration",
61
63
  action="store_true",
62
64
  )
63
65
  repo_group = self._parser.add_argument_group("Repo Options")
@@ -6,6 +6,11 @@ from typing import Optional
6
6
 
7
7
  from dstack._internal.cli.commands import APIBaseCommand
8
8
  from dstack._internal.cli.services.args import port_mapping
9
+ from dstack._internal.cli.services.completion import RunNameCompleter
10
+ from dstack._internal.cli.services.configurators.run import (
11
+ get_run_exit_code,
12
+ print_finished_message,
13
+ )
9
14
  from dstack._internal.cli.utils.common import console
10
15
  from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT
11
16
  from dstack._internal.core.errors import CLIError
@@ -57,7 +62,7 @@ class AttachCommand(APIBaseCommand):
57
62
  type=int,
58
63
  default=0,
59
64
  )
60
- self._parser.add_argument("run_name")
65
+ self._parser.add_argument("run_name").completer = RunNameCompleter()
61
66
 
62
67
  def _command(self, args: argparse.Namespace):
63
68
  super()._command(args)
@@ -99,6 +104,21 @@ class AttachCommand(APIBaseCommand):
99
104
  pass
100
105
  finally:
101
106
  run.detach()
107
+ # TODO: Handle run resubmissions similar to dstack apply
108
+
109
+ # After reading the logs, the run may not be marked as finished immediately.
110
+ # Give the run some time to transition to a finished state before exiting.
111
+ for _ in range(30):
112
+ run.refresh()
113
+ if run.status.is_finished():
114
+ print_finished_message(run)
115
+ exit(get_run_exit_code(run))
116
+ time.sleep(1)
117
+ console.print(
118
+ "[error]Lost run connection. Timed out waiting for run final status."
119
+ " Check `dstack ps` to see if it's done or failed."
120
+ )
121
+ exit(1)
102
122
 
103
123
 
104
124
  _IGNORED_PORTS = [DSTACK_RUNNER_HTTP_PORT]
@@ -0,0 +1,20 @@
1
+ import argcomplete
2
+
3
+ from dstack._internal.cli.commands import BaseCommand
4
+
5
+
6
+ class CompletionCommand(BaseCommand):
7
+ NAME = "completion"
8
+ DESCRIPTION = "Generate shell completion scripts"
9
+
10
+ def _register(self):
11
+ super()._register()
12
+ self._parser.add_argument(
13
+ "shell",
14
+ help="The shell to generate the completion script for",
15
+ choices=["bash", "zsh"],
16
+ )
17
+
18
+ def _command(self, args):
19
+ super()._command(args)
20
+ print(argcomplete.shellcode(["dstack"], shell=args.shell))
@@ -1,6 +1,8 @@
1
1
  import argparse
2
2
  from pathlib import Path
3
3
 
4
+ from argcomplete import FilesCompleter
5
+
4
6
  from dstack._internal.cli.commands import APIBaseCommand
5
7
  from dstack._internal.cli.services.configurators import (
6
8
  get_apply_configurator_class,
@@ -22,7 +24,7 @@ class DeleteCommand(APIBaseCommand):
22
24
  metavar="FILE",
23
25
  help="The path to the configuration file. Defaults to [code]$PWD/.dstack.yml[/]",
24
26
  dest="configuration_file",
25
- )
27
+ ).completer = FilesCompleter(allowednames=["*.yml", "*.yaml"])
26
28
  self._parser.add_argument(
27
29
  "-y",
28
30
  "--yes",
@@ -4,6 +4,7 @@ import time
4
4
  from rich.live import Live
5
5
 
6
6
  from dstack._internal.cli.commands import APIBaseCommand
7
+ from dstack._internal.cli.services.completion import FleetNameCompleter
7
8
  from dstack._internal.cli.utils.common import (
8
9
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
9
10
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -47,7 +48,7 @@ class FleetCommand(APIBaseCommand):
47
48
  delete_parser.add_argument(
48
49
  "name",
49
50
  help="The name of the fleet",
50
- )
51
+ ).completer = FleetNameCompleter()
51
52
  delete_parser.add_argument(
52
53
  "-i",
53
54
  "--instance",
@@ -4,6 +4,7 @@ import time
4
4
  from rich.live import Live
5
5
 
6
6
  from dstack._internal.cli.commands import APIBaseCommand
7
+ from dstack._internal.cli.services.completion import GatewayNameCompleter
7
8
  from dstack._internal.cli.utils.common import (
8
9
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
9
10
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -59,7 +60,9 @@ class GatewayCommand(APIBaseCommand):
59
60
  "delete", help="Delete a gateway", formatter_class=self._parser.formatter_class
60
61
  )
61
62
  delete_parser.set_defaults(subfunc=self._delete)
62
- delete_parser.add_argument("name", help="The name of the gateway")
63
+ delete_parser.add_argument(
64
+ "name", help="The name of the gateway"
65
+ ).completer = GatewayNameCompleter()
63
66
  delete_parser.add_argument(
64
67
  "-y", "--yes", action="store_true", help="Don't ask for confirmation"
65
68
  )
@@ -68,7 +71,9 @@ class GatewayCommand(APIBaseCommand):
68
71
  "update", help="Update a gateway", formatter_class=self._parser.formatter_class
69
72
  )
70
73
  update_parser.set_defaults(subfunc=self._update)
71
- update_parser.add_argument("name", help="The name of the gateway")
74
+ update_parser.add_argument(
75
+ "name", help="The name of the gateway"
76
+ ).completer = GatewayNameCompleter()
72
77
  update_parser.add_argument(
73
78
  "--set-default", action="store_true", help="Set it the default gateway for the project"
74
79
  )
@@ -3,6 +3,7 @@ import sys
3
3
  from pathlib import Path
4
4
 
5
5
  from dstack._internal.cli.commands import APIBaseCommand
6
+ from dstack._internal.cli.services.completion import RunNameCompleter
6
7
  from dstack._internal.core.errors import CLIError
7
8
  from dstack._internal.utils.logging import get_logger
8
9
 
@@ -33,7 +34,7 @@ class LogsCommand(APIBaseCommand):
33
34
  )
34
35
  self._parser.add_argument(
35
36
  "--replica",
36
- help="The relica number. Defaults to 0.",
37
+ help="The replica number. Defaults to 0.",
37
38
  type=int,
38
39
  default=0,
39
40
  )
@@ -43,7 +44,7 @@ class LogsCommand(APIBaseCommand):
43
44
  type=int,
44
45
  default=0,
45
46
  )
46
- self._parser.add_argument("run_name")
47
+ self._parser.add_argument("run_name").completer = RunNameCompleter(all=True)
47
48
 
48
49
  def _command(self, args: argparse.Namespace):
49
50
  super()._command(args)
@@ -7,6 +7,7 @@ from rich.live import Live
7
7
  from rich.table import Table
8
8
 
9
9
  from dstack._internal.cli.commands import APIBaseCommand
10
+ from dstack._internal.cli.services.completion import RunNameCompleter
10
11
  from dstack._internal.cli.utils.common import (
11
12
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
12
13
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -25,7 +26,7 @@ class StatsCommand(APIBaseCommand):
25
26
 
26
27
  def _register(self):
27
28
  super()._register()
28
- self._parser.add_argument("run_name")
29
+ self._parser.add_argument("run_name").completer = RunNameCompleter()
29
30
  self._parser.add_argument(
30
31
  "-w",
31
32
  "--watch",
@@ -1,6 +1,7 @@
1
1
  import argparse
2
2
 
3
3
  from dstack._internal.cli.commands import APIBaseCommand
4
+ from dstack._internal.cli.services.completion import RunNameCompleter
4
5
  from dstack._internal.cli.utils.common import confirm_ask
5
6
  from dstack._internal.core.errors import CLIError
6
7
 
@@ -13,7 +14,7 @@ class StopCommand(APIBaseCommand):
13
14
  super()._register()
14
15
  self._parser.add_argument("-x", "--abort", action="store_true")
15
16
  self._parser.add_argument("-y", "--yes", action="store_true")
16
- self._parser.add_argument("run_name")
17
+ self._parser.add_argument("run_name").completer = RunNameCompleter()
17
18
 
18
19
  def _command(self, args: argparse.Namespace):
19
20
  super()._command(args)
@@ -4,6 +4,7 @@ import time
4
4
  from rich.live import Live
5
5
 
6
6
  from dstack._internal.cli.commands import APIBaseCommand
7
+ from dstack._internal.cli.services.completion import VolumeNameCompleter
7
8
  from dstack._internal.cli.utils.common import (
8
9
  LIVE_TABLE_PROVISION_INTERVAL_SECS,
9
10
  LIVE_TABLE_REFRESH_RATE_PER_SEC,
@@ -47,7 +48,7 @@ class VolumeCommand(APIBaseCommand):
47
48
  delete_parser.add_argument(
48
49
  "name",
49
50
  help="The name of the volume",
50
- )
51
+ ).completer = VolumeNameCompleter()
51
52
  delete_parser.add_argument(
52
53
  "-y", "--yes", help="Don't ask for confirmation", action="store_true"
53
54
  )
@@ -1,10 +1,12 @@
1
1
  import argparse
2
2
 
3
+ import argcomplete
3
4
  from rich.markup import escape
4
5
  from rich_argparse import RichHelpFormatter
5
6
 
6
7
  from dstack._internal.cli.commands.apply import ApplyCommand
7
8
  from dstack._internal.cli.commands.attach import AttachCommand
9
+ from dstack._internal.cli.commands.completion import CompletionCommand
8
10
  from dstack._internal.cli.commands.config import ConfigCommand
9
11
  from dstack._internal.cli.commands.delete import DeleteCommand
10
12
  from dstack._internal.cli.commands.fleet import FleetCommand
@@ -72,9 +74,13 @@ def main():
72
74
  StatsCommand.register(subparsers)
73
75
  StopCommand.register(subparsers)
74
76
  VolumeCommand.register(subparsers)
77
+ CompletionCommand.register(subparsers)
78
+
79
+ argcomplete.autocomplete(parser, always_complete_options=False)
75
80
 
76
81
  args, unknown_args = parser.parse_known_args()
77
82
  args.unknown = unknown_args
83
+
78
84
  try:
79
85
  check_for_updates()
80
86
  get_ssh_client_info()
@@ -0,0 +1,86 @@
1
+ import argparse
2
+ import os
3
+ from abc import ABC, abstractmethod
4
+ from typing import Iterable, List, Optional
5
+
6
+ import argcomplete
7
+ from argcomplete.completers import BaseCompleter
8
+
9
+ from dstack._internal.core.errors import ConfigurationError
10
+ from dstack._internal.core.services.configs import ConfigManager
11
+ from dstack.api import Client
12
+
13
+
14
+ class BaseAPINameCompleter(BaseCompleter, ABC):
15
+ """
16
+ Base class for name completers that fetch resource names via the API.
17
+ """
18
+
19
+ def __init__(self):
20
+ super().__init__()
21
+
22
+ def get_api(self, parsed_args: argparse.Namespace) -> Optional[Client]:
23
+ argcomplete.debug(f"{self.__class__.__name__}: Retrieving API client")
24
+ project = getattr(parsed_args, "project", os.getenv("DSTACK_PROJECT"))
25
+ try:
26
+ return Client.from_config(project_name=project)
27
+ except ConfigurationError as e:
28
+ argcomplete.debug(f"{self.__class__.__name__}: Error initializing API client: {e}")
29
+ return None
30
+
31
+ def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
32
+ api = self.get_api(parsed_args)
33
+ if api is None:
34
+ return []
35
+
36
+ argcomplete.debug(f"{self.__class__.__name__}: Fetching completions")
37
+ try:
38
+ resource_names = self.fetch_resource_names(api)
39
+ return [name for name in resource_names if name.startswith(prefix)]
40
+ except Exception as e:
41
+ argcomplete.debug(
42
+ f"{self.__class__.__name__}: Error fetching resource completions: {e}"
43
+ )
44
+ return []
45
+
46
+ @abstractmethod
47
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
48
+ """
49
+ Returns an iterable of resource names.
50
+ """
51
+ pass
52
+
53
+
54
+ class RunNameCompleter(BaseAPINameCompleter):
55
+ def __init__(self, all: bool = False):
56
+ super().__init__()
57
+ self.all = all
58
+
59
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
60
+ return [r.name for r in api.runs.list(self.all)]
61
+
62
+
63
+ class FleetNameCompleter(BaseAPINameCompleter):
64
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
65
+ return [r.name for r in api.client.fleets.list(api.project)]
66
+
67
+
68
+ class VolumeNameCompleter(BaseAPINameCompleter):
69
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
70
+ return [r.name for r in api.client.volumes.list(api.project)]
71
+
72
+
73
+ class GatewayNameCompleter(BaseAPINameCompleter):
74
+ def fetch_resource_names(self, api: Client) -> Iterable[str]:
75
+ return [r.name for r in api.client.gateways.list(api.project)]
76
+
77
+
78
+ class ProjectNameCompleter(BaseCompleter):
79
+ """
80
+ Completer for local project names.
81
+ """
82
+
83
+ def __call__(self, prefix: str, parsed_args: argparse.Namespace, **kwargs) -> List[str]:
84
+ argcomplete.debug(f"{self.__class__.__name__}: Listing projects from ConfigManager")
85
+ projects = ConfigManager().list_projects()
86
+ return [p for p in projects if p.startswith(prefix)]
@@ -34,7 +34,6 @@ from dstack._internal.core.models.configurations import (
34
34
  BaseRunConfigurationWithPorts,
35
35
  DevEnvironmentConfiguration,
36
36
  PortMapping,
37
- PythonVersion,
38
37
  RunConfigurationType,
39
38
  ServiceConfiguration,
40
39
  TaskConfiguration,
@@ -73,12 +72,6 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
73
72
  ):
74
73
  self.apply_args(conf, configurator_args, unknown_args)
75
74
  self.validate_gpu_vendor_and_image(conf)
76
- if conf.python == PythonVersion.PY38:
77
- logger.warning(
78
- "Specifying [code]python: 3.8[/] in run configurations is deprecated"
79
- " and will be forbidden in a future [code]dstack[/] release."
80
- " Please upgrade your configuration to a newer Python version."
81
- )
82
75
  if repo is None:
83
76
  repo = self.api.repos.load(Path.cwd())
84
77
  config_manager = ConfigManager()
@@ -102,6 +95,7 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
102
95
  reservation=profile.reservation,
103
96
  spot_policy=profile.spot_policy,
104
97
  retry_policy=profile.retry_policy,
98
+ utilization_policy=profile.utilization_policy,
105
99
  max_duration=profile.max_duration,
106
100
  stop_duration=profile.stop_duration,
107
101
  max_price=profile.max_price,
@@ -238,8 +232,8 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
238
232
  reattach = True
239
233
  break
240
234
  if run.status.is_finished():
241
- _print_finished_message(run)
242
- exit(_get_run_exit_code(run))
235
+ print_finished_message(run)
236
+ exit(get_run_exit_code(run))
243
237
  time.sleep(1)
244
238
  if not reattach:
245
239
  console.print(
@@ -439,7 +433,7 @@ class RunWithPortsConfigurator(BaseRunConfigurator):
439
433
  ):
440
434
  super().apply_args(conf, args, unknown)
441
435
  if args.ports:
442
- conf.ports = list(merge_ports(conf.ports, args.ports).values())
436
+ conf.ports = list(_merge_ports(conf.ports, args.ports).values())
443
437
 
444
438
 
445
439
  class TaskConfigurator(RunWithPortsConfigurator):
@@ -475,17 +469,17 @@ class ServiceConfigurator(BaseRunConfigurator):
475
469
  self.interpolate_run_args(conf.commands, unknown)
476
470
 
477
471
 
478
- def merge_ports(conf: List[PortMapping], args: List[PortMapping]) -> Dict[int, PortMapping]:
479
- unique_ports_constraint([pm.container_port for pm in conf])
480
- unique_ports_constraint([pm.container_port for pm in args])
472
+ def _merge_ports(conf: List[PortMapping], args: List[PortMapping]) -> Dict[int, PortMapping]:
473
+ _unique_ports_constraint([pm.container_port for pm in conf])
474
+ _unique_ports_constraint([pm.container_port for pm in args])
481
475
  ports = {pm.container_port: pm for pm in conf}
482
476
  for pm in args: # override conf
483
477
  ports[pm.container_port] = pm
484
- unique_ports_constraint([pm.local_port for pm in ports.values() if pm.local_port is not None])
478
+ _unique_ports_constraint([pm.local_port for pm in ports.values() if pm.local_port is not None])
485
479
  return ports
486
480
 
487
481
 
488
- def unique_ports_constraint(ports: List[int]):
482
+ def _unique_ports_constraint(ports: List[int]):
489
483
  used_ports = set()
490
484
  for i in ports:
491
485
  if i in used_ports:
@@ -514,7 +508,7 @@ def _print_service_urls(run: Run) -> None:
514
508
  console.print()
515
509
 
516
510
 
517
- def _print_finished_message(run: Run):
511
+ def print_finished_message(run: Run):
518
512
  if run.status == RunStatus.DONE:
519
513
  console.print("[code]Done[/]")
520
514
  return
@@ -542,7 +536,7 @@ def _print_finished_message(run: Run):
542
536
  console.print(f"[error]{message}[/]")
543
537
 
544
538
 
545
- def _get_run_exit_code(run: Run) -> int:
539
+ def get_run_exit_code(run: Run) -> int:
546
540
  if run.status == RunStatus.DONE:
547
541
  return 0
548
542
  return 1
@@ -45,7 +45,11 @@ def get_fleets_table(
45
45
  status = instance.status.value
46
46
  total_blocks = instance.total_blocks
47
47
  busy_blocks = instance.busy_blocks
48
- if total_blocks is not None and total_blocks > 1:
48
+ if (
49
+ instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
50
+ and total_blocks is not None
51
+ and total_blocks > 1
52
+ ):
49
53
  status = f"{busy_blocks}/{total_blocks} {InstanceStatus.BUSY.value}"
50
54
  if (
51
55
  instance.status in [InstanceStatus.IDLE, InstanceStatus.BUSY]
@@ -4,6 +4,8 @@ from rich.markup import escape
4
4
  from rich.table import Table
5
5
 
6
6
  from dstack._internal.cli.utils.common import NO_OFFERS_WARNING, add_row_from_dict, console
7
+ from dstack._internal.core.models.common import is_core_model_instance
8
+ from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
7
9
  from dstack._internal.core.models.instances import InstanceAvailability
8
10
  from dstack._internal.core.models.profiles import (
9
11
  DEFAULT_RUN_TERMINATION_IDLE_TIME,
@@ -38,6 +40,13 @@ def print_run_plan(run_plan: RunPlan, offers_limit: int = 3):
38
40
  if job_plan.job_spec.max_duration
39
41
  else "-"
40
42
  )
43
+ inactivity_duration = None
44
+ if is_core_model_instance(run_plan.run_spec.configuration, DevEnvironmentConfiguration):
45
+ inactivity_duration = "-"
46
+ if isinstance(run_plan.run_spec.configuration.inactivity_duration, int):
47
+ inactivity_duration = format_pretty_duration(
48
+ run_plan.run_spec.configuration.inactivity_duration
49
+ )
41
50
  if job_plan.job_spec.retry is None:
42
51
  retry = "-"
43
52
  else:
@@ -72,6 +81,8 @@ def print_run_plan(run_plan: RunPlan, offers_limit: int = 3):
72
81
  props.add_row(th("Resources"), pretty_req)
73
82
  props.add_row(th("Max price"), max_price)
74
83
  props.add_row(th("Max duration"), max_duration)
84
+ if inactivity_duration is not None: # None means n/a
85
+ props.add_row(th("Inactivity duration"), inactivity_duration)
75
86
  props.add_row(th("Spot policy"), spot_policy)
76
87
  props.add_row(th("Retry policy"), retry)
77
88
  props.add_row(th("Creation policy"), creation_policy)
@@ -11,8 +11,11 @@ from dstack._internal import settings
11
11
  from dstack._internal.core.backends.aws.config import AWSConfig
12
12
  from dstack._internal.core.backends.base.compute import (
13
13
  Compute,
14
+ generate_unique_gateway_instance_name,
15
+ generate_unique_instance_name,
16
+ generate_unique_volume_name,
14
17
  get_gateway_user_data,
15
- get_instance_name,
18
+ get_job_instance_name,
16
19
  get_user_data,
17
20
  merge_tags,
18
21
  )
@@ -152,10 +155,12 @@ class AWSCompute(Compute):
152
155
  if zones is not None and len(zones) == 0:
153
156
  raise NoCapacityError("No eligible availability zones")
154
157
 
158
+ instance_name = generate_unique_instance_name(instance_config)
155
159
  tags = {
156
- "Name": instance_config.instance_name,
160
+ "Name": instance_name,
157
161
  "owner": "dstack",
158
162
  "dstack_project": project_name,
163
+ "dstack_name": instance_config.instance_name,
159
164
  "dstack_user": instance_config.user,
160
165
  }
161
166
  tags = merge_tags(tags=tags, backend_tags=self.config.tags)
@@ -214,7 +219,7 @@ class AWSCompute(Compute):
214
219
  disk_size=disk_size,
215
220
  image_id=image_id,
216
221
  instance_type=instance_offer.instance.name,
217
- iam_instance_profile_arn=None,
222
+ iam_instance_profile=self.config.iam_instance_profile,
218
223
  user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
219
224
  tags=aws_resources.make_tags(tags),
220
225
  security_group_id=aws_resources.create_security_group(
@@ -259,6 +264,9 @@ class AWSCompute(Compute):
259
264
  )
260
265
  except botocore.exceptions.ClientError as e:
261
266
  logger.warning("Got botocore.exceptions.ClientError: %s", e)
267
+ if e.response["Error"]["Code"] == "InvalidParameterValue":
268
+ msg = e.response["Error"].get("Message", "")
269
+ raise ComputeError(f"Invalid AWS request: {msg}")
262
270
  continue
263
271
  raise NoCapacityError()
264
272
 
@@ -274,7 +282,7 @@ class AWSCompute(Compute):
274
282
  # TODO: run_job is the same for vm-based backends, refactor
275
283
  instance_config = InstanceConfiguration(
276
284
  project_name=run.project_name,
277
- instance_name=get_instance_name(run, job), # TODO: generate name
285
+ instance_name=get_job_instance_name(run, job), # TODO: generate name
278
286
  ssh_keys=[
279
287
  SSHKey(public=project_ssh_public_key.strip()),
280
288
  ],
@@ -342,10 +350,12 @@ class AWSCompute(Compute):
342
350
  ec2_resource = self.session.resource("ec2", region_name=configuration.region)
343
351
  ec2_client = self.session.client("ec2", region_name=configuration.region)
344
352
 
353
+ instance_name = generate_unique_gateway_instance_name(configuration)
345
354
  tags = {
346
- "Name": configuration.instance_name,
355
+ "Name": instance_name,
347
356
  "owner": "dstack",
348
357
  "dstack_project": configuration.project_name,
358
+ "dstack_name": configuration.instance_name,
349
359
  }
350
360
  if settings.DSTACK_VERSION is not None:
351
361
  tags["dstack_version"] = settings.DSTACK_VERSION
@@ -373,7 +383,7 @@ class AWSCompute(Compute):
373
383
  disk_size=10,
374
384
  image_id=aws_resources.get_gateway_image_id(ec2_client),
375
385
  instance_type="t2.micro",
376
- iam_instance_profile_arn=None,
386
+ iam_instance_profile=None,
377
387
  user_data=get_gateway_user_data(configuration.ssh_key_pub),
378
388
  tags=tags,
379
389
  security_group_id=security_group_id,
@@ -403,7 +413,7 @@ class AWSCompute(Compute):
403
413
 
404
414
  logger.debug("Creating ALB for gateway %s...", configuration.instance_name)
405
415
  response = elb_client.create_load_balancer(
406
- Name=f"{configuration.instance_name}-lb",
416
+ Name=f"{instance_name}-lb",
407
417
  Subnets=subnets_ids,
408
418
  SecurityGroups=[security_group_id],
409
419
  Scheme="internet-facing" if configuration.public_ip else "internal",
@@ -418,7 +428,7 @@ class AWSCompute(Compute):
418
428
 
419
429
  logger.debug("Creating Target Group for gateway %s...", configuration.instance_name)
420
430
  response = elb_client.create_target_group(
421
- Name=f"{configuration.instance_name}-tg",
431
+ Name=f"{instance_name}-tg",
422
432
  Protocol="HTTP",
423
433
  Port=80,
424
434
  VpcId=vpc_id,
@@ -496,6 +506,7 @@ class AWSCompute(Compute):
496
506
  "Failed to terminate all gateway %s resources. backend_data parsing error.",
497
507
  configuration.instance_name,
498
508
  )
509
+ return
499
510
 
500
511
  elb_client = self.session.client("elbv2", region_name=configuration.region)
501
512
 
@@ -535,11 +546,13 @@ class AWSCompute(Compute):
535
546
  def create_volume(self, volume: Volume) -> VolumeProvisioningData:
536
547
  ec2_client = self.session.client("ec2", region_name=volume.configuration.region)
537
548
 
549
+ volume_name = generate_unique_volume_name(volume)
538
550
  tags = {
539
- "Name": volume.configuration.name,
551
+ "Name": volume_name,
540
552
  "owner": "dstack",
541
- "dstack_user": volume.user,
542
553
  "dstack_project": volume.project_name,
554
+ "dstack_name": volume.name,
555
+ "dstack_user": volume.user,
543
556
  }
544
557
  tags = merge_tags(tags=tags, backend_tags=self.config.tags)
545
558
 
@@ -131,7 +131,7 @@ def create_instances_struct(
131
131
  disk_size: int,
132
132
  image_id: str,
133
133
  instance_type: str,
134
- iam_instance_profile_arn: Optional[str],
134
+ iam_instance_profile: Optional[str],
135
135
  user_data: str,
136
136
  tags: List[Dict[str, str]],
137
137
  security_group_id: str,
@@ -166,8 +166,8 @@ def create_instances_struct(
166
166
  },
167
167
  ],
168
168
  )
169
- if iam_instance_profile_arn:
170
- struct["IamInstanceProfile"] = {"Arn": iam_instance_profile_arn}
169
+ if iam_instance_profile:
170
+ struct["IamInstanceProfile"] = {"Name": iam_instance_profile}
171
171
  if spot:
172
172
  struct["InstanceMarketOptions"] = {
173
173
  "MarketType": "spot",