dstack 0.19.24__py3-none-any.whl → 0.19.25rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/apply.py +14 -2
- dstack/_internal/cli/commands/init.py +47 -2
- dstack/_internal/cli/commands/offer.py +68 -60
- dstack/_internal/cli/services/configurators/run.py +38 -10
- dstack/_internal/cli/services/repos.py +6 -24
- dstack/_internal/cli/utils/common.py +7 -0
- dstack/_internal/cli/utils/gpu.py +210 -0
- dstack/_internal/cli/utils/run.py +33 -0
- dstack/_internal/core/backends/aws/compute.py +1 -4
- dstack/_internal/core/backends/base/compute.py +0 -4
- dstack/_internal/core/backends/gcp/compute.py +1 -4
- dstack/_internal/core/backends/nebius/compute.py +1 -4
- dstack/_internal/core/models/common.py +1 -1
- dstack/_internal/core/models/config.py +3 -1
- dstack/_internal/core/models/configurations.py +16 -14
- dstack/_internal/core/models/fleets.py +2 -2
- dstack/_internal/core/models/instances.py +1 -1
- dstack/_internal/core/models/profiles.py +2 -2
- dstack/_internal/core/models/repos/remote.py +2 -2
- dstack/_internal/core/models/resources.py +4 -4
- dstack/_internal/core/models/runs.py +1 -1
- dstack/_internal/core/services/configs/__init__.py +4 -6
- dstack/_internal/proxy/gateway/services/registry.py +2 -0
- dstack/_internal/server/app.py +2 -0
- dstack/_internal/server/background/tasks/process_fleets.py +10 -2
- dstack/_internal/server/background/tasks/process_running_jobs.py +65 -44
- dstack/_internal/server/background/tasks/process_runs.py +15 -14
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +251 -52
- dstack/_internal/server/migrations/versions/3d7f6c2ec000_add_jobmodel_registered.py +28 -0
- dstack/_internal/server/migrations/versions/e2d08cd1b8d9_add_jobmodel_fleet.py +41 -0
- dstack/_internal/server/models.py +13 -1
- dstack/_internal/server/routers/gpus.py +29 -0
- dstack/_internal/server/schemas/gateways.py +1 -1
- dstack/_internal/server/schemas/gpus.py +66 -0
- dstack/_internal/server/services/docker.py +1 -1
- dstack/_internal/server/services/gpus.py +390 -0
- dstack/_internal/server/services/offers.py +48 -31
- dstack/_internal/server/services/probes.py +5 -1
- dstack/_internal/server/services/proxy/repo.py +1 -0
- dstack/_internal/server/services/runs.py +12 -11
- dstack/_internal/server/services/services/__init__.py +60 -41
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/logo-notext.svg +116 -0
- dstack/_internal/server/statics/{main-03e818b110e1d5705378.css → main-aec4762350e34d6fbff9.css} +1 -1
- dstack/_internal/server/statics/{main-16813e4e1d1c4119eda3.js → main-d151b300fcac3933213d.js} +19 -22
- dstack/_internal/server/statics/{main-16813e4e1d1c4119eda3.js.map → main-d151b300fcac3933213d.js.map} +1 -1
- dstack/_internal/server/testing/common.py +7 -2
- dstack/api/_public/repos.py +8 -7
- dstack/api/server/__init__.py +6 -0
- dstack/api/server/_gpus.py +22 -0
- dstack/version.py +1 -1
- {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/METADATA +1 -1
- {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/RECORD +56 -48
- {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/WHEEL +0 -0
- {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import argparse
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from argcomplete import FilesCompleter
|
|
4
5
|
|
|
@@ -13,7 +14,7 @@ from dstack._internal.cli.services.repos import (
|
|
|
13
14
|
init_repo,
|
|
14
15
|
register_init_repo_args,
|
|
15
16
|
)
|
|
16
|
-
from dstack._internal.cli.utils.common import console
|
|
17
|
+
from dstack._internal.cli.utils.common import console, warn
|
|
17
18
|
from dstack._internal.core.errors import CLIError
|
|
18
19
|
from dstack._internal.core.models.configurations import ApplyConfigurationType
|
|
19
20
|
|
|
@@ -65,6 +66,13 @@ class ApplyCommand(APIBaseCommand):
|
|
|
65
66
|
help="Exit immediately after submitting configuration",
|
|
66
67
|
action="store_true",
|
|
67
68
|
)
|
|
69
|
+
self._parser.add_argument(
|
|
70
|
+
"--ssh-identity",
|
|
71
|
+
metavar="SSH_PRIVATE_KEY",
|
|
72
|
+
help="The private SSH key path for SSH tunneling",
|
|
73
|
+
type=Path,
|
|
74
|
+
dest="ssh_identity_file",
|
|
75
|
+
)
|
|
68
76
|
repo_group = self._parser.add_argument_group("Repo Options")
|
|
69
77
|
repo_group.add_argument(
|
|
70
78
|
"-P",
|
|
@@ -111,6 +119,11 @@ class ApplyCommand(APIBaseCommand):
|
|
|
111
119
|
raise CLIError("Cannot read configuration from stdin if -y/--yes is not specified")
|
|
112
120
|
if args.repo and args.no_repo:
|
|
113
121
|
raise CLIError("Either --repo or --no-repo can be specified")
|
|
122
|
+
if args.local:
|
|
123
|
+
warn(
|
|
124
|
+
"Local repos are deprecated since 0.19.25 and will be removed soon."
|
|
125
|
+
" Consider using `files` instead: https://dstack.ai/docs/concepts/tasks/#files"
|
|
126
|
+
)
|
|
114
127
|
repo = None
|
|
115
128
|
if args.repo:
|
|
116
129
|
repo = init_repo(
|
|
@@ -121,7 +134,6 @@ class ApplyCommand(APIBaseCommand):
|
|
|
121
134
|
local=args.local,
|
|
122
135
|
git_identity_file=args.git_identity_file,
|
|
123
136
|
oauth_token=args.gh_token,
|
|
124
|
-
ssh_identity_file=args.ssh_identity_file,
|
|
125
137
|
)
|
|
126
138
|
elif args.no_repo:
|
|
127
139
|
repo = init_default_virtual_repo(api=self.api)
|
|
@@ -4,7 +4,10 @@ from pathlib import Path
|
|
|
4
4
|
|
|
5
5
|
from dstack._internal.cli.commands import BaseCommand
|
|
6
6
|
from dstack._internal.cli.services.repos import init_repo, register_init_repo_args
|
|
7
|
-
from dstack._internal.cli.utils.common import configure_logging, console
|
|
7
|
+
from dstack._internal.cli.utils.common import configure_logging, confirm_ask, console, warn
|
|
8
|
+
from dstack._internal.core.errors import ConfigurationError
|
|
9
|
+
from dstack._internal.core.models.repos.base import RepoType
|
|
10
|
+
from dstack._internal.core.services.configs import ConfigManager
|
|
8
11
|
from dstack.api import Client
|
|
9
12
|
|
|
10
13
|
|
|
@@ -19,12 +22,55 @@ class InitCommand(BaseCommand):
|
|
|
19
22
|
default=os.getenv("DSTACK_PROJECT"),
|
|
20
23
|
)
|
|
21
24
|
register_init_repo_args(self._parser)
|
|
25
|
+
# Deprecated since 0.19.25, ignored
|
|
26
|
+
self._parser.add_argument(
|
|
27
|
+
"--ssh-identity",
|
|
28
|
+
metavar="SSH_PRIVATE_KEY",
|
|
29
|
+
help=argparse.SUPPRESS,
|
|
30
|
+
type=Path,
|
|
31
|
+
dest="ssh_identity_file",
|
|
32
|
+
)
|
|
33
|
+
# A hidden mode for transitional period only, remove it with local repos
|
|
34
|
+
self._parser.add_argument(
|
|
35
|
+
"--remove",
|
|
36
|
+
help=argparse.SUPPRESS,
|
|
37
|
+
action="store_true",
|
|
38
|
+
)
|
|
22
39
|
|
|
23
40
|
def _command(self, args: argparse.Namespace):
|
|
24
41
|
configure_logging()
|
|
42
|
+
if args.remove:
|
|
43
|
+
config_manager = ConfigManager()
|
|
44
|
+
repo_path = Path.cwd()
|
|
45
|
+
repo_config = config_manager.get_repo_config(repo_path)
|
|
46
|
+
if repo_config is None:
|
|
47
|
+
raise ConfigurationError("The repo is not initialized, nothing to remove")
|
|
48
|
+
if repo_config.repo_type != RepoType.LOCAL:
|
|
49
|
+
raise ConfigurationError("`dstack init --remove` is for local repos only")
|
|
50
|
+
console.print(
|
|
51
|
+
f"You are about to remove the local repo {repo_path}\n"
|
|
52
|
+
"Only the record about the repo will be removed,"
|
|
53
|
+
" the repo files will remain intact\n"
|
|
54
|
+
)
|
|
55
|
+
if not confirm_ask("Remove the local repo?"):
|
|
56
|
+
return
|
|
57
|
+
config_manager.delete_repo_config(repo_config.repo_id)
|
|
58
|
+
config_manager.save()
|
|
59
|
+
console.print("Local repo has been removed")
|
|
60
|
+
return
|
|
25
61
|
api = Client.from_config(
|
|
26
62
|
project_name=args.project, ssh_identity_file=args.ssh_identity_file
|
|
27
63
|
)
|
|
64
|
+
if args.local:
|
|
65
|
+
warn(
|
|
66
|
+
"Local repos are deprecated since 0.19.25 and will be removed soon."
|
|
67
|
+
" Consider using `files` instead: https://dstack.ai/docs/concepts/tasks/#files"
|
|
68
|
+
)
|
|
69
|
+
if args.ssh_identity_file:
|
|
70
|
+
warn(
|
|
71
|
+
"`--ssh-identity` in `dstack init` is deprecated and ignored since 0.19.25."
|
|
72
|
+
" Use this option with `dstack apply` and `dstack attach` instead"
|
|
73
|
+
)
|
|
28
74
|
init_repo(
|
|
29
75
|
api=api,
|
|
30
76
|
repo_path=Path.cwd(),
|
|
@@ -33,6 +79,5 @@ class InitCommand(BaseCommand):
|
|
|
33
79
|
local=args.local,
|
|
34
80
|
git_identity_file=args.git_identity_file,
|
|
35
81
|
oauth_token=args.gh_token,
|
|
36
|
-
ssh_identity_file=args.ssh_identity_file,
|
|
37
82
|
)
|
|
38
83
|
console.print("OK")
|
|
@@ -1,29 +1,20 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
import contextlib
|
|
3
|
-
import json
|
|
4
2
|
from pathlib import Path
|
|
3
|
+
from typing import List
|
|
5
4
|
|
|
6
5
|
from dstack._internal.cli.commands import APIBaseCommand
|
|
7
|
-
from dstack._internal.cli.services.configurators.run import
|
|
8
|
-
BaseRunConfigurator,
|
|
9
|
-
)
|
|
6
|
+
from dstack._internal.cli.services.configurators.run import BaseRunConfigurator
|
|
10
7
|
from dstack._internal.cli.utils.common import console
|
|
11
|
-
from dstack._internal.cli.utils.
|
|
12
|
-
from dstack._internal.
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
)
|
|
8
|
+
from dstack._internal.cli.utils.gpu import print_gpu_json, print_gpu_table
|
|
9
|
+
from dstack._internal.cli.utils.run import print_offers_json, print_run_plan
|
|
10
|
+
from dstack._internal.core.errors import CLIError
|
|
11
|
+
from dstack._internal.core.models.configurations import ApplyConfigurationType, TaskConfiguration
|
|
16
12
|
from dstack._internal.core.models.runs import RunSpec
|
|
13
|
+
from dstack._internal.server.schemas.gpus import GpuGroup
|
|
17
14
|
from dstack.api.utils import load_profile
|
|
18
15
|
|
|
19
16
|
|
|
20
17
|
class OfferConfigurator(BaseRunConfigurator):
|
|
21
|
-
# TODO: The command currently uses `BaseRunConfigurator` to register arguments.
|
|
22
|
-
# This includes --env, --retry-policy, and other arguments that are unnecessary for this command.
|
|
23
|
-
# Eventually, we should introduce a base `OfferConfigurator` that doesn't include those arguments—
|
|
24
|
-
# `BaseRunConfigurator` will inherit from `OfferConfigurator`.
|
|
25
|
-
#
|
|
26
|
-
# Additionally, it should have its own type: `ApplyConfigurationType.OFFER`.
|
|
27
18
|
TYPE = ApplyConfigurationType.TASK
|
|
28
19
|
|
|
29
20
|
@classmethod
|
|
@@ -32,10 +23,18 @@ class OfferConfigurator(BaseRunConfigurator):
|
|
|
32
23
|
parser: argparse.ArgumentParser,
|
|
33
24
|
):
|
|
34
25
|
super().register_args(parser, default_max_offers=50)
|
|
26
|
+
parser.add_argument(
|
|
27
|
+
"--group-by",
|
|
28
|
+
action="append",
|
|
29
|
+
help=(
|
|
30
|
+
"Group results by fields ([code]gpu[/code], [code]backend[/code], [code]region[/code], [code]count[/code]). "
|
|
31
|
+
"Optional, but if used, must include [code]gpu[/code]. "
|
|
32
|
+
"The use of [code]region[/code] also requires [code]backend[/code]. "
|
|
33
|
+
"Can be repeated or comma-separated (e.g. [code]--group-by gpu,backend[/code])."
|
|
34
|
+
),
|
|
35
|
+
)
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
# TODO: Support aggregated offers
|
|
38
|
-
# TODO: Add tests
|
|
39
38
|
class OfferCommand(APIBaseCommand):
|
|
40
39
|
NAME = "offer"
|
|
41
40
|
DESCRIPTION = "List offers"
|
|
@@ -70,49 +69,58 @@ class OfferCommand(APIBaseCommand):
|
|
|
70
69
|
ssh_key_pub="(dummy)",
|
|
71
70
|
profile=profile,
|
|
72
71
|
)
|
|
72
|
+
|
|
73
|
+
if args.group_by:
|
|
74
|
+
args.group_by = self._process_group_by_args(args.group_by)
|
|
75
|
+
|
|
76
|
+
if args.group_by and "gpu" not in args.group_by:
|
|
77
|
+
group_values = ", ".join(args.group_by)
|
|
78
|
+
raise CLIError(f"Cannot group by '{group_values}' without also grouping by 'gpu'")
|
|
79
|
+
|
|
73
80
|
if args.format == "plain":
|
|
74
|
-
|
|
81
|
+
with console.status("Getting offers..."):
|
|
82
|
+
if args.group_by:
|
|
83
|
+
gpus = self._list_gpus(args, run_spec)
|
|
84
|
+
print_gpu_table(gpus, run_spec, args.group_by, self.api.project)
|
|
85
|
+
else:
|
|
86
|
+
run_plan = self.api.client.runs.get_plan(
|
|
87
|
+
self.api.project,
|
|
88
|
+
run_spec,
|
|
89
|
+
max_offers=args.max_offers,
|
|
90
|
+
)
|
|
91
|
+
print_run_plan(run_plan, include_run_properties=False)
|
|
75
92
|
else:
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
job_plan = run_plan.job_plans[0]
|
|
85
|
-
|
|
86
|
-
if args.format == "json":
|
|
87
|
-
# FIXME: Should use effective_run_spec from run_plan,
|
|
88
|
-
# since the spec can be changed by the server and plugins
|
|
89
|
-
output = {
|
|
90
|
-
"project": run_plan.project_name,
|
|
91
|
-
"user": run_plan.user,
|
|
92
|
-
"resources": job_plan.job_spec.requirements.resources.dict(),
|
|
93
|
-
"max_price": (job_plan.job_spec.requirements.max_price),
|
|
94
|
-
"spot": run_spec.configuration.spot_policy,
|
|
95
|
-
"reservation": run_plan.run_spec.configuration.reservation,
|
|
96
|
-
"offers": [],
|
|
97
|
-
"total_offers": job_plan.total_offers,
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
for offer in job_plan.offers:
|
|
101
|
-
output["offers"].append(
|
|
102
|
-
{
|
|
103
|
-
"backend": (
|
|
104
|
-
"ssh" if offer.backend.value == "remote" else offer.backend.value
|
|
105
|
-
),
|
|
106
|
-
"region": offer.region,
|
|
107
|
-
"instance_type": offer.instance.name,
|
|
108
|
-
"resources": offer.instance.resources.dict(),
|
|
109
|
-
"spot": offer.instance.resources.spot,
|
|
110
|
-
"price": float(offer.price),
|
|
111
|
-
"availability": offer.availability.value,
|
|
112
|
-
}
|
|
93
|
+
if args.group_by:
|
|
94
|
+
gpus = self._list_gpus(args, run_spec)
|
|
95
|
+
print_gpu_json(gpus, run_spec, args.group_by, self.api.project)
|
|
96
|
+
else:
|
|
97
|
+
run_plan = self.api.client.runs.get_plan(
|
|
98
|
+
self.api.project,
|
|
99
|
+
run_spec,
|
|
100
|
+
max_offers=args.max_offers,
|
|
113
101
|
)
|
|
102
|
+
print_offers_json(run_plan, run_spec)
|
|
114
103
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
104
|
+
def _process_group_by_args(self, group_by_args: List[str]) -> List[str]:
|
|
105
|
+
valid_choices = {"gpu", "backend", "region", "count"}
|
|
106
|
+
processed = []
|
|
107
|
+
|
|
108
|
+
for arg in group_by_args:
|
|
109
|
+
values = [v.strip() for v in arg.split(",") if v.strip()]
|
|
110
|
+
for value in values:
|
|
111
|
+
if value in valid_choices:
|
|
112
|
+
processed.append(value)
|
|
113
|
+
else:
|
|
114
|
+
raise CLIError(
|
|
115
|
+
f"Invalid group-by value: '{value}'. Valid choices are: {', '.join(sorted(valid_choices))}"
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
return processed
|
|
119
|
+
|
|
120
|
+
def _list_gpus(self, args: argparse.Namespace, run_spec: RunSpec) -> List[GpuGroup]:
    """Request the GPU listing for `run_spec` from the server.

    Args:
        args: Parsed command-line arguments; only `args.group_by` is read.
        run_spec: The run spec describing the requested resources.

    Returns:
        Whatever `self.api.client.gpus.list_gpus` returns for the project.
    """
    # "gpu" is not forwarded as a grouping field; when nothing else was
    # requested, `None` is sent instead of an empty list.
    # NOTE(review): presumably the server always groups by GPU - confirm.
    group_by = [g for g in args.group_by if g != "gpu"] or None
    return self.api.client.gpus.list_gpus(
        self.api.project,
        run_spec,
        group_by=group_by,
    )
|
|
@@ -15,9 +15,11 @@ from dstack._internal.cli.services.configurators.base import (
|
|
|
15
15
|
BaseApplyConfigurator,
|
|
16
16
|
)
|
|
17
17
|
from dstack._internal.cli.services.profile import apply_profile_args, register_profile_args
|
|
18
|
+
from dstack._internal.cli.services.repos import init_default_virtual_repo
|
|
18
19
|
from dstack._internal.cli.utils.common import (
|
|
19
20
|
confirm_ask,
|
|
20
21
|
console,
|
|
22
|
+
warn,
|
|
21
23
|
)
|
|
22
24
|
from dstack._internal.cli.utils.rich import MultiItemStatus
|
|
23
25
|
from dstack._internal.cli.utils.run import get_runs_table, print_run_plan
|
|
@@ -40,6 +42,7 @@ from dstack._internal.core.models.configurations import (
|
|
|
40
42
|
TaskConfiguration,
|
|
41
43
|
)
|
|
42
44
|
from dstack._internal.core.models.repos.base import Repo
|
|
45
|
+
from dstack._internal.core.models.repos.local import LocalRepo
|
|
43
46
|
from dstack._internal.core.models.resources import CPUSpec
|
|
44
47
|
from dstack._internal.core.models.runs import JobStatus, JobSubmission, RunSpec, RunStatus
|
|
45
48
|
from dstack._internal.core.services.configs import ConfigManager
|
|
@@ -76,17 +79,42 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
|
|
|
76
79
|
self.apply_args(conf, configurator_args, unknown_args)
|
|
77
80
|
self.validate_gpu_vendor_and_image(conf)
|
|
78
81
|
self.validate_cpu_arch_and_image(conf)
|
|
79
|
-
if repo is None:
|
|
80
|
-
repo = self.api.repos.load(Path.cwd())
|
|
81
82
|
config_manager = ConfigManager()
|
|
82
|
-
if repo
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
83
|
+
if repo is None:
|
|
84
|
+
repo_path = Path.cwd()
|
|
85
|
+
repo_config = config_manager.get_repo_config(repo_path)
|
|
86
|
+
if repo_config is None:
|
|
87
|
+
warn(
|
|
88
|
+
"The repo is not initialized. Starting from 0.19.25, repos are optional\n"
|
|
89
|
+
"There are three options:\n"
|
|
90
|
+
" - Run `dstack init` to initialize the current directory as a repo\n"
|
|
91
|
+
" - Specify `--repo`\n"
|
|
92
|
+
" - Specify `--no-repo` to not use any repo and supress this warning"
|
|
93
|
+
" (this will be the default in the future versions)"
|
|
94
|
+
)
|
|
95
|
+
if not command_args.yes and not confirm_ask("Continue without the repo?"):
|
|
96
|
+
console.print("\nExiting...")
|
|
97
|
+
return
|
|
98
|
+
repo = init_default_virtual_repo(self.api)
|
|
99
|
+
else:
|
|
100
|
+
# Unlikely, but may raise ConfigurationError if the repo does not exist
|
|
101
|
+
# on the server side (stale entry in `config.yml`)
|
|
102
|
+
repo = self.api.repos.load(repo_path)
|
|
103
|
+
if isinstance(repo, LocalRepo):
|
|
104
|
+
warn(
|
|
105
|
+
f"{repo.repo_dir} is a local repo.\n"
|
|
106
|
+
"Local repos are deprecated since 0.19.25"
|
|
107
|
+
" and will be removed soon\n"
|
|
108
|
+
"There are two options:\n"
|
|
109
|
+
" - Migrate to `files`: https://dstack.ai/docs/concepts/tasks/#files\n"
|
|
110
|
+
" - Specify `--no-repo` if you don't need the repo at all\n"
|
|
111
|
+
"In either case, you can run `dstack init --remove` to remove the repo"
|
|
112
|
+
" (only the record about the repo, not its files) and this warning"
|
|
113
|
+
)
|
|
114
|
+
self.api.ssh_identity_file = get_ssh_keypair(
|
|
115
|
+
command_args.ssh_identity_file,
|
|
116
|
+
config_manager.dstack_key_path,
|
|
117
|
+
)
|
|
90
118
|
profile = load_profile(Path.cwd(), configurator_args.profile)
|
|
91
119
|
with console.status("Getting apply plan..."):
|
|
92
120
|
run_plan = self.api.runs.get_run_plan(
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
+
import argparse
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import Optional
|
|
3
4
|
|
|
4
5
|
from dstack._internal.cli.services.configurators.base import ArgsParser
|
|
5
6
|
from dstack._internal.core.errors import CLIError
|
|
6
|
-
from dstack._internal.core.models.repos.base import Repo
|
|
7
|
+
from dstack._internal.core.models.repos.base import Repo
|
|
7
8
|
from dstack._internal.core.models.repos.remote import GitRepoURL, RemoteRepo, RepoError
|
|
8
9
|
from dstack._internal.core.models.repos.virtual import VirtualRepo
|
|
9
|
-
from dstack._internal.core.services.configs import ConfigManager
|
|
10
10
|
from dstack._internal.core.services.repos import get_default_branch
|
|
11
11
|
from dstack._internal.utils.path import PathLike
|
|
12
12
|
from dstack.api._public import Client
|
|
@@ -28,49 +28,31 @@ def register_init_repo_args(parser: ArgsParser):
|
|
|
28
28
|
type=str,
|
|
29
29
|
dest="git_identity_file",
|
|
30
30
|
)
|
|
31
|
-
|
|
32
|
-
"--ssh-identity",
|
|
33
|
-
metavar="SSH_PRIVATE_KEY",
|
|
34
|
-
help="The private SSH key path for SSH tunneling",
|
|
35
|
-
type=Path,
|
|
36
|
-
dest="ssh_identity_file",
|
|
37
|
-
)
|
|
31
|
+
# Deprecated since 0.19.25
|
|
38
32
|
parser.add_argument(
|
|
39
33
|
"--local",
|
|
40
34
|
action="store_true",
|
|
41
|
-
help=
|
|
35
|
+
help=argparse.SUPPRESS,
|
|
42
36
|
)
|
|
43
37
|
|
|
44
38
|
|
|
45
39
|
def init_repo(
|
|
46
40
|
api: Client,
|
|
47
|
-
repo_path:
|
|
41
|
+
repo_path: PathLike,
|
|
48
42
|
repo_branch: Optional[str],
|
|
49
43
|
repo_hash: Optional[str],
|
|
50
44
|
local: bool,
|
|
51
45
|
git_identity_file: Optional[PathLike],
|
|
52
46
|
oauth_token: Optional[str],
|
|
53
|
-
ssh_identity_file: Optional[PathLike],
|
|
54
47
|
) -> Repo:
|
|
55
|
-
init = True
|
|
56
|
-
if repo_path is None:
|
|
57
|
-
init = False
|
|
58
|
-
repo_path = Path.cwd()
|
|
59
48
|
if Path(repo_path).exists():
|
|
60
49
|
repo = api.repos.load(
|
|
61
50
|
repo_dir=repo_path,
|
|
62
51
|
local=local,
|
|
63
|
-
init=
|
|
52
|
+
init=True,
|
|
64
53
|
git_identity_file=git_identity_file,
|
|
65
54
|
oauth_token=oauth_token,
|
|
66
55
|
)
|
|
67
|
-
if ssh_identity_file:
|
|
68
|
-
ConfigManager().save_repo_config(
|
|
69
|
-
repo_path=repo.get_repo_dir_or_error(),
|
|
70
|
-
repo_id=repo.repo_id,
|
|
71
|
-
repo_type=RepoType(repo.run_repo_data.repo_type),
|
|
72
|
-
ssh_key_path=ssh_identity_file,
|
|
73
|
-
)
|
|
74
56
|
elif isinstance(repo_path, str):
|
|
75
57
|
try:
|
|
76
58
|
GitRepoURL.parse(repo_path)
|
|
@@ -103,3 +103,10 @@ def add_row_from_dict(table: Table, data: Dict[Union[str, int], Any], **kwargs):
|
|
|
103
103
|
else:
|
|
104
104
|
row.append("")
|
|
105
105
|
table.add_row(*row, **kwargs)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def warn(message: str):
    """Print `message` to the console in bold warning style.

    A newline is appended when the message does not already end with one.
    """
    # Additional blank line for better visibility if there are more than one warning
    trailer = "" if message.endswith("\n") else "\n"
    console.print(f"[warning][bold]{message}{trailer}[/]")
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from rich.table import Table
|
|
5
|
+
|
|
6
|
+
from dstack._internal.cli.utils.common import console
|
|
7
|
+
from dstack._internal.core.models.profiles import SpotPolicy
|
|
8
|
+
from dstack._internal.core.models.runs import Requirements, RunSpec, get_policy_map
|
|
9
|
+
from dstack._internal.server.schemas.gpus import GpuGroup
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def print_gpu_json(gpu_response, run_spec, group_by_cli, api_project):
    """Print GPU information in JSON format.

    Args:
        gpu_response: Either the GPU groups themselves (any iterable) or an
            object exposing them through a `gpus` attribute.
        run_spec: Run spec whose configuration and merged profile supply the
            resources, max price, spot policy and reservation that are echoed.
        group_by_cli: The group-by fields as given on the command line;
            emitted unchanged under `group_by`.
        api_project: Project name emitted under `project`.
    """
    import json

    req = Requirements(
        resources=run_spec.configuration.resources,
        max_price=run_spec.merged_profile.max_price,
        spot=get_policy_map(run_spec.merged_profile.spot_policy, default=SpotPolicy.AUTO),
        reservation=run_spec.configuration.reservation,
    )

    # `req.spot` is tri-state: None means no preference between spot and on-demand
    if req.spot is None:
        spot_policy = "auto"
    elif req.spot:
        spot_policy = "spot"
    else:
        spot_policy = "on-demand"

    output = {
        "project": api_project,
        "user": "admin",  # TODO: Get actual user name
        "resources": req.resources.dict(),
        "spot_policy": spot_policy,
        "max_price": req.max_price,
        "reservation": run_spec.configuration.reservation,
        "group_by": group_by_cli,
        "gpus": [],
    }

    # A plain list of groups has no `.gpus` attribute; accept both the list
    # itself and a response object wrapping it.
    gpu_groups = getattr(gpu_response, "gpus", gpu_response)

    for gpu_group in gpu_groups:
        gpu_data = {
            "name": gpu_group.name,
            "memory_mib": gpu_group.memory_mib,
            "vendor": gpu_group.vendor.value,
            "availability": [av.value for av in gpu_group.availability],
            "spot": gpu_group.spot,
            "count": {"min": gpu_group.count.min, "max": gpu_group.count.max},
            "price": {"min": gpu_group.price.min, "max": gpu_group.price.max},
        }

        # The singular/plural location keys are emitted only when set
        if gpu_group.backend:
            gpu_data["backend"] = gpu_group.backend.value
        if gpu_group.backends:
            gpu_data["backends"] = [b.value for b in gpu_group.backends]
        if gpu_group.region:
            gpu_data["region"] = gpu_group.region
        if gpu_group.regions:
            gpu_data["regions"] = gpu_group.regions

        output["gpus"].append(gpu_data)

    print(json.dumps(output, indent=2))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def print_gpu_table(gpus: List[GpuGroup], run_spec: RunSpec, group_by: List[str], project: str):
    """Print the filter summary followed by a table with one row per GPU group.

    Args:
        gpus: The GPU groups to render, in display order.
        run_spec: Run spec passed through to `print_filter_info`.
        group_by: Group-by fields passed through to `print_filter_info`.
        project: Project name passed through to `print_filter_info`.
    """
    print_filter_info(run_spec, group_by, project)

    any_backend = any(group.backend for group in gpus)
    any_region = any(group.region for group in gpus)
    any_regions = any(group.regions for group in gpus)

    # Without a dedicated region column, regions are folded into the
    # backend cell in parentheses (see the row loop below).
    backend_column, region_column = "BACKENDS", None
    if any_backend:
        if any_region:
            backend_column, region_column = "BACKEND", "REGION"
        elif any_regions:
            backend_column, region_column = "BACKEND", "REGIONS"

    secondary_style = "grey58"

    def dim(text) -> str:
        return f"[{secondary_style}]{text}[/]"

    def trim_price(price) -> str:
        # Four decimals at most, then drop trailing zeros and a dangling dot
        return f"{price:.4f}".rstrip("0").rstrip(".")

    narrow_terminal = shutil.get_terminal_size(fallback=(120, 40)).columns <= 110
    table = Table(box=None, expand=narrow_terminal)
    table.add_column("#")
    table.add_column("GPU", no_wrap=True, ratio=2)
    table.add_column("SPOT", style=secondary_style, ratio=1)
    table.add_column("$/GPU", style=secondary_style, ratio=1)
    table.add_column(backend_column, style=secondary_style, ratio=2)
    if region_column:
        table.add_column(region_column, style=secondary_style, ratio=2)
    # Header-less trailing column for the availability note
    table.add_column()

    for index, group in enumerate(gpus, start=1):
        if group.backend:
            backend_text = group.backend.value
        elif group.backends:
            backend_text = ", ".join(b.value for b in group.backends)
        else:
            backend_text = ""

        region_count = len(group.regions) if group.regions else 0
        regions_summary = f"{region_count} regions"
        if group.region:
            region_text = group.region
        elif region_count == 0:
            region_text = ""
        elif region_count <= 3:
            region_text = ", ".join(group.regions)
        else:
            # More than three regions are summarised by their number
            region_text = regions_summary

        if region_column:
            backends_display = backend_text
        elif region_count > 3:
            backends_display = f"{backend_text} ({regions_summary})"
        elif region_text:
            backends_display = f"{backend_text} ({region_text})"
        else:
            backends_display = backend_text

        min_count, max_count = group.count.min, group.count.max
        count_range = str(min_count) if min_count == max_count else f"{min_count}..{max_count}"
        gpu_spec = f"{group.name}:{group.memory_mib // 1024}GB:{count_range}"

        spot_display = ", ".join(kind for kind in ("spot", "on-demand") if kind in group.spot)

        if group.price.min == group.price.max:
            price_display = trim_price(group.price.min)
        else:
            price_display = f"{trim_price(group.price.min)}..{trim_price(group.price.max)}"

        # An availability note is shown only when no entry is available
        availability = ""
        has_available = any(av.is_available() for av in group.availability)
        has_unavailable = any(not av.is_available() for av in group.availability)
        if has_unavailable and not has_available:
            availability = next(
                (
                    av.value.replace("_", " ").lower()
                    for av in group.availability
                    if av.value in {"not_available", "no_quota", "idle", "busy"}
                ),
                "",
            )

        cells = [
            dim(index),
            gpu_spec,
            dim(spot_display),
            dim(price_display),
            dim(backends_display),
        ]
        if region_column:
            cells.append(dim(region_text))
        cells.append(dim(availability))

        table.add_row(*cells)

    console.print(table)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def print_filter_info(run_spec: RunSpec, group_by: List[str], project: str):
    """Print a two-column summary of the filters applied to the GPU listing.

    Rows: project, resources, spot policy, max price, reservation and, when
    `group_by` is non-empty, the group-by fields. A blank line follows.
    """
    profile = run_spec.merged_profile
    req = Requirements(
        resources=run_spec.configuration.resources,
        max_price=profile.max_price,
        spot=get_policy_map(profile.spot_policy, default=SpotPolicy.AUTO),
        reservation=profile.reservation,
    )

    # NOTE(review): `:3f` is a minimum width of 3 with the default six-digit
    # precision (not `.3f`); trailing zeros and a dangling dot are stripped.
    if req.max_price:
        max_price = f"${req.max_price:3f}".rstrip("0").rstrip(".")
    else:
        max_price = "-"

    # `req.spot` is tri-state: None / truthy / falsy
    spot_policy = "auto" if req.spot is None else ("spot" if req.spot else "on-demand")

    # TODO: Show user name
    rows = [
        ("Project", project),
        ("Resources", req.pretty_format(resources_only=True)),
        ("Spot policy", spot_policy),
        ("Max price", max_price),
        ("Reservation", run_spec.configuration.reservation or "-"),
    ]
    if group_by:
        rows.append(("Group by", ", ".join(group_by)))

    props = Table(box=None, show_header=False)
    props.add_column(no_wrap=True)
    props.add_column()
    for label, value in rows:
        props.add_row(f"[bold]{label}[/bold]", value)

    console.print(props)
    console.print()
|