dstack 0.19.24__py3-none-any.whl → 0.19.25rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (56) hide show
  1. dstack/_internal/cli/commands/apply.py +14 -2
  2. dstack/_internal/cli/commands/init.py +47 -2
  3. dstack/_internal/cli/commands/offer.py +68 -60
  4. dstack/_internal/cli/services/configurators/run.py +38 -10
  5. dstack/_internal/cli/services/repos.py +6 -24
  6. dstack/_internal/cli/utils/common.py +7 -0
  7. dstack/_internal/cli/utils/gpu.py +210 -0
  8. dstack/_internal/cli/utils/run.py +33 -0
  9. dstack/_internal/core/backends/aws/compute.py +1 -4
  10. dstack/_internal/core/backends/base/compute.py +0 -4
  11. dstack/_internal/core/backends/gcp/compute.py +1 -4
  12. dstack/_internal/core/backends/nebius/compute.py +1 -4
  13. dstack/_internal/core/models/common.py +1 -1
  14. dstack/_internal/core/models/config.py +3 -1
  15. dstack/_internal/core/models/configurations.py +16 -14
  16. dstack/_internal/core/models/fleets.py +2 -2
  17. dstack/_internal/core/models/instances.py +1 -1
  18. dstack/_internal/core/models/profiles.py +2 -2
  19. dstack/_internal/core/models/repos/remote.py +2 -2
  20. dstack/_internal/core/models/resources.py +4 -4
  21. dstack/_internal/core/models/runs.py +1 -1
  22. dstack/_internal/core/services/configs/__init__.py +4 -6
  23. dstack/_internal/proxy/gateway/services/registry.py +2 -0
  24. dstack/_internal/server/app.py +2 -0
  25. dstack/_internal/server/background/tasks/process_fleets.py +10 -2
  26. dstack/_internal/server/background/tasks/process_running_jobs.py +65 -44
  27. dstack/_internal/server/background/tasks/process_runs.py +15 -14
  28. dstack/_internal/server/background/tasks/process_submitted_jobs.py +251 -52
  29. dstack/_internal/server/migrations/versions/3d7f6c2ec000_add_jobmodel_registered.py +28 -0
  30. dstack/_internal/server/migrations/versions/e2d08cd1b8d9_add_jobmodel_fleet.py +41 -0
  31. dstack/_internal/server/models.py +13 -1
  32. dstack/_internal/server/routers/gpus.py +29 -0
  33. dstack/_internal/server/schemas/gateways.py +1 -1
  34. dstack/_internal/server/schemas/gpus.py +66 -0
  35. dstack/_internal/server/services/docker.py +1 -1
  36. dstack/_internal/server/services/gpus.py +390 -0
  37. dstack/_internal/server/services/offers.py +48 -31
  38. dstack/_internal/server/services/probes.py +5 -1
  39. dstack/_internal/server/services/proxy/repo.py +1 -0
  40. dstack/_internal/server/services/runs.py +12 -11
  41. dstack/_internal/server/services/services/__init__.py +60 -41
  42. dstack/_internal/server/statics/index.html +1 -1
  43. dstack/_internal/server/statics/logo-notext.svg +116 -0
  44. dstack/_internal/server/statics/{main-03e818b110e1d5705378.css → main-aec4762350e34d6fbff9.css} +1 -1
  45. dstack/_internal/server/statics/{main-16813e4e1d1c4119eda3.js → main-d151b300fcac3933213d.js} +19 -22
  46. dstack/_internal/server/statics/{main-16813e4e1d1c4119eda3.js.map → main-d151b300fcac3933213d.js.map} +1 -1
  47. dstack/_internal/server/testing/common.py +7 -2
  48. dstack/api/_public/repos.py +8 -7
  49. dstack/api/server/__init__.py +6 -0
  50. dstack/api/server/_gpus.py +22 -0
  51. dstack/version.py +1 -1
  52. {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/METADATA +1 -1
  53. {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/RECORD +56 -48
  54. {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/WHEEL +0 -0
  55. {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/entry_points.txt +0 -0
  56. {dstack-0.19.24.dist-info → dstack-0.19.25rc1.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,4 +1,5 @@
1
1
  import argparse
2
+ from pathlib import Path
2
3
 
3
4
  from argcomplete import FilesCompleter
4
5
 
@@ -13,7 +14,7 @@ from dstack._internal.cli.services.repos import (
13
14
  init_repo,
14
15
  register_init_repo_args,
15
16
  )
16
- from dstack._internal.cli.utils.common import console
17
+ from dstack._internal.cli.utils.common import console, warn
17
18
  from dstack._internal.core.errors import CLIError
18
19
  from dstack._internal.core.models.configurations import ApplyConfigurationType
19
20
 
@@ -65,6 +66,13 @@ class ApplyCommand(APIBaseCommand):
65
66
  help="Exit immediately after submitting configuration",
66
67
  action="store_true",
67
68
  )
69
+ self._parser.add_argument(
70
+ "--ssh-identity",
71
+ metavar="SSH_PRIVATE_KEY",
72
+ help="The private SSH key path for SSH tunneling",
73
+ type=Path,
74
+ dest="ssh_identity_file",
75
+ )
68
76
  repo_group = self._parser.add_argument_group("Repo Options")
69
77
  repo_group.add_argument(
70
78
  "-P",
@@ -111,6 +119,11 @@ class ApplyCommand(APIBaseCommand):
111
119
  raise CLIError("Cannot read configuration from stdin if -y/--yes is not specified")
112
120
  if args.repo and args.no_repo:
113
121
  raise CLIError("Either --repo or --no-repo can be specified")
122
+ if args.local:
123
+ warn(
124
+ "Local repos are deprecated since 0.19.25 and will be removed soon."
125
+ " Consider using `files` instead: https://dstack.ai/docs/concepts/tasks/#files"
126
+ )
114
127
  repo = None
115
128
  if args.repo:
116
129
  repo = init_repo(
@@ -121,7 +134,6 @@ class ApplyCommand(APIBaseCommand):
121
134
  local=args.local,
122
135
  git_identity_file=args.git_identity_file,
123
136
  oauth_token=args.gh_token,
124
- ssh_identity_file=args.ssh_identity_file,
125
137
  )
126
138
  elif args.no_repo:
127
139
  repo = init_default_virtual_repo(api=self.api)
@@ -4,7 +4,10 @@ from pathlib import Path
4
4
 
5
5
  from dstack._internal.cli.commands import BaseCommand
6
6
  from dstack._internal.cli.services.repos import init_repo, register_init_repo_args
7
- from dstack._internal.cli.utils.common import configure_logging, console
7
+ from dstack._internal.cli.utils.common import configure_logging, confirm_ask, console, warn
8
+ from dstack._internal.core.errors import ConfigurationError
9
+ from dstack._internal.core.models.repos.base import RepoType
10
+ from dstack._internal.core.services.configs import ConfigManager
8
11
  from dstack.api import Client
9
12
 
10
13
 
@@ -19,12 +22,55 @@ class InitCommand(BaseCommand):
19
22
  default=os.getenv("DSTACK_PROJECT"),
20
23
  )
21
24
  register_init_repo_args(self._parser)
25
+ # Deprecated since 0.19.25, ignored
26
+ self._parser.add_argument(
27
+ "--ssh-identity",
28
+ metavar="SSH_PRIVATE_KEY",
29
+ help=argparse.SUPPRESS,
30
+ type=Path,
31
+ dest="ssh_identity_file",
32
+ )
33
+ # A hidden mode for transitional period only, remove it with local repos
34
+ self._parser.add_argument(
35
+ "--remove",
36
+ help=argparse.SUPPRESS,
37
+ action="store_true",
38
+ )
22
39
 
23
40
  def _command(self, args: argparse.Namespace):
24
41
  configure_logging()
42
+ if args.remove:
43
+ config_manager = ConfigManager()
44
+ repo_path = Path.cwd()
45
+ repo_config = config_manager.get_repo_config(repo_path)
46
+ if repo_config is None:
47
+ raise ConfigurationError("The repo is not initialized, nothing to remove")
48
+ if repo_config.repo_type != RepoType.LOCAL:
49
+ raise ConfigurationError("`dstack init --remove` is for local repos only")
50
+ console.print(
51
+ f"You are about to remove the local repo {repo_path}\n"
52
+ "Only the record about the repo will be removed,"
53
+ " the repo files will remain intact\n"
54
+ )
55
+ if not confirm_ask("Remove the local repo?"):
56
+ return
57
+ config_manager.delete_repo_config(repo_config.repo_id)
58
+ config_manager.save()
59
+ console.print("Local repo has been removed")
60
+ return
25
61
  api = Client.from_config(
26
62
  project_name=args.project, ssh_identity_file=args.ssh_identity_file
27
63
  )
64
+ if args.local:
65
+ warn(
66
+ "Local repos are deprecated since 0.19.25 and will be removed soon."
67
+ " Consider using `files` instead: https://dstack.ai/docs/concepts/tasks/#files"
68
+ )
69
+ if args.ssh_identity_file:
70
+ warn(
71
+ "`--ssh-identity` in `dstack init` is deprecated and ignored since 0.19.25."
72
+ " Use this option with `dstack apply` and `dstack attach` instead"
73
+ )
28
74
  init_repo(
29
75
  api=api,
30
76
  repo_path=Path.cwd(),
@@ -33,6 +79,5 @@ class InitCommand(BaseCommand):
33
79
  local=args.local,
34
80
  git_identity_file=args.git_identity_file,
35
81
  oauth_token=args.gh_token,
36
- ssh_identity_file=args.ssh_identity_file,
37
82
  )
38
83
  console.print("OK")
@@ -1,29 +1,20 @@
1
1
  import argparse
2
- import contextlib
3
- import json
4
2
  from pathlib import Path
3
+ from typing import List
5
4
 
6
5
  from dstack._internal.cli.commands import APIBaseCommand
7
- from dstack._internal.cli.services.configurators.run import (
8
- BaseRunConfigurator,
9
- )
6
+ from dstack._internal.cli.services.configurators.run import BaseRunConfigurator
10
7
  from dstack._internal.cli.utils.common import console
11
- from dstack._internal.cli.utils.run import print_run_plan
12
- from dstack._internal.core.models.configurations import (
13
- ApplyConfigurationType,
14
- TaskConfiguration,
15
- )
8
+ from dstack._internal.cli.utils.gpu import print_gpu_json, print_gpu_table
9
+ from dstack._internal.cli.utils.run import print_offers_json, print_run_plan
10
+ from dstack._internal.core.errors import CLIError
11
+ from dstack._internal.core.models.configurations import ApplyConfigurationType, TaskConfiguration
16
12
  from dstack._internal.core.models.runs import RunSpec
13
+ from dstack._internal.server.schemas.gpus import GpuGroup
17
14
  from dstack.api.utils import load_profile
18
15
 
19
16
 
20
17
  class OfferConfigurator(BaseRunConfigurator):
21
- # TODO: The command currently uses `BaseRunConfigurator` to register arguments.
22
- # This includes --env, --retry-policy, and other arguments that are unnecessary for this command.
23
- # Eventually, we should introduce a base `OfferConfigurator` that doesn't include those arguments—
24
- # `BaseRunConfigurator` will inherit from `OfferConfigurator`.
25
- #
26
- # Additionally, it should have its own type: `ApplyConfigurationType.OFFER`.
27
18
  TYPE = ApplyConfigurationType.TASK
28
19
 
29
20
  @classmethod
@@ -32,10 +23,18 @@ class OfferConfigurator(BaseRunConfigurator):
32
23
  parser: argparse.ArgumentParser,
33
24
  ):
34
25
  super().register_args(parser, default_max_offers=50)
26
+ parser.add_argument(
27
+ "--group-by",
28
+ action="append",
29
+ help=(
30
+ "Group results by fields ([code]gpu[/code], [code]backend[/code], [code]region[/code], [code]count[/code]). "
31
+ "Optional, but if used, must include [code]gpu[/code]. "
32
+ "The use of [code]region[/code] also requires [code]backend[/code]. "
33
+ "Can be repeated or comma-separated (e.g. [code]--group-by gpu,backend[/code])."
34
+ ),
35
+ )
35
36
 
36
37
 
37
- # TODO: Support aggregated offers
38
- # TODO: Add tests
39
38
  class OfferCommand(APIBaseCommand):
40
39
  NAME = "offer"
41
40
  DESCRIPTION = "List offers"
@@ -70,49 +69,58 @@ class OfferCommand(APIBaseCommand):
70
69
  ssh_key_pub="(dummy)",
71
70
  profile=profile,
72
71
  )
72
+
73
+ if args.group_by:
74
+ args.group_by = self._process_group_by_args(args.group_by)
75
+
76
+ if args.group_by and "gpu" not in args.group_by:
77
+ group_values = ", ".join(args.group_by)
78
+ raise CLIError(f"Cannot group by '{group_values}' without also grouping by 'gpu'")
79
+
73
80
  if args.format == "plain":
74
- status = console.status("Getting offers...")
81
+ with console.status("Getting offers..."):
82
+ if args.group_by:
83
+ gpus = self._list_gpus(args, run_spec)
84
+ print_gpu_table(gpus, run_spec, args.group_by, self.api.project)
85
+ else:
86
+ run_plan = self.api.client.runs.get_plan(
87
+ self.api.project,
88
+ run_spec,
89
+ max_offers=args.max_offers,
90
+ )
91
+ print_run_plan(run_plan, include_run_properties=False)
75
92
  else:
76
- status = contextlib.nullcontext()
77
- with status:
78
- run_plan = self.api.client.runs.get_plan(
79
- self.api.project,
80
- run_spec,
81
- max_offers=args.max_offers,
82
- )
83
-
84
- job_plan = run_plan.job_plans[0]
85
-
86
- if args.format == "json":
87
- # FIXME: Should use effective_run_spec from run_plan,
88
- # since the spec can be changed by the server and plugins
89
- output = {
90
- "project": run_plan.project_name,
91
- "user": run_plan.user,
92
- "resources": job_plan.job_spec.requirements.resources.dict(),
93
- "max_price": (job_plan.job_spec.requirements.max_price),
94
- "spot": run_spec.configuration.spot_policy,
95
- "reservation": run_plan.run_spec.configuration.reservation,
96
- "offers": [],
97
- "total_offers": job_plan.total_offers,
98
- }
99
-
100
- for offer in job_plan.offers:
101
- output["offers"].append(
102
- {
103
- "backend": (
104
- "ssh" if offer.backend.value == "remote" else offer.backend.value
105
- ),
106
- "region": offer.region,
107
- "instance_type": offer.instance.name,
108
- "resources": offer.instance.resources.dict(),
109
- "spot": offer.instance.resources.spot,
110
- "price": float(offer.price),
111
- "availability": offer.availability.value,
112
- }
93
+ if args.group_by:
94
+ gpus = self._list_gpus(args, run_spec)
95
+ print_gpu_json(gpus, run_spec, args.group_by, self.api.project)
96
+ else:
97
+ run_plan = self.api.client.runs.get_plan(
98
+ self.api.project,
99
+ run_spec,
100
+ max_offers=args.max_offers,
113
101
  )
102
+ print_offers_json(run_plan, run_spec)
114
103
 
115
- print(json.dumps(output, indent=2))
116
- return
117
- else:
118
- print_run_plan(run_plan, include_run_properties=False)
104
+ def _process_group_by_args(self, group_by_args: List[str]) -> List[str]:
105
+ valid_choices = {"gpu", "backend", "region", "count"}
106
+ processed = []
107
+
108
+ for arg in group_by_args:
109
+ values = [v.strip() for v in arg.split(",") if v.strip()]
110
+ for value in values:
111
+ if value in valid_choices:
112
+ processed.append(value)
113
+ else:
114
+ raise CLIError(
115
+ f"Invalid group-by value: '{value}'. Valid choices are: {', '.join(sorted(valid_choices))}"
116
+ )
117
+
118
+ return processed
119
+
120
+ def _list_gpus(self, args: List[str], run_spec: RunSpec) -> List[GpuGroup]:
121
+ group_by = [g for g in args.group_by if g != "gpu"] or None
122
+ return self.api.client.gpus.list_gpus(
123
+ self.api.project,
124
+ run_spec,
125
+ group_by=group_by,
126
+ )
@@ -15,9 +15,11 @@ from dstack._internal.cli.services.configurators.base import (
15
15
  BaseApplyConfigurator,
16
16
  )
17
17
  from dstack._internal.cli.services.profile import apply_profile_args, register_profile_args
18
+ from dstack._internal.cli.services.repos import init_default_virtual_repo
18
19
  from dstack._internal.cli.utils.common import (
19
20
  confirm_ask,
20
21
  console,
22
+ warn,
21
23
  )
22
24
  from dstack._internal.cli.utils.rich import MultiItemStatus
23
25
  from dstack._internal.cli.utils.run import get_runs_table, print_run_plan
@@ -40,6 +42,7 @@ from dstack._internal.core.models.configurations import (
40
42
  TaskConfiguration,
41
43
  )
42
44
  from dstack._internal.core.models.repos.base import Repo
45
+ from dstack._internal.core.models.repos.local import LocalRepo
43
46
  from dstack._internal.core.models.resources import CPUSpec
44
47
  from dstack._internal.core.models.runs import JobStatus, JobSubmission, RunSpec, RunStatus
45
48
  from dstack._internal.core.services.configs import ConfigManager
@@ -76,17 +79,42 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
76
79
  self.apply_args(conf, configurator_args, unknown_args)
77
80
  self.validate_gpu_vendor_and_image(conf)
78
81
  self.validate_cpu_arch_and_image(conf)
79
- if repo is None:
80
- repo = self.api.repos.load(Path.cwd())
81
82
  config_manager = ConfigManager()
82
- if repo.repo_dir is not None:
83
- repo_config = config_manager.get_repo_config_or_error(repo.repo_dir)
84
- self.api.ssh_identity_file = repo_config.ssh_key_path
85
- else:
86
- self.api.ssh_identity_file = get_ssh_keypair(
87
- command_args.ssh_identity_file,
88
- config_manager.dstack_key_path,
89
- )
83
+ if repo is None:
84
+ repo_path = Path.cwd()
85
+ repo_config = config_manager.get_repo_config(repo_path)
86
+ if repo_config is None:
87
+ warn(
88
+ "The repo is not initialized. Starting from 0.19.25, repos are optional\n"
89
+ "There are three options:\n"
90
+ " - Run `dstack init` to initialize the current directory as a repo\n"
91
+ " - Specify `--repo`\n"
92
+ " - Specify `--no-repo` to not use any repo and supress this warning"
93
+ " (this will be the default in the future versions)"
94
+ )
95
+ if not command_args.yes and not confirm_ask("Continue without the repo?"):
96
+ console.print("\nExiting...")
97
+ return
98
+ repo = init_default_virtual_repo(self.api)
99
+ else:
100
+ # Unlikely, but may raise ConfigurationError if the repo does not exist
101
+ # on the server side (stale entry in `config.yml`)
102
+ repo = self.api.repos.load(repo_path)
103
+ if isinstance(repo, LocalRepo):
104
+ warn(
105
+ f"{repo.repo_dir} is a local repo.\n"
106
+ "Local repos are deprecated since 0.19.25"
107
+ " and will be removed soon\n"
108
+ "There are two options:\n"
109
+ " - Migrate to `files`: https://dstack.ai/docs/concepts/tasks/#files\n"
110
+ " - Specify `--no-repo` if you don't need the repo at all\n"
111
+ "In either case, you can run `dstack init --remove` to remove the repo"
112
+ " (only the record about the repo, not its files) and this warning"
113
+ )
114
+ self.api.ssh_identity_file = get_ssh_keypair(
115
+ command_args.ssh_identity_file,
116
+ config_manager.dstack_key_path,
117
+ )
90
118
  profile = load_profile(Path.cwd(), configurator_args.profile)
91
119
  with console.status("Getting apply plan..."):
92
120
  run_plan = self.api.runs.get_run_plan(
@@ -1,12 +1,12 @@
1
+ import argparse
1
2
  from pathlib import Path
2
3
  from typing import Optional
3
4
 
4
5
  from dstack._internal.cli.services.configurators.base import ArgsParser
5
6
  from dstack._internal.core.errors import CLIError
6
- from dstack._internal.core.models.repos.base import Repo, RepoType
7
+ from dstack._internal.core.models.repos.base import Repo
7
8
  from dstack._internal.core.models.repos.remote import GitRepoURL, RemoteRepo, RepoError
8
9
  from dstack._internal.core.models.repos.virtual import VirtualRepo
9
- from dstack._internal.core.services.configs import ConfigManager
10
10
  from dstack._internal.core.services.repos import get_default_branch
11
11
  from dstack._internal.utils.path import PathLike
12
12
  from dstack.api._public import Client
@@ -28,49 +28,31 @@ def register_init_repo_args(parser: ArgsParser):
28
28
  type=str,
29
29
  dest="git_identity_file",
30
30
  )
31
- parser.add_argument(
32
- "--ssh-identity",
33
- metavar="SSH_PRIVATE_KEY",
34
- help="The private SSH key path for SSH tunneling",
35
- type=Path,
36
- dest="ssh_identity_file",
37
- )
31
+ # Deprecated since 0.19.25
38
32
  parser.add_argument(
39
33
  "--local",
40
34
  action="store_true",
41
- help="Do not use Git",
35
+ help=argparse.SUPPRESS,
42
36
  )
43
37
 
44
38
 
45
39
  def init_repo(
46
40
  api: Client,
47
- repo_path: Optional[PathLike],
41
+ repo_path: PathLike,
48
42
  repo_branch: Optional[str],
49
43
  repo_hash: Optional[str],
50
44
  local: bool,
51
45
  git_identity_file: Optional[PathLike],
52
46
  oauth_token: Optional[str],
53
- ssh_identity_file: Optional[PathLike],
54
47
  ) -> Repo:
55
- init = True
56
- if repo_path is None:
57
- init = False
58
- repo_path = Path.cwd()
59
48
  if Path(repo_path).exists():
60
49
  repo = api.repos.load(
61
50
  repo_dir=repo_path,
62
51
  local=local,
63
- init=init,
52
+ init=True,
64
53
  git_identity_file=git_identity_file,
65
54
  oauth_token=oauth_token,
66
55
  )
67
- if ssh_identity_file:
68
- ConfigManager().save_repo_config(
69
- repo_path=repo.get_repo_dir_or_error(),
70
- repo_id=repo.repo_id,
71
- repo_type=RepoType(repo.run_repo_data.repo_type),
72
- ssh_key_path=ssh_identity_file,
73
- )
74
56
  elif isinstance(repo_path, str):
75
57
  try:
76
58
  GitRepoURL.parse(repo_path)
@@ -103,3 +103,10 @@ def add_row_from_dict(table: Table, data: Dict[Union[str, int], Any], **kwargs):
103
103
  else:
104
104
  row.append("")
105
105
  table.add_row(*row, **kwargs)
106
+
107
+
108
def warn(message: str):
    """Print `message` to the CLI console inside `[warning][bold]` markup.

    A newline is appended unless the message already ends with one, so that
    consecutive warnings are separated by a blank line.
    """
    text = message if message.endswith("\n") else f"{message}\n"
    console.print(f"[warning][bold]{text}[/]")
@@ -0,0 +1,210 @@
1
+ import shutil
2
+ from typing import List
3
+
4
+ from rich.table import Table
5
+
6
+ from dstack._internal.cli.utils.common import console
7
+ from dstack._internal.core.models.profiles import SpotPolicy
8
+ from dstack._internal.core.models.runs import Requirements, RunSpec, get_policy_map
9
+ from dstack._internal.server.schemas.gpus import GpuGroup
10
+
11
+
12
def print_gpu_json(gpu_response, run_spec, group_by_cli, api_project):
    """Print GPU information in JSON format.

    Args:
        gpu_response: the GPU groups to print, either as a plain iterable of
            groups or as an object exposing them through a `gpus` attribute.
        run_spec: the run spec the request was made with; its resources,
            max price, spot policy and reservation are echoed in the output.
        group_by_cli: the `--group-by` fields as given on the command line.
        api_project: the project name to report.
    """
    # Function-scope import: json is only needed by this printer.
    import json

    req = Requirements(
        resources=run_spec.configuration.resources,
        max_price=run_spec.merged_profile.max_price,
        spot=get_policy_map(run_spec.merged_profile.spot_policy, default=SpotPolicy.AUTO),
        reservation=run_spec.configuration.reservation,
    )

    if req.spot is None:
        spot_policy = "auto"
    elif req.spot:
        spot_policy = "spot"
    else:
        spot_policy = "on-demand"

    output = {
        "project": api_project,
        "user": "admin",  # TODO: Get actual user name
        "resources": req.resources.dict(),
        "spot_policy": spot_policy,
        "max_price": req.max_price,
        "reservation": run_spec.configuration.reservation,
        "group_by": group_by_cli,
        "gpus": [],
    }

    # The table printer receives the same value and iterates it directly, so a
    # bare list must work here too; a response object with `.gpus` still does.
    gpu_groups = getattr(gpu_response, "gpus", gpu_response)

    for gpu_group in gpu_groups:
        gpu_data = {
            "name": gpu_group.name,
            "memory_mib": gpu_group.memory_mib,
            "vendor": gpu_group.vendor.value,
            "availability": [av.value for av in gpu_group.availability],
            "spot": gpu_group.spot,
            "count": {"min": gpu_group.count.min, "max": gpu_group.count.max},
            "price": {"min": gpu_group.price.min, "max": gpu_group.price.max},
        }

        # Location keys are emitted only when the group carries a value for them.
        if gpu_group.backend:
            gpu_data["backend"] = gpu_group.backend.value
        if gpu_group.backends:
            gpu_data["backends"] = [b.value for b in gpu_group.backends]
        if gpu_group.region:
            gpu_data["region"] = gpu_group.region
        if gpu_group.regions:
            gpu_data["regions"] = gpu_group.regions

        output["gpus"].append(gpu_data)

    print(json.dumps(output, indent=2))
64
+
65
+
66
def print_gpu_table(gpus: List[GpuGroup], run_spec: RunSpec, group_by: List[str], project: str):
    """Print the filter summary followed by one table row per GPU group."""
    print_filter_info(run_spec, group_by, project)

    def fmt_price(value) -> str:
        # Four decimals at most, with trailing zeros and a dangling dot removed.
        return f"{value:.4f}".rstrip("0").rstrip(".")

    # The header layout is decided from which location fields appear in any group.
    any_backend = any(group.backend for group in gpus)
    any_region = any(group.region for group in gpus)
    any_regions = any(group.regions for group in gpus)

    if any_backend and any_region:
        backend_column, region_column = "BACKEND", "REGION"
    elif any_backend and any_regions:
        backend_column, region_column = "BACKEND", "REGIONS"
    else:
        # No dedicated region column: regions are folded into the backend cell.
        backend_column, region_column = "BACKENDS", None

    terminal_columns = shutil.get_terminal_size(fallback=(120, 40)).columns
    table = Table(box=None, expand=terminal_columns <= 110)
    table.add_column("#")
    table.add_column("GPU", no_wrap=True, ratio=2)
    table.add_column("SPOT", style="grey58", ratio=1)
    table.add_column("$/GPU", style="grey58", ratio=1)
    table.add_column(backend_column, style="grey58", ratio=2)
    if region_column:
        table.add_column(region_column, style="grey58", ratio=2)
    table.add_column()  # unnamed trailing column holding the availability note

    secondary_style = "grey58"
    reportable_states = {"not_available", "no_quota", "idle", "busy"}

    for index, group in enumerate(gpus, start=1):
        if group.backend:
            backend_text = group.backend.value
        elif group.backends:
            backend_text = ", ".join(b.value for b in group.backends)
        else:
            backend_text = ""

        if group.region:
            region_text = group.region
        elif group.regions and len(group.regions) <= 3:
            region_text = ", ".join(group.regions)
        elif group.regions:
            region_text = f"{len(group.regions)} regions"
        else:
            region_text = ""

        if region_column:
            backends_display = backend_text
        elif group.regions and len(group.regions) > 3:
            # Long region lists are always shown as a count in the folded cell.
            backends_display = f"{backend_text} ({len(group.regions)} regions)"
        elif region_text:
            backends_display = f"{backend_text} ({region_text})"
        else:
            backends_display = backend_text

        count = group.count
        count_range = str(count.min) if count.min == count.max else f"{count.min}..{count.max}"
        gpu_spec = f"{group.name}:{group.memory_mib // 1024}GB:{count_range}"

        spot_display = ", ".join(kind for kind in ("spot", "on-demand") if kind in group.spot)

        price = group.price
        if price.min == price.max:
            price_display = fmt_price(price.min)
        else:
            price_display = f"{fmt_price(price.min)}..{fmt_price(price.max)}"

        # An availability note is shown only when nothing in the group is available;
        # the first state from the reportable set wins.
        availability = ""
        usable = [av.is_available() for av in group.availability]
        if usable and not any(usable):
            availability = next(
                (
                    av.value.replace("_", " ").lower()
                    for av in group.availability
                    if av.value in reportable_states
                ),
                "",
            )

        row = [f"[{secondary_style}]{index}[/]", gpu_spec]
        row.extend(
            f"[{secondary_style}]{cell}[/]"
            for cell in (spot_display, price_display, backends_display)
        )
        if region_column:
            row.append(f"[{secondary_style}]{region_text}[/]")
        row.append(f"[{secondary_style}]{availability}[/]")

        table.add_row(*row)

    console.print(table)
172
+
173
+
174
def print_filter_info(run_spec: RunSpec, group_by: List[str], project: str):
    """Print the two-column summary of the filters applied to the GPU listing."""
    req = Requirements(
        resources=run_spec.configuration.resources,
        max_price=run_spec.merged_profile.max_price,
        spot=get_policy_map(run_spec.merged_profile.spot_policy, default=SpotPolicy.AUTO),
        reservation=run_spec.merged_profile.reservation,
    )

    pretty_req = req.pretty_format(resources_only=True)

    if req.max_price:
        # `3f` is a minimum width of 3 with the default precision; trailing
        # zeros and a dangling dot are stripped afterwards.
        max_price = f"${req.max_price:3f}".rstrip("0").rstrip(".")
    else:
        max_price = "-"

    spot_policy = "auto" if req.spot is None else ("spot" if req.spot else "on-demand")

    # TODO: Show user name
    rows = [
        ("Project", project),
        ("Resources", pretty_req),
        ("Spot policy", spot_policy),
        ("Max price", max_price),
        ("Reservation", run_spec.configuration.reservation or "-"),
    ]
    if group_by:
        rows.append(("Group by", ", ".join(group_by)))

    props = Table(box=None, show_header=False)
    props.add_column(no_wrap=True)
    props.add_column()
    for label, value in rows:
        props.add_row(f"[bold]{label}[/bold]", value)

    console.print(props)
    console.print()