nomadctl 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. nd/__init__.py +7 -0
  2. nd/binary/__init__.py +10 -0
  3. nd/binary/env.py +43 -0
  4. nd/binary/runner.py +192 -0
  5. nd/cli.py +97 -0
  6. nd/commands/__init__.py +1 -0
  7. nd/commands/_common.py +101 -0
  8. nd/commands/clean.py +50 -0
  9. nd/commands/exec.py +67 -0
  10. nd/commands/list.py +120 -0
  11. nd/commands/logs.py +76 -0
  12. nd/commands/plan.py +103 -0
  13. nd/commands/run.py +372 -0
  14. nd/commands/status/__init__.py +29 -0
  15. nd/commands/status/command.py +102 -0
  16. nd/commands/status/render.py +172 -0
  17. nd/commands/status/report.py +339 -0
  18. nd/commands/stop.py +412 -0
  19. nd/commands/volume/__init__.py +25 -0
  20. nd/commands/volume/command.py +216 -0
  21. nd/commands/volume/render.py +132 -0
  22. nd/commands/volume/report.py +146 -0
  23. nd/constants.py +43 -0
  24. nd/jobfiles.py +125 -0
  25. nd/nomad/__init__.py +29 -0
  26. nd/nomad/client.py +51 -0
  27. nd/nomad/config.py +156 -0
  28. nd/nomad/errors.py +52 -0
  29. nd/nomad/models/__init__.py +1 -0
  30. nd/nomad/models/agent.py +26 -0
  31. nd/nomad/models/allocation.py +37 -0
  32. nd/nomad/models/deployment.py +40 -0
  33. nd/nomad/models/evaluation.py +21 -0
  34. nd/nomad/models/job.py +51 -0
  35. nd/nomad/models/node.py +41 -0
  36. nd/nomad/models/volume.py +28 -0
  37. nd/nomad/resources/__init__.py +1 -0
  38. nd/nomad/resources/agent.py +25 -0
  39. nd/nomad/resources/allocations.py +24 -0
  40. nd/nomad/resources/base.py +45 -0
  41. nd/nomad/resources/deployments.py +28 -0
  42. nd/nomad/resources/evaluations.py +19 -0
  43. nd/nomad/resources/jobs.py +70 -0
  44. nd/nomad/resources/nodes.py +24 -0
  45. nd/nomad/resources/status.py +14 -0
  46. nd/nomad/resources/system.py +25 -0
  47. nd/nomad/resources/volumes.py +42 -0
  48. nd/nomad/transport.py +141 -0
  49. nd/targets/__init__.py +32 -0
  50. nd/targets/alloc_target.py +166 -0
  51. nd/targets/selection.py +91 -0
  52. nd/ui/__init__.py +1 -0
  53. nd/ui/alloc_rows.py +93 -0
  54. nd/ui/duration.py +44 -0
  55. nd/ui/links.py +22 -0
  56. nd/ui/live_panel.py +199 -0
  57. nd/ui/panels.py +31 -0
  58. nd/ui/prompts.py +46 -0
  59. nd/ui/styles.py +52 -0
  60. nd/volumefiles.py +143 -0
  61. nomadctl-0.2.0.dist-info/METADATA +268 -0
  62. nomadctl-0.2.0.dist-info/RECORD +65 -0
  63. nomadctl-0.2.0.dist-info/WHEEL +4 -0
  64. nomadctl-0.2.0.dist-info/entry_points.txt +3 -0
  65. nomadctl-0.2.0.dist-info/licenses/LICENSE +21 -0
nd/commands/list.py ADDED
@@ -0,0 +1,120 @@
1
+ """The ``nd list`` command: list known job files against live cluster state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, Annotated
8
+
9
+ import typer
10
+ from nclutils import pp
11
+
12
+ from nd.commands._common import VerboseOption, configure_verbosity
13
+ from nd.jobfiles import discover_job_files, load_job_directories
14
+ from nd.nomad import NomadClient, NomadConfig
15
+ from nd.ui.links import WebUi
16
+ from nd.ui.panels import status_table, titled_panel
17
+ from nd.ui.styles import status_cell
18
+
19
+ if TYPE_CHECKING:
20
+ from nd.jobfiles import JobFile
21
+ from nd.nomad.models.job import JobListStub
22
+
23
+ # Cluster-status label for a job file whose name is not present in Nomad at all.
24
+ _NOT_DEPLOYED = "not deployed"
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class ListRow:
29
+ """One rendered row: a job file's name, path, and cluster status."""
30
+
31
+ job_name: str
32
+ path: str
33
+ cluster_status: str
34
+ # Nomad job ID for the web UI link, or None when the job is not deployed.
35
+ link_id: str | None
36
+
37
+
38
+ def build_rows(
39
+ files: list[JobFile], jobs: list[JobListStub], *, hide_running: bool = False
40
+ ) -> list[ListRow]:
41
+ """Join discovered job files to cluster jobs by name, classifying each.
42
+
43
+ A file with no resolved job name still appears (named ``?``) so unresolved
44
+ interpolated names are visible rather than silently dropped. A deployed job
45
+ carries its Nomad ID so its name can be linked to the web UI.
46
+
47
+ Args:
48
+ files: Discovered job files to classify.
49
+ jobs: Live cluster jobs to join against.
50
+ hide_running: When True, omit jobs whose cluster status is ``running`` so
51
+ only dead and not-deployed files remain.
52
+
53
+ Returns:
54
+ Sorted list of rows, one per job name per file.
55
+ """
56
+ jobs_by_name = {job.name: job for job in jobs}
57
+ rows: list[ListRow] = []
58
+ for jf in files:
59
+ names = jf.job_names or ["?"]
60
+ for name in names:
61
+ job = jobs_by_name.get(name)
62
+ status = job.status if job else _NOT_DEPLOYED
63
+ if hide_running and status == "running":
64
+ continue
65
+ link_id = job.id if job else None
66
+ rows.append(
67
+ ListRow(job_name=name, path=str(jf.path), cluster_status=status, link_id=link_id)
68
+ )
69
+ return sorted(rows, key=lambda r: r.job_name)
70
+
71
+
72
+ def _render(rows: list[ListRow], ui_base: str) -> None:
73
+ """Print the job-file table inside a titled panel, linking deployed jobs to the web UI."""
74
+ if not rows:
75
+ pp.info("No job files found; set [jobs] directories in your nd config.")
76
+ return
77
+ web = WebUi(ui_base)
78
+ table = status_table("JOB", "STATUS", "FILE")
79
+ for row in rows:
80
+ name = web.job(row.link_id, row.job_name) if row.link_id else row.job_name
81
+ # "not deployed" is not a Nomad status, so style it muted rather than via status_cell.
82
+ cell = (
83
+ status_cell(row.cluster_status)
84
+ if row.cluster_status != _NOT_DEPLOYED
85
+ else "[dim]• not deployed[/]"
86
+ )
87
+ table.add_row(name, cell, row.path)
88
+ pp.console().print(titled_panel(table, "Job files"))
89
+
90
+
91
+ app = typer.Typer()
92
+
93
+
94
+ @app.callback(invoke_without_command=True)
95
+ def list_(
96
+ ctx: typer.Context,
97
+ hide_running: Annotated[ # noqa: FBT002
98
+ bool,
99
+ typer.Option(
100
+ "--hide-running",
101
+ "-R",
102
+ help="Hide jobs that are currently running, leaving only dead and not-deployed files.",
103
+ ),
104
+ ] = False,
105
+ verbose: VerboseOption = 0,
106
+ ) -> None:
107
+ """List known job files and whether each is running, dead, or not deployed."""
108
+ configure_verbosity(ctx, verbose)
109
+ asyncio.run(_run(hide_running=hide_running))
110
+
111
+
112
+ async def _run(*, hide_running: bool = False) -> None:
113
+ """Discover job files, fetch cluster jobs, and render the joined table."""
114
+ directories = load_job_directories()
115
+ files = discover_job_files(directories)
116
+ pp.debug(f"Discovered {len(files)} job file(s) in {len(directories)} dir(s)")
117
+ config = NomadConfig.resolve()
118
+ async with NomadClient.from_config(config) as client:
119
+ jobs = await client.jobs.list()
120
+ _render(build_rows(files, jobs, hide_running=hide_running), config.ui_base)
nd/commands/logs.py ADDED
@@ -0,0 +1,76 @@
1
+ """The ``nd logs`` command: stream, tail, or export a running task's logs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path # noqa: TC003
6
+ from typing import Annotated
7
+
8
+ import typer
9
+
10
+ from nd.commands._common import VerboseOption, configure_verbosity, run_alloc_action
11
+ from nd.nomad import NomadConfig
12
+
13
+ # allow_interspersed_args lets options follow the positional JOB (e.g. `nd logs web -e`).
14
+ app = typer.Typer(context_settings={"allow_interspersed_args": True})
15
+
16
+
17
+ def _streams(*, only_stdout: bool, only_stderr: bool) -> tuple[str, ...]:
18
+ """Resolve the stream-selection flags to the streams to read (default both)."""
19
+ if only_stdout and not only_stderr:
20
+ return ("stdout",)
21
+ if only_stderr and not only_stdout:
22
+ return ("stderr",)
23
+ return ("stdout", "stderr")
24
+
25
+
26
+ @app.callback(invoke_without_command=True)
27
+ def logs( # noqa: PLR0913
28
+ ctx: typer.Context,
29
+ job: Annotated[
30
+ str | None,
31
+ typer.Argument(
32
+ help="Running job to read; matches any job whose name starts with this. "
33
+ "Omit to pick from a list."
34
+ ),
35
+ ] = None,
36
+ task: Annotated[
37
+ str | None,
38
+ typer.Option("--task", "-t", help="Target task; skips the task prompt."),
39
+ ] = None,
40
+ only_stdout: Annotated[ # noqa: FBT002
41
+ bool,
42
+ typer.Option("--stdout", "-o", help="Show only the stdout stream."),
43
+ ] = False,
44
+ only_stderr: Annotated[ # noqa: FBT002
45
+ bool,
46
+ typer.Option("--stderr", "-e", help="Show only the stderr stream."),
47
+ ] = False,
48
+ tail: Annotated[
49
+ int | None,
50
+ typer.Option("--tail", "-n", help="Show the last N lines, static (no follow)."),
51
+ ] = None,
52
+ export: Annotated[
53
+ Path | None,
54
+ typer.Option("--export", help="Write current logs to this file, then exit."),
55
+ ] = None,
56
+ verbose: VerboseOption = 0,
57
+ ) -> None:
58
+ """Stream a task's logs, or tail/export them.
59
+
60
+ Defaults to a live stream of both stdout and stderr (interleaved) until
61
+ interrupted with Ctrl-C. Pass --stdout or --stderr to show a single stream.
62
+ """
63
+ configure_verbosity(ctx, verbose)
64
+ config = NomadConfig.resolve()
65
+ streams = _streams(only_stdout=only_stdout, only_stderr=only_stderr)
66
+ # running_only=False so logs of a dead, completed, or failed task stay reachable
67
+ # (debugging a crash is the main reason to read logs).
68
+ run_alloc_action(
69
+ config,
70
+ job=job,
71
+ task=task,
72
+ running_only=False,
73
+ action=lambda nomad, alloc_id, task_name: nomad.stream_logs(
74
+ alloc_id, task_name, streams=streams, tail=tail, export_path=export
75
+ ),
76
+ )
nd/commands/plan.py ADDED
@@ -0,0 +1,103 @@
1
+ """The ``nd plan`` command: preview job-file changes via `nomad job plan`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from typing import TYPE_CHECKING, Annotated
7
+
8
+ import typer
9
+ from nclutils import pp
10
+
11
+ from nd.binary import NomadBinary, NomadBinaryError
12
+ from nd.commands._common import VerboseOption, configure_verbosity
13
+ from nd.jobfiles import candidates_for, discover_job_files, load_job_directories
14
+ from nd.nomad import NomadConfig
15
+ from nd.targets import resolve_targets, select_candidates
16
+
17
+ if TYPE_CHECKING:
18
+ from nd.jobfiles import JobCandidate
19
+ from nd.targets import TargetResolution
20
+
21
+
22
+ # allow_interspersed_args lets options follow the positional JOB (e.g. `nd plan web -n`);
23
+ # Typer groups disable that by default, which would parse `-n` as a subcommand.
24
+ app = typer.Typer(context_settings={"allow_interspersed_args": True})
25
+
26
+
27
+ @app.callback(invoke_without_command=True)
28
+ def plan(
29
+ ctx: typer.Context,
30
+ job: Annotated[
31
+ str | None,
32
+ typer.Argument(
33
+ help="Job to plan; matches any job whose name starts with this. "
34
+ "Omit to pick from a list."
35
+ ),
36
+ ] = None,
37
+ dry_run: Annotated[ # noqa: FBT002
38
+ bool,
39
+ typer.Option("--dry-run", "-n", help="Resolve and report targets without planning them."),
40
+ ] = False,
41
+ verbose: VerboseOption = 0,
42
+ ) -> None:
43
+ """Preview the changes one or more job files would apply, including to running jobs."""
44
+ configure_verbosity(ctx, verbose)
45
+ exit_code = asyncio.run(_run(job_arg=job, dry_run=dry_run))
46
+ if exit_code != 0:
47
+ raise typer.Exit(exit_code)
48
+
49
+
50
+ async def _run(*, job_arg: str | None, dry_run: bool) -> int:
51
+ """Resolve candidates (all files), then validate + plan each selected one."""
52
+ files = discover_job_files(load_job_directories())
53
+ candidates = candidates_for(files)
54
+ if not candidates:
55
+ pp.info("No job files found; set [jobs] directories in your nd config.")
56
+ return 0
57
+
58
+ resolution: TargetResolution[JobCandidate] = resolve_targets(
59
+ candidates, job_arg, name_of=lambda c: c.name
60
+ )
61
+ targets = await select_candidates(
62
+ resolution, "Select jobs to plan", label_of=lambda c: f"{c.name} [{c.file.path.name}]"
63
+ )
64
+ if targets is None:
65
+ return 0
66
+ if not targets:
67
+ pp.error(f"No job file matching '{job_arg}'")
68
+ return 1
69
+
70
+ if dry_run:
71
+ for c in targets:
72
+ pp.dryrun(f"would plan {c.name} ({c.file.path})")
73
+ return 0
74
+
75
+ return _plan_all(targets)
76
+
77
+
78
+ def _plan_all(targets: list[JobCandidate]) -> int:
79
+ """Validate then plan each unique file, surfacing `nomad job plan` verbatim.
80
+
81
+ Returns 0 when every plan ran (including "changes present"); 1 if any file
82
+ failed validation or the binary could not run.
83
+ """
84
+ # Resolve config so the binary targets the same cluster as nd (including
85
+ # config-file overrides), not just whatever NOMAD_* env vars are ambient.
86
+ config = NomadConfig.resolve()
87
+ try:
88
+ nomad = NomadBinary.create(config)
89
+ except NomadBinaryError as exc:
90
+ pp.error(str(exc))
91
+ return 1
92
+
93
+ failures = 0
94
+ # dict.fromkeys dedups while preserving order, so a multi-job file is planned once.
95
+ for path in dict.fromkeys(c.file.path for c in targets):
96
+ pp.header(f"plan: {path.name}")
97
+ try:
98
+ nomad.validate(path)
99
+ nomad.plan(path)
100
+ except NomadBinaryError as exc:
101
+ pp.error(str(exc))
102
+ failures += 1
103
+ return 1 if failures else 0
nd/commands/run.py ADDED
@@ -0,0 +1,372 @@
1
+ """The ``nd run`` command: deploy job files and watch the rollout live."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import enum
7
+ import time
8
+ from dataclasses import dataclass, replace
9
+ from typing import TYPE_CHECKING, Annotated
10
+
11
+ import msgspec
12
+ import typer
13
+ from nclutils import pp
14
+
15
+ from nd.binary import NomadBinary, NomadBinaryError
16
+ from nd.commands._common import VerboseOption, configure_verbosity
17
+ from nd.constants import DEPLOY_TIMEOUT_SECONDS, HEALTHY_ALLOC_STATUSES, POLL_INTERVAL_SECONDS
18
+ from nd.jobfiles import candidates_for, discover_job_files, load_job_directories
19
+ from nd.nomad import NomadClient, NomadConfig
20
+ from nd.nomad.errors import NomadError
21
+ from nd.targets import resolve_targets, select_candidates
22
+ from nd.ui.alloc_rows import alloc_children
23
+ from nd.ui.duration import summary_title
24
+ from nd.ui.live_panel import PanelUpdate, run_rows
25
+ from nd.ui.styles import OUTCOME_GLYPH
26
+
27
+ if TYPE_CHECKING:
28
+ from nd.jobfiles import JobCandidate
29
+ from nd.nomad.models.deployment import Deployment
30
+ from nd.ui.alloc_rows import TaskLifecycle
31
+
32
+ # Deployment statuses that mean the rollout is finished, one way or the other.
33
+ _DEPLOY_SUCCESS = "successful"
34
+ _DEPLOY_FAILURE = frozenset({"failed", "cancelled"})
35
+
36
+
37
+ class DeployStatus(enum.StrEnum):
38
+ """The terminal outcome of deploying one job."""
39
+
40
+ DEPLOYED = "deployed"
41
+ FAILED = "failed"
42
+ TIMEOUT = "timeout"
43
+
44
+
45
+ @dataclass(frozen=True)
46
+ class DeployOutcome:
47
+ """The result of deploying one job, ready for summary rendering."""
48
+
49
+ name: str
50
+ status: DeployStatus
51
+ detail: str = ""
52
+ warnings: str = ""
53
+
54
+
55
+ def deploy_phase(dep: Deployment) -> str:
56
+ """Summarize a deployment's progress as ``<status>: <healthy>/<desired> healthy``.
57
+
58
+ Aggregates counts across all task groups so the live panel shows a single
59
+ meaningful number rather than per-group noise.
60
+ """
61
+ healthy = sum(tg.healthy_allocs for tg in dep.task_groups.values())
62
+ desired = sum(tg.desired_total for tg in dep.task_groups.values())
63
+ return f"{dep.status}: {healthy}/{desired} healthy"
64
+
65
+
66
+ def task_lifecycle(body: bytes) -> TaskLifecycle:
67
+ """Parse task lifecycle order and labels from a compiled job spec.
68
+
69
+ Tasks are ordered prestart, then main, then poststart/sidecar within each
70
+ group, so the panel shows them in the order Nomad runs them. Poststop tasks are
71
+ omitted because they only run when an allocation stops, not during a deploy.
72
+
73
+ Args:
74
+ body: The compiled ``{"Job": {...}}`` JSON from ``nomad job run -output``.
75
+
76
+ Returns:
77
+ A map of group name to ``{task name: (sort order, label)}``.
78
+ """
79
+ job = msgspec.json.decode(body).get("Job") or {}
80
+ lifecycle: TaskLifecycle = {}
81
+ for group in job.get("TaskGroups") or []:
82
+ tasks: dict[str, tuple[int, str]] = {}
83
+ for index, task in enumerate(group.get("Tasks") or []):
84
+ role = _task_role(task.get("Lifecycle"), index)
85
+ if role is not None:
86
+ tasks[task["Name"]] = role
87
+ lifecycle[group["Name"]] = tasks
88
+ return lifecycle
89
+
90
+
91
+ def _task_role(lifecycle: dict[str, object] | None, index: int) -> tuple[int, str] | None:
92
+ """Return a task's (sort order, label) from its lifecycle block, or None to skip.
93
+
94
+ A task with no lifecycle block is a main task. Poststop tasks return None so
95
+ they are excluded from the deploy view.
96
+ """
97
+ if not lifecycle:
98
+ return (1_000 + index, "main")
99
+ hook = lifecycle.get("Hook")
100
+ if hook == "prestart":
101
+ return (index, "prestart")
102
+ if hook == "poststart":
103
+ return (2_000 + index, "sidecar" if lifecycle.get("Sidecar") else "poststart")
104
+ if hook == "poststop":
105
+ return None
106
+ return (1_000 + index, "main")
107
+
108
+
109
+ _OUTCOME_ROW: dict[DeployStatus, tuple[str, str]] = {
110
+ DeployStatus.DEPLOYED: (OUTCOME_GLYPH["ok"], "deployed"),
111
+ DeployStatus.FAILED: (OUTCOME_GLYPH["fail"], "failed"),
112
+ DeployStatus.TIMEOUT: (OUTCOME_GLYPH["warn"], "still deploying"),
113
+ }
114
+
115
+
116
+ async def _running_job_names(client: NomadClient) -> set[str]:
117
+ """Return the names of jobs currently running in the cluster."""
118
+ jobs = await client.jobs.list()
119
+ return {j.name for j in jobs if j.status == "running"}
120
+
121
+
122
+ # allow_interspersed_args lets options follow the positional JOB argument;
123
+ # Typer groups disable that by default, which would parse flags as subcommands.
124
+ app = typer.Typer(context_settings={"allow_interspersed_args": True})
125
+
126
+
127
+ @app.callback(invoke_without_command=True)
128
+ def run(
129
+ ctx: typer.Context,
130
+ job: Annotated[
131
+ str | None,
132
+ typer.Argument(
133
+ help="Job to run; matches any not-running job whose name starts with this. "
134
+ "Omit to pick from a list."
135
+ ),
136
+ ] = None,
137
+ detach: Annotated[ # noqa: FBT002
138
+ bool,
139
+ typer.Option(
140
+ "--detach", "-d", help="Register the jobs and return without watching the rollout."
141
+ ),
142
+ ] = False,
143
+ dry_run: Annotated[ # noqa: FBT002
144
+ bool,
145
+ typer.Option("--dry-run", "-n", help="Resolve and validate without registering."),
146
+ ] = False,
147
+ verbose: VerboseOption = 0,
148
+ ) -> None:
149
+ """Deploy one or more not-yet-running job files and watch them roll out.
150
+
151
+ Only jobs that are not already running are offered; use plan to preview changes
152
+ to a running job. Each selected file is validated, registered, and watched live:
153
+ service jobs follow their deployment to success, while batch and system jobs
154
+ follow their allocations. Use --detach to register and return without watching.
155
+ """
156
+ configure_verbosity(ctx, verbose)
157
+ exit_code = asyncio.run(_run(job_arg=job, detach=detach, dry_run=dry_run))
158
+ if exit_code != 0:
159
+ raise typer.Exit(exit_code)
160
+
161
+
162
+ async def _run(*, job_arg: str | None, detach: bool, dry_run: bool) -> int: # noqa: PLR0911
163
+ """Resolve not-running candidates, validate, register, and watch the rollout.
164
+
165
+ Returns the exit code: 0 on clean success, 1 on any failure. With ``detach`` the
166
+ jobs are compiled and registered but the rollout is not watched.
167
+ """
168
+ files = discover_job_files(load_job_directories())
169
+ config = NomadConfig.resolve()
170
+ async with NomadClient.from_config(config) as client:
171
+ running = await _running_job_names(client)
172
+ candidates = candidates_for(files, exclude_names=running)
173
+ if not candidates:
174
+ pp.info("No deployable job files (all known jobs are already running).")
175
+ return 0
176
+
177
+ resolution = resolve_targets(candidates, job_arg, name_of=lambda c: c.name)
178
+ targets = await select_candidates(
179
+ resolution, "Select jobs to run", label_of=lambda c: f"{c.name} [{c.file.path.name}]"
180
+ )
181
+ if targets is None:
182
+ return 0
183
+ if not targets:
184
+ pp.error(f"No not-running job file matching '{job_arg}'")
185
+ return 1
186
+
187
+ try:
188
+ nomad = NomadBinary.create(config)
189
+ # dict.fromkeys dedups so a multi-job file is validated once.
190
+ for path in dict.fromkeys(c.file.path for c in targets):
191
+ nomad.validate(path)
192
+ except NomadBinaryError as exc:
193
+ pp.error(str(exc))
194
+ return 1
195
+
196
+ if dry_run:
197
+ for c in targets:
198
+ pp.dryrun(f"would run {c.name} ({c.file.path})")
199
+ return 0
200
+
201
+ if detach:
202
+ return await _register_detached(client, targets, nomad)
203
+
204
+ outcomes = await _deploy_all(client, targets, nomad)
205
+
206
+ return 0 if all(o.status is DeployStatus.DEPLOYED for o in outcomes) else 1
207
+
208
+
209
+ async def _register_detached(
210
+ client: NomadClient, targets: list[JobCandidate], nomad: NomadBinary
211
+ ) -> int:
212
+ """Compile and register every target concurrently, then return without watching.
213
+
214
+ Mirrors ``nomad job run -detach``: each job file is compiled to JSON and
215
+ registered, surfacing any register warnings, but the rollout is not polled. A
216
+ per-job compile or register failure is reported and does not abort the others.
217
+ Returns 0 only when every job registered successfully.
218
+ """
219
+
220
+ async def register_one(candidate: JobCandidate) -> tuple[str, str | None, str]:
221
+ try:
222
+ body = await asyncio.to_thread(nomad.compile_to_json, candidate.file.path)
223
+ resp = await client.jobs.register(body)
224
+ except (NomadBinaryError, NomadError) as exc:
225
+ return (candidate.name, str(exc), "")
226
+ return (candidate.name, None, resp.warnings)
227
+
228
+ results = await asyncio.gather(*(register_one(c) for c in targets))
229
+ registered = [name for name, err, _ in results if err is None]
230
+ if registered:
231
+ pp.success(f"Registered {len(registered)} job(s)", details=registered)
232
+ for name, err, warnings in results:
233
+ if err is not None:
234
+ pp.error(f"{name} failed to register", details=[err])
235
+ elif warnings:
236
+ pp.warning(f"{name}: {warnings}")
237
+ return 0 if all(err is None for _, err, _ in results) else 1
238
+
239
+
240
+ async def _deploy_all(
241
+ client: NomadClient, targets: list[JobCandidate], nomad: NomadBinary
242
+ ) -> list[DeployOutcome]:
243
+ """Register and watch every target concurrently under one live panel.
244
+
245
+ Args:
246
+ client: Authenticated Nomad client.
247
+ targets: The job candidates to register and watch.
248
+ nomad: Configured `nomad` binary handle for the compile step.
249
+
250
+ Returns:
251
+ Ordered list of outcomes, one per target.
252
+ """
253
+ # Resolve node IDs to names once so every job's detail rows can show placement.
254
+ node_names = {node.id: node.name for node in await client.nodes.list()}
255
+
256
+ async def do_work(candidate: JobCandidate, update: PanelUpdate) -> DeployOutcome:
257
+ return await _deploy_one(
258
+ client, candidate, node_names=node_names, update=update, nomad=nomad
259
+ )
260
+
261
+ ordered = await run_rows(
262
+ targets,
263
+ do_work,
264
+ label_of=lambda c: c.name,
265
+ initial_phase="registering",
266
+ finish_of=lambda o: _OUTCOME_ROW[o.status],
267
+ running_title=f"Deploying {len(targets)} job(s)",
268
+ final_title=_final_title,
269
+ )
270
+
271
+ for o in ordered:
272
+ if o.status is DeployStatus.TIMEOUT:
273
+ pp.warning(f"{o.name}: {o.detail or 'still deploying'}")
274
+ elif o.status is DeployStatus.FAILED:
275
+ pp.error(f"{o.name} failed to deploy", details=[o.detail] if o.detail else None)
276
+ if o.warnings:
277
+ pp.warning(f"{o.name}: {o.warnings}")
278
+ return ordered
279
+
280
+
281
+ def _final_title(outcomes: list[DeployOutcome], elapsed_seconds: float) -> str:
282
+ """Build the final panel title with deployed totals and elapsed seconds."""
283
+ ok = sum(1 for o in outcomes if o.status is DeployStatus.DEPLOYED)
284
+ return summary_title("Deployed", ok, len(outcomes), elapsed_seconds)
285
+
286
+
287
+ async def _deploy_one(
288
+ client: NomadClient,
289
+ candidate: JobCandidate,
290
+ *,
291
+ node_names: dict[str, str],
292
+ update: PanelUpdate,
293
+ nomad: NomadBinary,
294
+ ) -> DeployOutcome:
295
+ """Compile, register, and watch one job to a terminal deploy state.
296
+
297
+ Service jobs are watched via their deployment; batch/system jobs (which create
298
+ no deployment) are watched via their allocations. Never raises: Nomad/binary
299
+ failures become a FAILED outcome so a sibling job's progress is unaffected.
300
+
301
+ Args:
302
+ client: Authenticated Nomad client.
303
+ candidate: The job file and name to deploy.
304
+ node_names: Map of node ID to node name for the per-allocation detail rows.
305
+ update: Callback to update the live panel phase text and detail rows.
306
+ nomad: Configured `nomad` binary handle for the compile step.
307
+
308
+ Returns:
309
+ The terminal outcome for this candidate.
310
+ """
311
+ try:
312
+ update("compiling")
313
+ # compile_to_json shells out to the nomad binary (blocking); run it off the
314
+ # event loop so sibling deploys keep making progress concurrently.
315
+ body = await asyncio.to_thread(nomad.compile_to_json, candidate.file.path)
316
+ lifecycle = task_lifecycle(body)
317
+ update("registering")
318
+ resp = await client.jobs.register(body)
319
+ outcome = await _watch(
320
+ client, candidate.name, node_names=node_names, lifecycle=lifecycle, update=update
321
+ )
322
+ # Attach any register warnings so the caller can surface them after the panel closes.
323
+ return replace(outcome, warnings=resp.warnings)
324
+ except (NomadBinaryError, NomadError) as exc:
325
+ return DeployOutcome(candidate.name, DeployStatus.FAILED, str(exc))
326
+
327
+
328
+ async def _watch(
329
+ client: NomadClient,
330
+ job_id: str,
331
+ *,
332
+ node_names: dict[str, str],
333
+ lifecycle: TaskLifecycle,
334
+ update: PanelUpdate,
335
+ ) -> DeployOutcome:
336
+ """Poll a registered job until its deployment (or allocations) settle or time out.
337
+
338
+ Service jobs expose a deployment that tracks health; batch/system jobs have no
339
+ deployment so alloc statuses are used instead. Either way the job's allocations
340
+ are fetched each tick to show where each one is placed and its status. The poll
341
+ loop is bounded by a wall-clock deadline to avoid hanging on a stalled cluster.
342
+
343
+ Args:
344
+ client: Authenticated Nomad client.
345
+ job_id: The Nomad job ID to poll.
346
+ node_names: Map of node ID to node name for the per-allocation detail rows.
347
+ lifecycle: Task ordering and labels from the compiled job spec.
348
+ update: Callback to update the live panel phase text and detail rows.
349
+
350
+ Returns:
351
+ The terminal deploy outcome for this job.
352
+ """
353
+ deadline = time.monotonic() + DEPLOY_TIMEOUT_SECONDS
354
+ while True:
355
+ allocs = await client.jobs.allocations(job_id)
356
+ children = alloc_children(allocs, node_names, lifecycle)
357
+ deployments = await client.jobs.deployments(job_id)
358
+ if deployments: # service job: follow the most-recent deployment
359
+ dep = await client.deployments.read(deployments[0].id)
360
+ if dep.status == _DEPLOY_SUCCESS:
361
+ return DeployOutcome(job_id, DeployStatus.DEPLOYED)
362
+ if dep.status in _DEPLOY_FAILURE:
363
+ return DeployOutcome(job_id, DeployStatus.FAILED, dep.status_description)
364
+ update(deploy_phase(dep), children)
365
+ else: # batch/system job: follow allocations
366
+ running = sum(1 for a in allocs if a.client_status in HEALTHY_ALLOC_STATUSES)
367
+ if allocs and running == len(allocs):
368
+ return DeployOutcome(job_id, DeployStatus.DEPLOYED)
369
+ update(f"placing {running}/{len(allocs) or '?'} allocs", children)
370
+ if time.monotonic() >= deadline:
371
+ return DeployOutcome(job_id, DeployStatus.TIMEOUT, "deploy still in progress")
372
+ await asyncio.sleep(POLL_INTERVAL_SECONDS)