nomadctl 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nd/__init__.py +7 -0
- nd/binary/__init__.py +10 -0
- nd/binary/env.py +43 -0
- nd/binary/runner.py +192 -0
- nd/cli.py +97 -0
- nd/commands/__init__.py +1 -0
- nd/commands/_common.py +101 -0
- nd/commands/clean.py +50 -0
- nd/commands/exec.py +67 -0
- nd/commands/list.py +120 -0
- nd/commands/logs.py +76 -0
- nd/commands/plan.py +103 -0
- nd/commands/run.py +372 -0
- nd/commands/status/__init__.py +29 -0
- nd/commands/status/command.py +102 -0
- nd/commands/status/render.py +172 -0
- nd/commands/status/report.py +339 -0
- nd/commands/stop.py +412 -0
- nd/commands/volume/__init__.py +25 -0
- nd/commands/volume/command.py +216 -0
- nd/commands/volume/render.py +132 -0
- nd/commands/volume/report.py +146 -0
- nd/constants.py +43 -0
- nd/jobfiles.py +125 -0
- nd/nomad/__init__.py +29 -0
- nd/nomad/client.py +51 -0
- nd/nomad/config.py +156 -0
- nd/nomad/errors.py +52 -0
- nd/nomad/models/__init__.py +1 -0
- nd/nomad/models/agent.py +26 -0
- nd/nomad/models/allocation.py +37 -0
- nd/nomad/models/deployment.py +40 -0
- nd/nomad/models/evaluation.py +21 -0
- nd/nomad/models/job.py +51 -0
- nd/nomad/models/node.py +41 -0
- nd/nomad/models/volume.py +28 -0
- nd/nomad/resources/__init__.py +1 -0
- nd/nomad/resources/agent.py +25 -0
- nd/nomad/resources/allocations.py +24 -0
- nd/nomad/resources/base.py +45 -0
- nd/nomad/resources/deployments.py +28 -0
- nd/nomad/resources/evaluations.py +19 -0
- nd/nomad/resources/jobs.py +70 -0
- nd/nomad/resources/nodes.py +24 -0
- nd/nomad/resources/status.py +14 -0
- nd/nomad/resources/system.py +25 -0
- nd/nomad/resources/volumes.py +42 -0
- nd/nomad/transport.py +141 -0
- nd/targets/__init__.py +32 -0
- nd/targets/alloc_target.py +166 -0
- nd/targets/selection.py +91 -0
- nd/ui/__init__.py +1 -0
- nd/ui/alloc_rows.py +93 -0
- nd/ui/duration.py +44 -0
- nd/ui/links.py +22 -0
- nd/ui/live_panel.py +199 -0
- nd/ui/panels.py +31 -0
- nd/ui/prompts.py +46 -0
- nd/ui/styles.py +52 -0
- nd/volumefiles.py +143 -0
- nomadctl-0.2.0.dist-info/METADATA +268 -0
- nomadctl-0.2.0.dist-info/RECORD +65 -0
- nomadctl-0.2.0.dist-info/WHEEL +4 -0
- nomadctl-0.2.0.dist-info/entry_points.txt +3 -0
- nomadctl-0.2.0.dist-info/licenses/LICENSE +21 -0
nd/commands/list.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""The ``nd list`` command: list known job files against live cluster state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import TYPE_CHECKING, Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from nclutils import pp
|
|
11
|
+
|
|
12
|
+
from nd.commands._common import VerboseOption, configure_verbosity
|
|
13
|
+
from nd.jobfiles import discover_job_files, load_job_directories
|
|
14
|
+
from nd.nomad import NomadClient, NomadConfig
|
|
15
|
+
from nd.ui.links import WebUi
|
|
16
|
+
from nd.ui.panels import status_table, titled_panel
|
|
17
|
+
from nd.ui.styles import status_cell
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from nd.jobfiles import JobFile
|
|
21
|
+
from nd.nomad.models.job import JobListStub
|
|
22
|
+
|
|
23
|
+
# Status label shown for a job file whose job name has no counterpart in Nomad.
_NOT_DEPLOYED = "not deployed"


@dataclass(frozen=True)
class ListRow:
    """A single table row pairing a job file with its cluster status.

    Instances are immutable (frozen dataclass).
    """

    # Job name the row is listed under.
    job_name: str
    # Location of the job file, already rendered as a string.
    path: str
    # Status reported by the cluster, or the "not deployed" label.
    cluster_status: str
    # Nomad job ID used for the web UI link; None when the job is not deployed.
    link_id: str | None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build_rows(
    files: list[JobFile], jobs: list[JobListStub], *, hide_running: bool = False
) -> list[ListRow]:
    """Match every discovered job file against the cluster's jobs, by job name.

    Each job name declared by a file yields one row. A file without any resolved
    job name is still listed under the placeholder name ``?`` so that it shows up
    instead of vanishing. Rows for jobs found in the cluster also carry the Nomad
    job ID, which lets the renderer link the name to the web UI.

    Args:
        files: Discovered job files to classify.
        jobs: Live cluster jobs to join against.
        hide_running: When True, rows whose cluster status is ``running`` are
            skipped, leaving only the other statuses and not-deployed files.

    Returns:
        The rows ordered by job name (ties keep discovery order).
    """
    # Index cluster jobs by name; on duplicate names the later entry wins.
    cluster: dict[str, JobListStub] = {}
    for stub in jobs:
        cluster[stub.name] = stub

    collected: list[ListRow] = []
    for job_file in files:
        for job_name in job_file.job_names or ["?"]:
            match = cluster.get(job_name)
            state = match.status if match else _NOT_DEPLOYED
            if hide_running and state == "running":
                continue
            collected.append(
                ListRow(
                    job_name=job_name,
                    path=str(job_file.path),
                    cluster_status=state,
                    link_id=match.id if match else None,
                )
            )

    # list.sort is stable, exactly like sorted().
    collected.sort(key=lambda row: row.job_name)
    return collected
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _render(rows: list[ListRow], ui_base: str) -> None:
    """Print the rows as a table inside a panel titled "Job files".

    Deployed jobs (rows with a ``link_id``) get their name rendered through the
    web UI link helper; all other names are printed as plain text. With no rows,
    a hint about configuring job directories is printed instead of a table.

    Args:
        rows: Rows to display, already filtered and sorted.
        ui_base: Base address handed to ``WebUi`` for building job links.
    """
    if not rows:
        pp.info("No job files found; set [jobs] directories in your nd config.")
        return

    links = WebUi(ui_base)
    table = status_table("JOB", "STATUS", "FILE")
    for entry in rows:
        if entry.link_id:
            job_cell = links.job(entry.link_id, entry.job_name)
        else:
            job_cell = entry.job_name

        # "not deployed" is our own label, not a Nomad status, so it bypasses
        # status_cell and is rendered dimmed instead.
        if entry.cluster_status == _NOT_DEPLOYED:
            status_markup = "[dim]• not deployed[/]"
        else:
            status_markup = status_cell(entry.cluster_status)

        table.add_row(job_cell, status_markup, entry.path)

    pp.console().print(titled_panel(table, "Job files"))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Typer app for `nd list`; no subcommands are registered in this module, so the
# callback below acts as the command itself.
app = typer.Typer()


# invoke_without_command=True makes the callback run even though no subcommand
# name follows on the command line.
@app.callback(invoke_without_command=True)
def list_(
    ctx: typer.Context,
    hide_running: Annotated[  # noqa: FBT002
        bool,
        typer.Option(
            "--hide-running",
            "-R",
            help="Hide jobs that are currently running, leaving only dead and not-deployed files.",
        ),
    ] = False,
    verbose: VerboseOption = 0,
) -> None:
    """List known job files and whether each is running, dead, or not deployed."""
    configure_verbosity(ctx, verbose)
    # The actual work awaits the Nomad client, so it is driven from this
    # synchronous entry point with asyncio.run.
    asyncio.run(_run(hide_running=hide_running))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
async def _run(*, hide_running: bool = False) -> None:
    """Discover job files, fetch cluster jobs, and render the joined table.

    Two empty outcomes are reported separately so the hint matches the cause:
    no job files discovered at all, versus every row filtered out by
    ``hide_running``.

    Args:
        hide_running: When True, rows whose cluster status is ``running`` are
            left out of the table.
    """
    directories = load_job_directories()
    files = discover_job_files(directories)
    pp.debug(f"Discovered {len(files)} job file(s) in {len(directories)} dir(s)")
    if not files:
        # Nothing to join against, so skip the cluster round-trip entirely.
        pp.info("No job files found; set [jobs] directories in your nd config.")
        return

    config = NomadConfig.resolve()
    async with NomadClient.from_config(config) as client:
        jobs = await client.jobs.list()

    rows = build_rows(files, jobs, hide_running=hide_running)
    if not rows:
        # build_rows emits at least one row per file unless hide_running drops
        # it, so an empty result here means every known job is running; the
        # "no job files found" hint would be misleading.
        pp.info("All known job files are already running; nothing to list.")
        return
    _render(rows, config.ui_base)
|
nd/commands/logs.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""The ``nd logs`` command: stream, tail, or export a running task's logs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path # noqa: TC003
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
|
|
10
|
+
from nd.commands._common import VerboseOption, configure_verbosity, run_alloc_action
|
|
11
|
+
from nd.nomad import NomadConfig
|
|
12
|
+
|
|
13
|
+
# allow_interspersed_args lets options follow the positional JOB (e.g. `nd logs web -e`);
# Typer groups disable that by default, which would parse `-e` as a subcommand.
app = typer.Typer(context_settings={"allow_interspersed_args": True})
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _streams(*, only_stdout: bool, only_stderr: bool) -> tuple[str, ...]:
|
|
18
|
+
"""Resolve the stream-selection flags to the streams to read (default both)."""
|
|
19
|
+
if only_stdout and not only_stderr:
|
|
20
|
+
return ("stdout",)
|
|
21
|
+
if only_stderr and not only_stdout:
|
|
22
|
+
return ("stderr",)
|
|
23
|
+
return ("stdout", "stderr")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# invoke_without_command=True makes the callback run even though no subcommand
# name follows on the command line.
@app.callback(invoke_without_command=True)
def logs(  # noqa: PLR0913
    ctx: typer.Context,
    job: Annotated[
        str | None,
        typer.Argument(
            help="Running job to read; matches any job whose name starts with this. "
            "Omit to pick from a list."
        ),
    ] = None,
    task: Annotated[
        str | None,
        typer.Option("--task", "-t", help="Target task; skips the task prompt."),
    ] = None,
    only_stdout: Annotated[  # noqa: FBT002
        bool,
        typer.Option("--stdout", "-o", help="Show only the stdout stream."),
    ] = False,
    only_stderr: Annotated[  # noqa: FBT002
        bool,
        typer.Option("--stderr", "-e", help="Show only the stderr stream."),
    ] = False,
    tail: Annotated[
        int | None,
        typer.Option("--tail", "-n", help="Show the last N lines, static (no follow)."),
    ] = None,
    export: Annotated[
        Path | None,
        typer.Option("--export", help="Write current logs to this file, then exit."),
    ] = None,
    verbose: VerboseOption = 0,
) -> None:
    """Stream a task's logs, or tail/export them.

    Defaults to a live stream of both stdout and stderr (interleaved) until
    interrupted with Ctrl-C. Pass --stdout or --stderr to show a single stream.
    """
    configure_verbosity(ctx, verbose)
    config = NomadConfig.resolve()
    # Passing both --stdout and --stderr resolves to both streams, same as passing neither.
    streams = _streams(only_stdout=only_stdout, only_stderr=only_stderr)
    # running_only=False so logs of a dead, completed, or failed task stay reachable
    # (debugging a crash is the main reason to read logs).
    # NOTE(review): tail and export are forwarded unchanged; how stream_logs
    # combines them (or handles a non-positive tail) is not visible here — confirm.
    run_alloc_action(
        config,
        job=job,
        task=task,
        running_only=False,
        action=lambda nomad, alloc_id, task_name: nomad.stream_logs(
            alloc_id, task_name, streams=streams, tail=tail, export_path=export
        ),
    )
|
nd/commands/plan.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""The ``nd plan`` command: preview job-file changes via `nomad job plan`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from typing import TYPE_CHECKING, Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from nclutils import pp
|
|
10
|
+
|
|
11
|
+
from nd.binary import NomadBinary, NomadBinaryError
|
|
12
|
+
from nd.commands._common import VerboseOption, configure_verbosity
|
|
13
|
+
from nd.jobfiles import candidates_for, discover_job_files, load_job_directories
|
|
14
|
+
from nd.nomad import NomadConfig
|
|
15
|
+
from nd.targets import resolve_targets, select_candidates
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from nd.jobfiles import JobCandidate
|
|
19
|
+
from nd.targets import TargetResolution
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# allow_interspersed_args lets options follow the positional JOB (e.g. `nd plan web -n`);
# Typer groups disable that by default, which would parse `-n` as a subcommand.
app = typer.Typer(context_settings={"allow_interspersed_args": True})


# invoke_without_command=True makes the callback run even though no subcommand
# name follows on the command line.
@app.callback(invoke_without_command=True)
def plan(
    ctx: typer.Context,
    job: Annotated[
        str | None,
        typer.Argument(
            help="Job to plan; matches any job whose name starts with this. "
            "Omit to pick from a list."
        ),
    ] = None,
    dry_run: Annotated[  # noqa: FBT002
        bool,
        typer.Option("--dry-run", "-n", help="Resolve and report targets without planning them."),
    ] = False,
    verbose: VerboseOption = 0,
) -> None:
    """Preview the changes one or more job files would apply, including to running jobs."""
    configure_verbosity(ctx, verbose)
    # _run returns the process exit code rather than raising, so it is turned
    # into typer.Exit here; a zero code falls through to a normal return.
    exit_code = asyncio.run(_run(job_arg=job, dry_run=dry_run))
    if exit_code != 0:
        raise typer.Exit(exit_code)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def _run(*, job_arg: str | None, dry_run: bool) -> int:
    """Pick the job files to plan and run validate + plan on each selection.

    Candidates come from every discovered job file; nothing is filtered by
    cluster state.

    Args:
        job_arg: Job name prefix given on the command line, or None.
        dry_run: When True, only report which targets would be planned.

    Returns:
        The exit code: 0 when there was nothing to do, the selection returned
        None, or the dry run / planning succeeded; 1 when nothing matched
        ``job_arg`` or planning failed.
    """
    candidates = candidates_for(discover_job_files(load_job_directories()))
    if not candidates:
        pp.info("No job files found; set [jobs] directories in your nd config.")
        return 0

    def name_of(candidate: JobCandidate) -> str:
        return candidate.name

    def label_of(candidate: JobCandidate) -> str:
        return f"{candidate.name} [{candidate.file.path.name}]"

    resolution: TargetResolution[JobCandidate] = resolve_targets(
        candidates, job_arg, name_of=name_of
    )
    targets = await select_candidates(resolution, "Select jobs to plan", label_of=label_of)

    # None and an empty list are distinct results: None ends quietly with
    # success (presumably a dismissed prompt — confirm in select_candidates),
    # while an empty list is reported as "no match".
    if targets is None:
        return 0
    if not targets:
        pp.error(f"No job file matching '{job_arg}'")
        return 1

    if not dry_run:
        return _plan_all(targets)

    for candidate in targets:
        pp.dryrun(f"would plan {candidate.name} ({candidate.file.path})")
    return 0
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _plan_all(targets: list[JobCandidate]) -> int:
    """Run validate followed by plan on each distinct job file among the targets.

    Args:
        targets: Selected job candidates; several may share one file.

    Returns:
        0 when validate and plan completed for every file without a
        ``NomadBinaryError``; 1 when the binary handle could not be created or
        any file raised ``NomadBinaryError``.
    """
    # Build the binary handle from the resolved config so it talks to the same
    # cluster as nd itself (config-file overrides included) instead of relying
    # only on ambient NOMAD_* environment variables.
    config = NomadConfig.resolve()
    try:
        binary = NomadBinary.create(config)
    except NomadBinaryError as exc:
        pp.error(str(exc))
        return 1

    # dict.fromkeys keeps first-seen order while dropping duplicates, so a file
    # that defines several jobs is planned only once.
    unique_paths = list(dict.fromkeys(candidate.file.path for candidate in targets))

    any_failed = False
    for job_path in unique_paths:
        pp.header(f"plan: {job_path.name}")
        try:
            binary.validate(job_path)
            binary.plan(job_path)
        except NomadBinaryError as exc:
            # Report and move on; one bad file must not stop the remaining plans.
            pp.error(str(exc))
            any_failed = True
    return int(any_failed)
|
nd/commands/run.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""The ``nd run`` command: deploy job files and watch the rollout live."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import enum
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass, replace
|
|
9
|
+
from typing import TYPE_CHECKING, Annotated
|
|
10
|
+
|
|
11
|
+
import msgspec
|
|
12
|
+
import typer
|
|
13
|
+
from nclutils import pp
|
|
14
|
+
|
|
15
|
+
from nd.binary import NomadBinary, NomadBinaryError
|
|
16
|
+
from nd.commands._common import VerboseOption, configure_verbosity
|
|
17
|
+
from nd.constants import DEPLOY_TIMEOUT_SECONDS, HEALTHY_ALLOC_STATUSES, POLL_INTERVAL_SECONDS
|
|
18
|
+
from nd.jobfiles import candidates_for, discover_job_files, load_job_directories
|
|
19
|
+
from nd.nomad import NomadClient, NomadConfig
|
|
20
|
+
from nd.nomad.errors import NomadError
|
|
21
|
+
from nd.targets import resolve_targets, select_candidates
|
|
22
|
+
from nd.ui.alloc_rows import alloc_children
|
|
23
|
+
from nd.ui.duration import summary_title
|
|
24
|
+
from nd.ui.live_panel import PanelUpdate, run_rows
|
|
25
|
+
from nd.ui.styles import OUTCOME_GLYPH
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from nd.jobfiles import JobCandidate
|
|
29
|
+
from nd.nomad.models.deployment import Deployment
|
|
30
|
+
from nd.ui.alloc_rows import TaskLifecycle
|
|
31
|
+
|
|
32
|
+
# Deployment statuses that mean the rollout is finished, one way or the other.
|
|
33
|
+
_DEPLOY_SUCCESS = "successful"
|
|
34
|
+
_DEPLOY_FAILURE = frozenset({"failed", "cancelled"})
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DeployStatus(enum.StrEnum):
|
|
38
|
+
"""The terminal outcome of deploying one job."""
|
|
39
|
+
|
|
40
|
+
DEPLOYED = "deployed"
|
|
41
|
+
FAILED = "failed"
|
|
42
|
+
TIMEOUT = "timeout"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class DeployOutcome:
    """Immutable record of how one job's deploy ended, used for the summary output."""

    # Job the outcome belongs to.
    name: str
    # Terminal status of the deploy.
    status: DeployStatus
    # Optional explanation such as an error message; empty when there is none.
    detail: str = ""
    # Warnings attached from the register response; empty when there are none.
    warnings: str = ""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def deploy_phase(dep: Deployment) -> str:
    """Render a deployment's progress as ``<status>: <healthy>/<desired> healthy``.

    The healthy and desired counts are totals over every task group, giving the
    live panel one number per job instead of a line per group.

    Args:
        dep: Deployment whose status and task-group counters are summarized.

    Returns:
        The one-line progress text.
    """
    healthy_total = 0
    desired_total = 0
    for group in dep.task_groups.values():
        healthy_total += group.healthy_allocs
        desired_total += group.desired_total
    return f"{dep.status}: {healthy_total}/{desired_total} healthy"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def task_lifecycle(body: bytes) -> TaskLifecycle:
    """Extract each group's task ordering and labels from a compiled job spec.

    Within a group the sort orders place prestart tasks first, main tasks next,
    and poststart/sidecar tasks last. Poststop tasks are left out because they
    only run when an allocation stops, not during a deploy.

    Args:
        body: The compiled ``{"Job": {...}}`` JSON from ``nomad job run -output``.

    Returns:
        A map of group name to ``{task name: (sort order, label)}``.
    """
    decoded = msgspec.json.decode(body)
    job_spec = decoded.get("Job") or {}

    by_group: TaskLifecycle = {}
    for group in job_spec.get("TaskGroups") or []:
        # _task_role returns None for tasks that must not be shown; the filter
        # runs before the key is read, so skipped tasks never need a "Name".
        roles: dict[str, tuple[int, str]] = {
            task["Name"]: role
            for position, task in enumerate(group.get("Tasks") or [])
            if (role := _task_role(task.get("Lifecycle"), position)) is not None
        }
        by_group[group["Name"]] = roles
    return by_group
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _task_role(lifecycle: dict[str, object] | None, index: int) -> tuple[int, str] | None:
|
|
92
|
+
"""Return a task's (sort order, label) from its lifecycle block, or None to skip.
|
|
93
|
+
|
|
94
|
+
A task with no lifecycle block is a main task. Poststop tasks return None so
|
|
95
|
+
they are excluded from the deploy view.
|
|
96
|
+
"""
|
|
97
|
+
if not lifecycle:
|
|
98
|
+
return (1_000 + index, "main")
|
|
99
|
+
hook = lifecycle.get("Hook")
|
|
100
|
+
if hook == "prestart":
|
|
101
|
+
return (index, "prestart")
|
|
102
|
+
if hook == "poststart":
|
|
103
|
+
return (2_000 + index, "sidecar" if lifecycle.get("Sidecar") else "poststart")
|
|
104
|
+
if hook == "poststop":
|
|
105
|
+
return None
|
|
106
|
+
return (1_000 + index, "main")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# Final (glyph, label) pair shown for each terminal deploy status; _deploy_all
# looks it up via finish_of when a job's panel row is finished.
_OUTCOME_ROW: dict[DeployStatus, tuple[str, str]] = {
    DeployStatus.DEPLOYED: (OUTCOME_GLYPH["ok"], "deployed"),
    DeployStatus.FAILED: (OUTCOME_GLYPH["fail"], "failed"),
    # A timeout is labelled "still deploying" and given the warning glyph.
    DeployStatus.TIMEOUT: (OUTCOME_GLYPH["warn"], "still deploying"),
}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
async def _running_job_names(client: NomadClient) -> set[str]:
|
|
117
|
+
"""Return the names of jobs currently running in the cluster."""
|
|
118
|
+
jobs = await client.jobs.list()
|
|
119
|
+
return {j.name for j in jobs if j.status == "running"}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# allow_interspersed_args lets options follow the positional JOB argument;
# Typer groups disable that by default, which would parse flags as subcommands.
app = typer.Typer(context_settings={"allow_interspersed_args": True})


# invoke_without_command=True makes the callback run even though no subcommand
# name follows on the command line.
@app.callback(invoke_without_command=True)
def run(
    ctx: typer.Context,
    job: Annotated[
        str | None,
        typer.Argument(
            help="Job to run; matches any not-running job whose name starts with this. "
            "Omit to pick from a list."
        ),
    ] = None,
    detach: Annotated[  # noqa: FBT002
        bool,
        typer.Option(
            "--detach", "-d", help="Register the jobs and return without watching the rollout."
        ),
    ] = False,
    dry_run: Annotated[  # noqa: FBT002
        bool,
        typer.Option("--dry-run", "-n", help="Resolve and validate without registering."),
    ] = False,
    verbose: VerboseOption = 0,
) -> None:
    """Deploy one or more not-yet-running job files and watch them roll out.

    Only jobs that are not already running are offered; use plan to preview changes
    to a running job. Each selected file is validated, registered, and watched live:
    service jobs follow their deployment to success, while batch and system jobs
    follow their allocations. Use --detach to register and return without watching.
    """
    configure_verbosity(ctx, verbose)
    # _run returns the process exit code rather than raising, so it is turned
    # into typer.Exit here; a zero code falls through to a normal return.
    exit_code = asyncio.run(_run(job_arg=job, detach=detach, dry_run=dry_run))
    if exit_code != 0:
        raise typer.Exit(exit_code)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
async def _run(*, job_arg: str | None, detach: bool, dry_run: bool) -> int:  # noqa: PLR0911
    """Resolve not-running candidates, validate, register, and watch the rollout.

    With ``detach`` the jobs are compiled and registered but the rollout is not
    watched. With ``dry_run`` the targets are resolved and validated, then only
    reported.

    Args:
        job_arg: Job name prefix given on the command line, or None.
        detach: Register the jobs and return without watching.
        dry_run: Resolve and validate without registering.

    Returns:
        The exit code: 0 on clean success (including "nothing to do"), 1 on any
        failure.
    """
    files = discover_job_files(load_job_directories())
    if not files:
        # No job files at all is a configuration problem, not "everything is
        # already running"; say so and skip the cluster round-trip.
        pp.info("No job files found; set [jobs] directories in your nd config.")
        return 0

    config = NomadConfig.resolve()
    async with NomadClient.from_config(config) as client:
        running = await _running_job_names(client)
        candidates = candidates_for(files, exclude_names=running)
        if not candidates:
            pp.info("No deployable job files (all known jobs are already running).")
            return 0

        resolution = resolve_targets(candidates, job_arg, name_of=lambda c: c.name)
        targets = await select_candidates(
            resolution, "Select jobs to run", label_of=lambda c: f"{c.name} [{c.file.path.name}]"
        )
        # None and an empty list are distinct results: None ends quietly with
        # success, while an empty list is reported as "no match".
        if targets is None:
            return 0
        if not targets:
            pp.error(f"No not-running job file matching '{job_arg}'")
            return 1

        try:
            nomad = NomadBinary.create(config)
            # dict.fromkeys dedups so a multi-job file is validated once.
            for path in dict.fromkeys(c.file.path for c in targets):
                nomad.validate(path)
        except NomadBinaryError as exc:
            pp.error(str(exc))
            return 1

        # Validation has already happened above, so a dry run still catches
        # invalid job files; it just stops before anything is registered.
        if dry_run:
            for c in targets:
                pp.dryrun(f"would run {c.name} ({c.file.path})")
            return 0

        if detach:
            return await _register_detached(client, targets, nomad)

        outcomes = await _deploy_all(client, targets, nomad)

        return 0 if all(o.status is DeployStatus.DEPLOYED for o in outcomes) else 1
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def _register_detached(
    client: NomadClient, targets: list[JobCandidate], nomad: NomadBinary
) -> int:
    """Compile and register all targets concurrently without watching the rollout.

    Every job file is compiled to JSON and registered; register warnings are
    printed, but nothing is polled afterwards. A compile or register failure
    (``NomadBinaryError`` / ``NomadError``) is reported for that job only and
    does not stop the others.

    Args:
        client: Nomad client used to register the compiled jobs.
        targets: The job candidates to register.
        nomad: `nomad` binary handle used for the compile step.

    Returns:
        0 when every job registered; 1 when at least one failed.
    """

    async def attempt(candidate: JobCandidate) -> tuple[str, str | None, str]:
        # Result shape: (job name, error text or None, register warnings).
        job_name = candidate.name
        try:
            # The compile step shells out to the binary, so it runs in a thread.
            compiled = await asyncio.to_thread(nomad.compile_to_json, candidate.file.path)
            response = await client.jobs.register(compiled)
        except (NomadBinaryError, NomadError) as exc:
            return (job_name, str(exc), "")
        return (job_name, None, response.warnings)

    results = await asyncio.gather(*map(attempt, targets))

    succeeded = [name for name, error, _ in results if error is None]
    if succeeded:
        pp.success(f"Registered {len(succeeded)} job(s)", details=succeeded)

    for name, error, warnings in results:
        if error is None:
            if warnings:
                pp.warning(f"{name}: {warnings}")
        else:
            pp.error(f"{name} failed to register", details=[error])

    return 1 if len(succeeded) != len(results) else 0
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
async def _deploy_all(
    client: NomadClient, targets: list[JobCandidate], nomad: NomadBinary
) -> list[DeployOutcome]:
    """Deploy all targets concurrently under a single live panel.

    After the panel finishes, timeouts, failures, and register warnings are
    printed per job.

    Args:
        client: Authenticated Nomad client.
        targets: The job candidates to register and watch.
        nomad: Configured `nomad` binary handle for the compile step.

    Returns:
        Ordered list of outcomes, one per target.
    """
    # Node IDs are mapped to names a single time up front; every job's detail
    # rows reuse the same lookup to show placement.
    node_names: dict[str, str] = {}
    for node in await client.nodes.list():
        node_names[node.id] = node.name

    async def deploy(candidate: JobCandidate, update: PanelUpdate) -> DeployOutcome:
        return await _deploy_one(
            client, candidate, node_names=node_names, update=update, nomad=nomad
        )

    def row_label(candidate: JobCandidate) -> str:
        return candidate.name

    def row_finish(outcome: DeployOutcome) -> tuple[str, str]:
        return _OUTCOME_ROW[outcome.status]

    outcomes = await run_rows(
        targets,
        deploy,
        label_of=row_label,
        initial_phase="registering",
        finish_of=row_finish,
        running_title=f"Deploying {len(targets)} job(s)",
        final_title=_final_title,
    )

    for outcome in outcomes:
        status = outcome.status
        if status is DeployStatus.TIMEOUT:
            pp.warning(f"{outcome.name}: {outcome.detail or 'still deploying'}")
        elif status is DeployStatus.FAILED:
            pp.error(
                f"{outcome.name} failed to deploy",
                details=[outcome.detail] if outcome.detail else None,
            )
        # Register warnings are shown whatever the final status was.
        if outcome.warnings:
            pp.warning(f"{outcome.name}: {outcome.warnings}")
    return outcomes
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _final_title(outcomes: list[DeployOutcome], elapsed_seconds: float) -> str:
    """Compose the closing panel title from the deployed count, total, and elapsed time.

    Args:
        outcomes: Outcome of every job handled by the panel.
        elapsed_seconds: Elapsed time passed through to the title helper.

    Returns:
        The title text produced by ``summary_title``.
    """
    deployed = [o for o in outcomes if o.status is DeployStatus.DEPLOYED]
    return summary_title("Deployed", len(deployed), len(outcomes), elapsed_seconds)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
async def _deploy_one(
    client: NomadClient,
    candidate: JobCandidate,
    *,
    node_names: dict[str, str],
    update: PanelUpdate,
    nomad: NomadBinary,
) -> DeployOutcome:
    """Compile one job file, register it, and watch it until it settles.

    A ``NomadBinaryError`` or ``NomadError`` raised at any step is converted into
    a FAILED outcome instead of propagating, so a sibling job's progress is
    unaffected.

    Args:
        client: Authenticated Nomad client.
        candidate: The job file and name to deploy.
        node_names: Map of node ID to node name for the per-allocation detail rows.
        update: Callback to update the live panel phase text and detail rows.
        nomad: Configured `nomad` binary handle for the compile step.

    Returns:
        The terminal outcome for this candidate, carrying any register warnings.
    """
    job_name = candidate.name
    try:
        update("compiling")
        # The compile step shells out to the nomad binary and blocks, so it is
        # pushed to a worker thread to keep sibling deploys moving.
        compiled = await asyncio.to_thread(nomad.compile_to_json, candidate.file.path)
        task_order = task_lifecycle(compiled)

        update("registering")
        registration = await client.jobs.register(compiled)

        watched = await _watch(
            client, job_name, node_names=node_names, lifecycle=task_order, update=update
        )
    except (NomadBinaryError, NomadError) as exc:
        return DeployOutcome(job_name, DeployStatus.FAILED, str(exc))

    # Carry the register warnings on the outcome so the caller can print them
    # once the live panel has closed.
    return replace(watched, warnings=registration.warnings)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
async def _watch(
    client: NomadClient,
    job_id: str,
    *,
    node_names: dict[str, str],
    lifecycle: TaskLifecycle,
    update: PanelUpdate,
) -> DeployOutcome:
    """Poll a registered job until it reaches a terminal state or the deadline passes.

    Each tick fetches the job's allocations (for the detail rows) and its
    deployments. When a deployment exists, its status decides the outcome; when
    none exists (batch/system jobs), the job counts as deployed once every
    allocation has a healthy client status. The deadline is checked after each
    poll, so at least one poll always happens.

    Args:
        client: Authenticated Nomad client.
        job_id: The Nomad job ID to poll.
        node_names: Map of node ID to node name for the per-allocation detail rows.
        lifecycle: Task ordering and labels from the compiled job spec.
        update: Callback to update the live panel phase text and detail rows.

    Returns:
        The terminal deploy outcome for this job.
    """
    expires_at = time.monotonic() + DEPLOY_TIMEOUT_SECONDS
    while True:
        allocations = await client.jobs.allocations(job_id)
        rows = alloc_children(allocations, node_names, lifecycle)
        history = await client.jobs.deployments(job_id)

        if not history:
            # Batch/system job: no deployment exists, so judge by allocations.
            healthy = len(
                [a for a in allocations if a.client_status in HEALTHY_ALLOC_STATUSES]
            )
            total = len(allocations)
            if total and healthy == total:
                return DeployOutcome(job_id, DeployStatus.DEPLOYED)
            phase = f"placing {healthy}/{total or '?'} allocs"
        else:
            # Service job: follow the first listed deployment.
            # NOTE(review): this treats history[0] as the most recent deployment,
            # which relies on the ordering returned by jobs.deployments — confirm.
            current = await client.deployments.read(history[0].id)
            if current.status == _DEPLOY_SUCCESS:
                return DeployOutcome(job_id, DeployStatus.DEPLOYED)
            if current.status in _DEPLOY_FAILURE:
                return DeployOutcome(job_id, DeployStatus.FAILED, current.status_description)
            phase = deploy_phase(current)

        update(phase, rows)

        if time.monotonic() >= expires_at:
            return DeployOutcome(job_id, DeployStatus.TIMEOUT, "deploy still in progress")
        await asyncio.sleep(POLL_INTERVAL_SECONDS)
|