flowmesh-cli-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowmesh_cli_stack/__init__.py +13 -0
- flowmesh_cli_stack/assets/.env.example +204 -0
- flowmesh_cli_stack/assets/compose.yml +201 -0
- flowmesh_cli_stack/assets/docker-bake.hcl +110 -0
- flowmesh_cli_stack/bundle.py +384 -0
- flowmesh_cli_stack/env_schema.py +646 -0
- flowmesh_cli_stack/stack.py +789 -0
- flowmesh_cli_stack/utils.py +137 -0
- flowmesh_cli_stack/worker.py +235 -0
- flowmesh_cli_stack-0.1.0.dist-info/METADATA +25 -0
- flowmesh_cli_stack-0.1.0.dist-info/RECORD +14 -0
- flowmesh_cli_stack-0.1.0.dist-info/WHEEL +5 -0
- flowmesh_cli_stack-0.1.0.dist-info/licenses/LICENSE +202 -0
- flowmesh_cli_stack-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,789 @@
|
|
|
1
|
+
"""Stack management commands."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from flowmesh.models.nodes import NodeRole
|
|
11
|
+
from flowmesh_cli.core import logging
|
|
12
|
+
from flowmesh_cli.core.assets import AssetNotFoundError, asset_path
|
|
13
|
+
from flowmesh_cli.core.typer import get_typer
|
|
14
|
+
from flowmesh_stack.docker import (
|
|
15
|
+
DockerComposeStack,
|
|
16
|
+
DockerError,
|
|
17
|
+
ensure_docker_available,
|
|
18
|
+
image_env_overrides,
|
|
19
|
+
inspect_image,
|
|
20
|
+
remove_image,
|
|
21
|
+
)
|
|
22
|
+
from flowmesh_stack.doctor import DoctorFinding, run_doctor_checks
|
|
23
|
+
from flowmesh_stack.env import ensure_env_file, load_env, parse_env_file
|
|
24
|
+
from flowmesh_stack.env_schema import render_env_example
|
|
25
|
+
from flowmesh_stack.images import (
|
|
26
|
+
BUILD_GROUPS,
|
|
27
|
+
BUILD_TARGETS,
|
|
28
|
+
expand_build_targets,
|
|
29
|
+
get_cache_ref,
|
|
30
|
+
get_image_ref,
|
|
31
|
+
get_push_platforms,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
from .env_schema import STACK_ENV_SCHEMA, deploy_overrides, role_overrides
|
|
35
|
+
from .utils import (
|
|
36
|
+
DEFAULT_ENV_FILE,
|
|
37
|
+
STACK_PATH_KEYS,
|
|
38
|
+
apply_stack_resource_env,
|
|
39
|
+
ensure_deploy_paths,
|
|
40
|
+
parse_node_role,
|
|
41
|
+
resolve_package_version,
|
|
42
|
+
stack_bake_file,
|
|
43
|
+
stack_compose_file,
|
|
44
|
+
stack_env_example,
|
|
45
|
+
stack_node_client,
|
|
46
|
+
)
|
|
47
|
+
from .worker import worker_pull
|
|
48
|
+
|
|
49
|
+
# Typer application object; the commands defined below register themselves on
# it through ``@app.command(...)``.
app = get_typer(help="Build, manage, and run the FlowMesh stack.")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _stack() -> DockerComposeStack:
    """Return the ``DockerComposeStack`` wrapper used by the stack commands.

    The wrapper is built from the packaged compose file, the
    ``STACK_ENV_FILE`` variable name, an env-loading callback, and the
    ``ensure_deploy_paths`` helper.
    """

    def _prepare_env(env_file: Path) -> None:
        # Callback handed to the stack as ``load_env``: run ensure_env_file
        # with the packaged example, load the env file, then apply the stack
        # resource settings.
        ensure_env_file(env_file, stack_env_example())
        load_env(env_file, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS)
        try:
            apply_stack_resource_env()
        except ValueError as exc:
            # A ValueError from the resource settings is reported and ends
            # the command with exit code 1.
            logging.error(str(exc))
            raise typer.Exit(code=1)

    stack_options = {
        "compose_file": stack_compose_file(),
        "env_file_var": "STACK_ENV_FILE",
        "load_env": _prepare_env,
        "ensure_deploy_paths": ensure_deploy_paths,
    }
    return DockerComposeStack(**stack_options)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _compose(
    args: list[str],
    env_file: Path,
    env: dict[str, str] | None,
    to_deploy: bool = False,
    profile: str | None = None,
) -> None:
    """Run a compose command through the stack wrapper.

    Args:
        args: Compose arguments (sub-command and its flags).
        env_file: Env file passed to the stack.
        env: Extra environment overrides, or ``None``.
        to_deploy: Forwarded unchanged to ``DockerComposeStack.run``.
        profile: When set, ``--profile <profile>`` is placed before ``args``.

    Raises:
        typer.Exit: With the compose return code when it is non-zero.
    """
    ensure_env_file(env_file, stack_env_example())
    profile_flags: list[str] = ["--profile", profile] if profile else []
    outcome = _stack().run(
        profile_flags + args,
        env_file=env_file,
        env=env,
        to_deploy=to_deploy,
    )
    if outcome.returncode == 0:
        return
    raise typer.Exit(code=outcome.returncode)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _node_role(env_file: Path) -> NodeRole:
    """Read NODE_ROLE from ``env_file`` and return it as a ``NodeRole``.

    An unset or blank value yields ``NodeRole.ROOT``. The value is
    lower-cased before conversion.

    Raises:
        typer.Exit: With code 1 when the value is not a valid ``NodeRole``.
    """
    configured = parse_env_file(env_file).get("NODE_ROLE", "").strip()
    if not configured:
        return NodeRole.ROOT
    try:
        return NodeRole(configured.lower())
    except ValueError:
        logging.error(
            f"NODE_ROLE={configured!r} is not a recognized role; expected 'root' or 'worker'."
        )
        raise typer.Exit(code=1)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _resolve_build_targets(batch_targets: list[str]) -> list[str]:
    """Translate a batch of bake names into concrete build targets.

    Names found in ``BUILD_GROUPS`` contribute the group's members, names
    found in ``BUILD_TARGETS`` contribute themselves, and any other name is
    skipped. The collected list is passed through ``expand_build_targets``.
    """
    collected: list[str] = []
    for name in batch_targets:
        if name in BUILD_GROUPS:
            collected.extend(BUILD_GROUPS[name])
            continue
        if name in BUILD_TARGETS:
            collected.append(name)
    return expand_build_targets(collected)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _platform_overrides(mode: str, targets: list[str]) -> list[tuple[str, str]]:
    """Pair every target with the platform value to pass to bake.

    In ``"load"`` mode each target maps to ``"local"``; in any other mode the
    value comes from ``get_push_platforms(target)``.
    """
    if mode != "load":
        return [(name, get_push_platforms(name)) for name in targets]
    return [(name, "local") for name in targets]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _resolve_bake_batches(
|
|
113
|
+
targets: list[str] | None, no_builder: bool = False
|
|
114
|
+
) -> list[list[str]]:
|
|
115
|
+
if targets is None:
|
|
116
|
+
default_targets = ["server", "workers"]
|
|
117
|
+
if no_builder:
|
|
118
|
+
return [default_targets]
|
|
119
|
+
return [["builders"], default_targets]
|
|
120
|
+
|
|
121
|
+
builder_targets = {"builders", "flowmesh_worker_gpu_builder"}
|
|
122
|
+
if no_builder and any(target in builder_targets for target in targets):
|
|
123
|
+
logging.error("--no-builder cannot be used with explicit builder targets.")
|
|
124
|
+
raise typer.Exit(code=1)
|
|
125
|
+
|
|
126
|
+
return [targets]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _require_bin(name: str) -> str:
    """Return the path ``shutil.which`` finds for ``name``.

    Raises:
        typer.Exit: With code 1 (after logging an error) when the binary is
            not found.
    """
    located = shutil.which(name)
    if located is not None:
        return located
    logging.error(f"{name} is required but was not found in PATH.")
    raise typer.Exit(code=1)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _parse_buildx_field(output: str, field: str) -> str | None:
|
|
138
|
+
needle = f"{field}:"
|
|
139
|
+
for line in output.splitlines():
|
|
140
|
+
line = line.strip()
|
|
141
|
+
if line.startswith(needle):
|
|
142
|
+
return line.split(":", 1)[1].strip() or None
|
|
143
|
+
return None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _inspect_buildx_builder(
|
|
147
|
+
docker_bin: str, name: str | None
|
|
148
|
+
) -> subprocess.CompletedProcess[str]:
|
|
149
|
+
args = [docker_bin, "buildx", "inspect"]
|
|
150
|
+
if name is not None:
|
|
151
|
+
args.append(name)
|
|
152
|
+
return subprocess.run( # nosec B603: argv list, absolute binary path.
|
|
153
|
+
args, capture_output=True, text=True, check=False
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _get_active_buildx_builder(docker_bin: str) -> str | None:
    """Return the ``Name`` field of ``docker buildx inspect`` without a name.

    Returns ``None`` when the inspect command fails or the field cannot be
    parsed from its output.
    """
    inspection = _inspect_buildx_builder(docker_bin, None)
    if inspection.returncode == 0:
        return _parse_buildx_field(inspection.stdout, "Name")
    return None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _ensure_buildx_builder_ready(
    docker_bin: str,
    builder: str,
    expected_driver: str,
    missing_hint: str | None = None,
) -> None:
    """Check that ``builder`` can be inspected and uses ``expected_driver``.

    Args:
        docker_bin: Path of the docker binary.
        builder: Name of the buildx builder to inspect.
        expected_driver: Required value of the builder's ``Driver`` field.
        missing_hint: Optional extra message logged when the inspect fails.

    Raises:
        typer.Exit: With code 1 when the inspect command fails or the parsed
            driver differs from ``expected_driver``.
    """
    inspection = _inspect_buildx_builder(docker_bin, builder)
    if inspection.returncode != 0:
        logging.error(f"Buildx builder '{builder}' is not available.")
        details = inspection.stderr
        if details:
            logging.log(details.strip(), err=True)
        if missing_hint:
            logging.log(missing_hint)
        raise typer.Exit(code=1)

    actual_driver = _parse_buildx_field(inspection.stdout, "Driver")
    if actual_driver == expected_driver:
        return
    # A driver that could not be parsed is reported as 'unknown'.
    logging.error(
        f"Buildx builder '{builder}' uses driver '{actual_driver or 'unknown'}'; "
        f"'{expected_driver}' is required."
    )
    raise typer.Exit(code=1)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _switch_active_buildx_builder(docker_bin: str, target: str, force: bool) -> None:
    """Make ``target`` the active buildx builder when it is not already.

    Nothing happens when the active builder already equals ``target``.
    Otherwise the user is asked to confirm (skipped when ``force`` is true)
    and ``docker buildx use <target>`` is run.

    Raises:
        typer.Exit: With code 1 when the user declines, or with the command's
            return code when ``docker buildx use`` fails.
    """
    current = _get_active_buildx_builder(docker_bin)
    if current == target:
        return

    if not force:
        question = (
            f"Active buildx builder is '{current or 'unknown'}'; "
            f"switch to '{target}'?"
        )
        confirmed = typer.confirm(question, default=False)
        if not confirmed:
            logging.error(f"Aborted; '{target}' is not the active buildx builder.")
            raise typer.Exit(code=1)

    # nosec B603: argv list, absolute binary path.
    switch = subprocess.run(  # nosec B603
        [docker_bin, "buildx", "use", target],
        capture_output=True,
        text=True,
        check=False,
    )
    if switch.returncode == 0:
        logging.info(f"Switched active buildx builder to '{target}'.")
        return

    # Relay whatever the failed command printed before reporting the failure.
    if switch.stdout:
        logging.log(switch.stdout)
    if switch.stderr:
        logging.log(switch.stderr, err=True)
    logging.error(f"Failed to switch active buildx builder to '{target}'.")
    raise typer.Exit(code=switch.returncode)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# Driver split: load uses the native docker driver (local cache, image goes
# straight into the daemon's image store); push uses docker-container (registry
# cache in/out, multi-platform).
# Default ``--builder`` value of the ``build`` command.
_BUILD_DEFAULT_BUILDER = "default"
# Default ``--builder`` value of the ``push`` command.
_PUSH_DEFAULT_BUILDER = "flowmesh-multiarch"
# Message logged in push mode when the requested builder cannot be inspected;
# it shows the command that creates the default push builder.
_PUSH_BUILDER_MISSING_HINT = (
    "Create the builder, then retry:\n"
    f"docker buildx create --name {_PUSH_DEFAULT_BUILDER} "
    "--driver docker-container --bootstrap"
)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _run_bake(
    mode: str,
    targets: list[str] | None,
    env_file: Path,
    builder: str,
    force: bool,
    no_builder: bool = False,
    image_tag: str | None = None,
    build_ref: str | None = None,
) -> None:
    """Run ``docker buildx bake`` for the requested targets.

    Args:
        mode: ``"push"`` adds ``--push`` plus registry cache settings; any
            other value adds ``--load``.
        targets: Explicit bake targets, or ``None`` for the default batches.
        env_file: Env file loaded before building.
        builder: Buildx builder name passed as ``--builder``.
        force: Skip the confirmation when switching the active builder.
        no_builder: Forwarded to ``_resolve_bake_batches``.
        image_tag: Overrides ``FLOWMESH_VERSION`` when non-empty.
        build_ref: Overrides ``FLOWMESH_BUILD_REF`` when non-empty.

    Raises:
        typer.Exit: When Docker or buildx is unavailable, the bake file is
            missing, the builder check fails, or a bake run returns non-zero.
    """
    pushing = mode == "push"

    ensure_env_file(env_file, stack_env_example())
    load_env(env_file, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS)

    try:
        ensure_docker_available()
    except DockerError as exc:
        logging.error(str(exc))
        raise typer.Exit(code=1)
    docker_bin = _require_bin("docker")

    # Probe for the buildx plugin before doing anything else with it.
    # nosec B603: argv list, absolute binary path.
    probe = subprocess.run(  # nosec B603
        [docker_bin, "buildx", "version"],
        capture_output=True,
        text=True,
        check=False,
    )
    if probe.returncode != 0:
        if probe.stdout:
            logging.log(probe.stdout)
        if probe.stderr:
            logging.log(probe.stderr, err=True)
        logging.error("docker buildx is required for dev build/push")
        raise typer.Exit(code=probe.returncode)

    bake_file = stack_bake_file()
    if not bake_file.exists():
        logging.error(f"Bake file not found: {bake_file}")
        raise typer.Exit(code=1)

    # Push requires the docker-container driver; load requires the docker one.
    if pushing:
        _ensure_buildx_builder_ready(
            docker_bin,
            builder,
            "docker-container",
            missing_hint=_PUSH_BUILDER_MISSING_HINT,
        )
    else:
        _ensure_buildx_builder_ready(docker_bin, builder, "docker")
    _switch_active_buildx_builder(docker_bin, builder, force)

    build_created = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
    registry = os.getenv("FLOWMESH_REGISTRY", "ghcr.io/mlsys-io")
    version = image_tag or os.getenv("FLOWMESH_VERSION", "dev")
    cache_version = os.getenv("FLOWMESH_CACHE_VERSION", "").strip() or "cache"
    bake_env: dict[str, str] = {
        "REGISTRY": registry,
        "VERSION": version,
        "BUILD_REF": build_ref or os.getenv("FLOWMESH_BUILD_REF", "local"),
        "BUILD_CREATED": build_created,
    }
    output_flag = "--push" if pushing else "--load"

    for batch in _resolve_bake_batches(targets, no_builder=no_builder):
        command = [
            docker_bin,
            "buildx",
            "bake",
            "-f",
            str(bake_file),
            "--builder",
            builder,
            output_flag,
            *batch,
        ]

        expanded = _resolve_build_targets(batch)
        if pushing:
            # Each pushed target reads from and writes to its registry cache.
            for name in expanded:
                cache_ref = get_cache_ref(registry, cache_version, name)
                command.extend(
                    (
                        "--set",
                        f"{name}.cache-from=type=registry,ref={cache_ref}",
                        "--set",
                        f"{name}.cache-to=type=registry,ref={cache_ref},mode=max",
                    )
                )
        for name, platform in _platform_overrides(mode, expanded):
            command.extend(("--set", f"{name}.platform={platform}"))

        # nosec B603: argv list, absolute binary path.
        bake = subprocess.run(  # nosec B603
            command,
            env={**os.environ, **bake_env},
            check=False,
            text=True,
        )
        if bake.returncode != 0:
            raise typer.Exit(code=bake.returncode)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _log_finding(fnd: DoctorFinding) -> None:
    """Log a doctor finding with the logger that matches its level.

    ``"note"`` goes to ``logging.log``, ``"warning"`` to ``logging.warning``
    and ``"error"`` to ``logging.error``; any other level is not logged.
    """
    level = fnd.level
    if level == "note":
        logging.log(fnd.message)
    elif level == "warning":
        logging.warning(fnd.message)
    elif level == "error":
        logging.error(fnd.message)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
@app.command()
def build(
    targets: list[str] | None = typer.Argument(
        None, help="Optional bake targets", metavar="[TARGETS]..."
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for stack compose/bake"
    ),
    no_builder: bool = typer.Option(
        False,
        "--no-builder",
        help="Skip exporting the standalone GPU builder image.",
    ),
    builder: str = typer.Option(
        _BUILD_DEFAULT_BUILDER,
        "--builder",
        help="Buildx builder to use; must use the native 'docker' driver.",
    ),
    force: bool = typer.Option(
        False,
        "-f",
        "--force",
        help="Skip the confirmation prompt when switching the active buildx builder.",
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
    build_ref: str | None = typer.Option(
        None, "--build-ref", help="Override FLOWMESH_BUILD_REF"
    ),
) -> None:
    """Build FlowMesh Docker images locally using buildx."""
    # "load" mode makes _run_bake pass --load instead of --push.
    bake_options = {
        "builder": builder,
        "force": force,
        "no_builder": no_builder,
        "image_tag": image_tag,
        "build_ref": build_ref,
    }
    _run_bake("load", targets, env_file, **bake_options)
    logging.success("Images built locally.")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@app.command()
def push(
    targets: list[str] | None = typer.Argument(
        None, help="Optional bake targets", metavar="[TARGETS]..."
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for stack compose/bake"
    ),
    no_builder: bool = typer.Option(
        False,
        "--no-builder",
        help="Skip publishing the standalone GPU builder image.",
    ),
    builder: str = typer.Option(
        _PUSH_DEFAULT_BUILDER,
        "--builder",
        help="Buildx builder to use; must use the 'docker-container' driver.",
    ),
    force: bool = typer.Option(
        False,
        "-f",
        "--force",
        help="Skip the confirmation prompt when switching the active buildx builder.",
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
    build_ref: str | None = typer.Option(
        None, "--build-ref", help="Override FLOWMESH_BUILD_REF"
    ),
) -> None:
    """Build FlowMesh Docker images and push them to the container registry."""
    # "push" mode makes _run_bake pass --push and the registry cache settings.
    bake_options = {
        "builder": builder,
        "force": force,
        "no_builder": no_builder,
        "image_tag": image_tag,
        "build_ref": build_ref,
    }
    _run_bake("push", targets, env_file, **bake_options)
    logging.success("Images pushed.")
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
@app.command()
def pull(
    services: list[str] | None = typer.Argument(
        None, help="Optional services to pull", metavar="[SERVICES]..."
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Pull Docker images for stack services from the registry."""
    compose_args = ["pull", *(services or [])]
    # The "root" compose profile is only enabled when the node role is root.
    is_root = _node_role(env_file) == NodeRole.ROOT
    _compose(
        compose_args,
        env_file=env_file,
        env=image_env_overrides(image_tag),
        profile="root" if is_root else None,
    )
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
@app.command()
def pullall(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Pull all images required for the stack."""
    # Stack service images first, then worker images with and without the
    # builder flag.
    pull(services=None, env_file=env_file, image_tag=image_tag)
    for with_builder in (True, False):
        worker_pull(
            kinds=["all"],
            builder=with_builder,
            env_file=env_file,
            image_tag=image_tag,
        )
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
@app.command()
def up(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Start the stack.

    On root nodes (NODE_ROLE=root, the default), the local Redis services are
    started alongside the server. On worker nodes (NODE_ROLE=worker), Redis
    services are skipped — the worker is expected to connect to the root
    node's Redis via REDIS_CONTROL_URL / REDIS_TELEMETRY_URL.
    """
    # The "root" compose profile is only enabled when the node role is root.
    is_root = _node_role(env_file) == NodeRole.ROOT
    _compose(
        ["up", "-d", "--wait"],
        env_file=env_file,
        env=image_env_overrides(image_tag),
        to_deploy=True,
        profile="root" if is_root else None,
    )
    logging.success("FlowMesh stack is up.")
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _drain_workers(env_file: Path) -> None:
    """Ask the stack node to destroy all workers, without failing the caller.

    Any exception raised while creating the client or destroying the workers
    is logged as a warning so the surrounding shutdown can continue.
    """
    try:
        node_client = stack_node_client(env_file, base_url=None, token=None)
        node_client.destroy_all_workers()
    except Exception as exc:
        # Deliberately best-effort: shutdown proceeds even if draining fails.
        logging.warning(f"Unable to drain workers; continuing shutdown. {exc}")
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
@app.command()
def down(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Drain workers and stop the stack."""
    logging.info("Draining workers...")
    _drain_workers(env_file)

    logging.info("Shutting down the FlowMesh stack...")
    # "down" always runs with the root profile, whatever NODE_ROLE is set to.
    image_env = image_env_overrides(image_tag)
    _compose(["down"], env_file=env_file, env=image_env, profile="root")
    logging.success("FlowMesh stack stopped.")
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
@app.command()
def restart(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Drain workers and restart the stack."""
    logging.info("Draining workers...")
    _drain_workers(env_file)

    # Stop phase: always uses the root profile.
    stop_env = image_env_overrides(image_tag)
    _compose(["down"], env_file=env_file, env=stop_env, profile="root")

    # Start phase: the root profile is only enabled when the node role is root.
    is_root = _node_role(env_file) == NodeRole.ROOT
    start_env = image_env_overrides(image_tag)
    _compose(
        ["up", "-d", "--wait"],
        env_file=env_file,
        env=start_env,
        to_deploy=True,
        profile="root" if is_root else None,
    )
    logging.success("FlowMesh stack is up.")
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
@app.command()
def logs(
    service: str | None = typer.Argument(None, help="Optional service name"),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
) -> None:
    """Stream logs from stack services or a specific service container."""
    exit_code = _stack().stream_logs(
        env_file=env_file,
        service=service,
        profile="root",
    )
    # A non-zero code from the log stream becomes the command's exit code.
    if exit_code == 0:
        return
    raise typer.Exit(code=exit_code)
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
@app.command()
def ps(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
) -> None:
    """Display running status of stack containers and worker containers."""
    _compose(["ps"], env_file=env_file, env=None, profile="root")

    logging.log("\nWorkers:")
    docker_bin = _require_bin("docker")
    # Worker containers are found by label, not through compose.
    worker_listing = [
        docker_bin,
        "ps",
        "-a",
        "--filter",
        "label=flowmesh.group=server-workers",
        "--format",
        " {{.Names}}\t{{.Status}}",
    ]
    # nosec B603: argv list, absolute binary path. The exit status of the
    # listing is not checked.
    subprocess.run(worker_listing, check=False)  # nosec B603
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
@app.command("status")
def status_cmd(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
) -> None:
    """Display running status of stack containers (alias for ps)."""
    # Registered as "status"; delegates directly to the ``ps`` command function.
    ps(env_file=env_file)
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
@app.command()
def clean(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Drain workers, stop the stack, and remove all containers and volumes."""
    logging.info("Draining workers...")
    _drain_workers(env_file)

    logging.info("Removing stack containers and volumes...")
    # "-v" is added to "down"; the root profile is always used here.
    image_env = image_env_overrides(image_tag)
    _compose(["down", "-v"], env_file=env_file, env=image_env, profile="root")
    logging.success("FlowMesh stack cleaned.")
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def _write_env_example(package: str, filename: str, schema, errors: list[str]) -> None:
    """Render ``schema`` as an env example and write it to a package asset.

    Args:
        package: Package name passed to ``asset_path``.
        filename: Asset file name inside that package.
        schema: Schema object passed to ``render_env_example``.
        errors: List that receives a message when the asset cannot be
            resolved; it is mutated in place.
    """
    try:
        destination = asset_path(package, filename)
    except AssetNotFoundError as exc:
        problem = f"Unable to resolve asset {package}/{filename}: {exc}"
        logging.error(problem)
        errors.append(problem)
    else:
        destination.write_text(render_env_example(schema))
        logging.success(f"Wrote {destination}")
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
@app.command("env-examples")
def env_examples() -> None:
    """Generate env example files from the shared schema."""
    # _write_env_example appends to this list instead of raising, so the
    # command exits with code 1 only after the write attempt has finished.
    failures: list[str] = []
    _write_env_example(
        "flowmesh_cli_stack.assets", ".env.example", STACK_ENV_SCHEMA, failures
    )
    if failures:
        raise typer.Exit(code=1)
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
@app.command()
def doctor(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file to validate"
    )
) -> None:
    """Verify Docker access and validate stack env configuration."""
    # Each finding is logged through _log_finding, passed as the callback.
    report = run_doctor_checks(env_file, STACK_ENV_SCHEMA, callback=_log_finding)
    if not report.errors:
        logging.success("Doctor checks passed.")
        return
    logging.error(f"Doctor found {len(report.errors)} issue(s).")
    raise typer.Exit(code=1)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
@app.command("init")
def init(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file to write"
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force initialization; overwrite existing files"
    ),
    role: str = typer.Option(
        NodeRole.ROOT.value,
        "--role",
        help="Target NODE_ROLE for the generated env file (root|worker).",
    ),
    deploy: bool = typer.Option(
        False,
        "--deploy",
        help=(
            # The trailing space keeps the two adjacent literals from running
            # together as "version(falls back ..." in the rendered help.
            "Pin FLOWMESH_VERSION to the installed flowmesh-cli-stack package version "
            "(falls back to 'latest' if package metadata is missing)."
        ),
    ),
) -> None:
    """Create or overwrite the stack env file rendered from the schema."""
    # Parameters:
    #   env_file: destination of the rendered env file.
    #   force:    overwrite an existing file without asking.
    #   role:     textual node role, converted by parse_node_role.
    #   deploy:   pin FLOWMESH_VERSION as described in the option help.
    node_role = parse_node_role(role)

    # An existing file is only replaced after confirmation, unless forced.
    if env_file.exists() and not force:
        if not typer.confirm(f"{env_file} exists. Overwrite?", default=False):
            logging.info("Keeping existing env file.")
            return

    deploy_version: str | None = None
    if deploy:
        package_version = resolve_package_version()
        if package_version is None:
            logging.warning(
                "Unable to resolve flowmesh-cli-stack version; "
                "falling back to FLOWMESH_VERSION=latest. "
                "Edit .env if you need a specific version."
            )
            deploy_version = "latest"
        else:
            # GHCR images for releases are pushed at v<version>.
            deploy_version = f"v{package_version}"

    # Deploy overrides come last, so they win over role overrides on a clash.
    overrides = {
        **role_overrides(node_role),
        **deploy_overrides(deploy, deploy_version),
    }
    env_file.write_text(render_env_example(STACK_ENV_SCHEMA, overrides=overrides))
    logging.success(f"Wrote {env_file} (NODE_ROLE={node_role.value}).")
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
@app.command("purge")
def purge(
    version: str = typer.Argument(
        ..., help="FlowMesh version to purge from local Docker"
    ),
    targets: list[str] | None = typer.Option(
        None,
        "--target",
        "-t",
        help="Optional specific image targets to purge "
        "(e.g., flowmesh_server, flowmesh_worker_gpu, etc.)",
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="List images to be purged without deleting them"
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for docker"
    ),
) -> None:
    """Purge FlowMesh Docker images for a specific version from local Docker."""
    try:
        ensure_docker_available()
    except DockerError as exc:
        logging.error(str(exc))
        raise typer.Exit(code=1)

    logging.info(f"Purging FlowMesh Docker images with version '{version}'...")
    load_env(env_file, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS)

    # Without --target every known build target is considered; explicit
    # targets must all be known build targets.
    if targets is None:
        targets = list(BUILD_TARGETS)
    else:
        unknown = [name for name in targets if name not in BUILD_TARGETS]
        if unknown:
            logging.error(f"Invalid targets specified: {', '.join(unknown)}")
            raise typer.Exit(code=1)

    present: list[str] = []
    registry = os.getenv("FLOWMESH_REGISTRY", "ghcr.io/mlsys-io")
    for name in targets:
        image_ref = get_image_ref(registry=registry, version=version, target=name)
        # A zero return code from the inspect call marks the image as present.
        inspection = inspect_image(image_ref, capture_output=True)
        if inspection.returncode != 0:
            logging.warning(f"Image not found: {image_ref}")
            continue
        present.append(image_ref)

    if not present:
        logging.info("No images to purge.")
        return

    if dry_run:
        logging.info("Images to be purged:")
        for image_ref in present:
            logging.log(f" {image_ref}")
        return

    # Try every image before failing, so one failure does not block the rest.
    any_failed = False
    for image_ref in present:
        removal = remove_image(image_ref, capture_output=True)
        if removal.returncode == 0:
            logging.success(f"Removed image: {image_ref}")
            continue
        logging.error(f"Failed to remove image: {image_ref}")
        if removal.stdout:
            logging.log(removal.stdout)
        if removal.stderr:
            logging.log(removal.stderr, err=True)
        any_failed = True

    if any_failed:
        raise typer.Exit(code=1)
|