flowmesh-cli-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,789 @@
1
+ """Stack management commands."""
2
+
3
+ import os
4
+ import shutil
5
+ import subprocess
6
+ from datetime import UTC, datetime
7
+ from pathlib import Path
8
+
9
+ import typer
10
+ from flowmesh.models.nodes import NodeRole
11
+ from flowmesh_cli.core import logging
12
+ from flowmesh_cli.core.assets import AssetNotFoundError, asset_path
13
+ from flowmesh_cli.core.typer import get_typer
14
+ from flowmesh_stack.docker import (
15
+ DockerComposeStack,
16
+ DockerError,
17
+ ensure_docker_available,
18
+ image_env_overrides,
19
+ inspect_image,
20
+ remove_image,
21
+ )
22
+ from flowmesh_stack.doctor import DoctorFinding, run_doctor_checks
23
+ from flowmesh_stack.env import ensure_env_file, load_env, parse_env_file
24
+ from flowmesh_stack.env_schema import render_env_example
25
+ from flowmesh_stack.images import (
26
+ BUILD_GROUPS,
27
+ BUILD_TARGETS,
28
+ expand_build_targets,
29
+ get_cache_ref,
30
+ get_image_ref,
31
+ get_push_platforms,
32
+ )
33
+
34
+ from .env_schema import STACK_ENV_SCHEMA, deploy_overrides, role_overrides
35
+ from .utils import (
36
+ DEFAULT_ENV_FILE,
37
+ STACK_PATH_KEYS,
38
+ apply_stack_resource_env,
39
+ ensure_deploy_paths,
40
+ parse_node_role,
41
+ resolve_package_version,
42
+ stack_bake_file,
43
+ stack_compose_file,
44
+ stack_env_example,
45
+ stack_node_client,
46
+ )
47
+ from .worker import worker_pull
48
+
49
# Typer application object; every ``@app.command`` in this module registers on it.
app = get_typer(
    help="Build, manage, and run the FlowMesh stack.",
)
50
+
51
+
52
def _stack() -> DockerComposeStack:
    """Create the ``DockerComposeStack`` wrapper used by the compose commands.

    The wrapper receives a loader callback that makes sure the env file
    exists, loads it (resolving the stack path keys against the current
    working directory) and then applies the stack resource environment.
    A ``ValueError`` from that last step is logged and turned into exit code 1.
    """

    def _prepare_environment(env_path: Path) -> None:
        ensure_env_file(env_path, stack_env_example())
        load_env(env_path, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS)
        try:
            apply_stack_resource_env()
        except ValueError as problem:
            logging.error(str(problem))
            raise typer.Exit(code=1)

    stack_options = {
        "compose_file": stack_compose_file(),
        "env_file_var": "STACK_ENV_FILE",
        "load_env": _prepare_environment,
        "ensure_deploy_paths": ensure_deploy_paths,
    }
    return DockerComposeStack(**stack_options)
68
+
69
+
70
def _compose(
    args: list[str],
    env_file: Path,
    env: dict[str, str] | None,
    to_deploy: bool = False,
    profile: str | None = None,
) -> None:
    """Run a compose sub-command against the stack and exit on failure.

    Args:
        args: Compose arguments (for example ``["up", "-d"]``).
        env_file: Env file handed to the stack; created from the example
            file first when it does not exist yet.
        env: Extra environment overrides forwarded to the stack runner.
        to_deploy: Forwarded unchanged to ``DockerComposeStack.run``.
        profile: When set, ``--profile <profile>`` is placed before ``args``.

    Raises:
        typer.Exit: With the compose return code when it is non-zero.
    """
    ensure_env_file(env_file, stack_env_example())

    profile_prefix: list[str] = []
    if profile:
        profile_prefix = ["--profile", profile]

    outcome = _stack().run(
        profile_prefix + args,
        env_file=env_file,
        env=env,
        to_deploy=to_deploy,
    )
    exit_code = outcome.returncode
    if exit_code == 0:
        return
    raise typer.Exit(code=exit_code)
82
+
83
+
84
def _node_role(env_file: Path) -> NodeRole:
    """Read NODE_ROLE from ``env_file`` and return it as a ``NodeRole``.

    A missing or blank value yields ``NodeRole.ROOT``. A value that
    ``NodeRole`` rejects is reported and the command exits with code 1.
    """
    configured = parse_env_file(env_file).get("NODE_ROLE", "").strip()
    if not configured:
        return NodeRole.ROOT
    try:
        # Roles are matched case-insensitively.
        return NodeRole(configured.lower())
    except ValueError:
        logging.error(
            f"NODE_ROLE={configured!r} is not a recognized role; expected 'root' or 'worker'."
        )
        raise typer.Exit(code=1)
94
+
95
+
96
def _resolve_build_targets(batch_targets: list[str]) -> list[str]:
    """Translate bake target/group names into concrete build targets.

    Group names are replaced by the members listed in ``BUILD_GROUPS``,
    names found in ``BUILD_TARGETS`` are kept, and any other name is
    ignored. The collected names are then passed through
    ``expand_build_targets``.
    """
    collected: list[str] = []
    for entry in batch_targets:
        if entry in BUILD_GROUPS:
            collected += BUILD_GROUPS[entry]
            continue
        if entry in BUILD_TARGETS:
            collected.append(entry)
    return expand_build_targets(collected)
104
+
105
+
106
def _platform_overrides(mode: str, targets: list[str]) -> list[tuple[str, str]]:
    """Pair every target with the platform value to pass to bake.

    In ``"load"`` mode each target is pinned to ``"local"``; in any other
    mode the value comes from ``get_push_platforms`` for that target.
    """
    if mode != "load":
        return [(name, get_push_platforms(name)) for name in targets]
    return [(name, "local") for name in targets]
110
+
111
+
112
+ def _resolve_bake_batches(
113
+ targets: list[str] | None, no_builder: bool = False
114
+ ) -> list[list[str]]:
115
+ if targets is None:
116
+ default_targets = ["server", "workers"]
117
+ if no_builder:
118
+ return [default_targets]
119
+ return [["builders"], default_targets]
120
+
121
+ builder_targets = {"builders", "flowmesh_worker_gpu_builder"}
122
+ if no_builder and any(target in builder_targets for target in targets):
123
+ logging.error("--no-builder cannot be used with explicit builder targets.")
124
+ raise typer.Exit(code=1)
125
+
126
+ return [targets]
127
+
128
+
129
def _require_bin(name: str) -> str:
    """Return the path ``shutil.which`` finds for ``name``.

    Raises:
        typer.Exit: With code 1 (after logging an error) when the
            executable cannot be found.
    """
    located = shutil.which(name)
    if located is not None:
        return located
    logging.error(f"{name} is required but was not found in PATH.")
    raise typer.Exit(code=1)
135
+
136
+
137
+ def _parse_buildx_field(output: str, field: str) -> str | None:
138
+ needle = f"{field}:"
139
+ for line in output.splitlines():
140
+ line = line.strip()
141
+ if line.startswith(needle):
142
+ return line.split(":", 1)[1].strip() or None
143
+ return None
144
+
145
+
146
+ def _inspect_buildx_builder(
147
+ docker_bin: str, name: str | None
148
+ ) -> subprocess.CompletedProcess[str]:
149
+ args = [docker_bin, "buildx", "inspect"]
150
+ if name is not None:
151
+ args.append(name)
152
+ return subprocess.run( # nosec B603: argv list, absolute binary path.
153
+ args, capture_output=True, text=True, check=False
154
+ )
155
+
156
+
157
def _get_active_buildx_builder(docker_bin: str) -> str | None:
    """Return the ``Name`` field of ``buildx inspect`` run without a builder name.

    ``None`` is returned when the inspect command fails or reports no name.
    """
    inspection = _inspect_buildx_builder(docker_bin, None)
    if inspection.returncode == 0:
        return _parse_buildx_field(inspection.stdout, "Name")
    return None
162
+
163
+
164
def _ensure_buildx_builder_ready(
    docker_bin: str,
    builder: str,
    expected_driver: str,
    missing_hint: str | None = None,
) -> None:
    """Check that ``builder`` can be inspected and reports ``expected_driver``.

    Args:
        docker_bin: Docker executable used for the inspect call.
        builder: Name of the buildx builder to check.
        expected_driver: Driver the builder must report.
        missing_hint: Optional extra message logged when the builder
            cannot be inspected.

    Raises:
        typer.Exit: With code 1 when the inspect call fails or the driver
            differs from ``expected_driver``.
    """
    inspection = _inspect_buildx_builder(docker_bin, builder)

    if inspection.returncode != 0:
        logging.error(f"Buildx builder '{builder}' is not available.")
        captured_stderr = inspection.stderr
        if captured_stderr:
            logging.log(captured_stderr.strip(), err=True)
        if missing_hint:
            logging.log(missing_hint)
        raise typer.Exit(code=1)

    actual_driver = _parse_buildx_field(inspection.stdout, "Driver")
    if actual_driver == expected_driver:
        return

    shown_driver = actual_driver or "unknown"
    logging.error(
        f"Buildx builder '{builder}' uses driver '{shown_driver}'; "
        f"'{expected_driver}' is required."
    )
    raise typer.Exit(code=1)
186
+
187
+
188
def _switch_active_buildx_builder(docker_bin: str, target: str, force: bool) -> None:
    """Make ``target`` the active buildx builder when it is not already.

    Nothing happens when the active builder already equals ``target``.
    Otherwise the user is asked to confirm (skipped when ``force`` is true)
    and ``docker buildx use <target>`` is run.

    Raises:
        typer.Exit: With code 1 when the user declines, or with the
            command's return code when ``buildx use`` fails.
    """
    current = _get_active_buildx_builder(docker_bin)
    if current == target:
        return

    if not force:
        question = (
            f"Active buildx builder is '{current or 'unknown'}'; switch to '{target}'?"
        )
        confirmed = typer.confirm(question, default=False)
        if not confirmed:
            logging.error(f"Aborted; '{target}' is not the active buildx builder.")
            raise typer.Exit(code=1)

    use_command = [docker_bin, "buildx", "use", target]
    # nosec B603: argv list, absolute binary path.
    outcome = subprocess.run(  # nosec B603
        use_command, capture_output=True, text=True, check=False
    )
    if outcome.returncode == 0:
        logging.info(f"Switched active buildx builder to '{target}'.")
        return

    # Surface whatever docker printed before reporting the failure.
    if outcome.stdout:
        logging.log(outcome.stdout)
    if outcome.stderr:
        logging.log(outcome.stderr, err=True)
    logging.error(f"Failed to switch active buildx builder to '{target}'.")
    raise typer.Exit(code=outcome.returncode)
219
+
220
+
221
# Builder defaults per mode. ``build`` (load) expects a builder on the native
# "docker" driver so images land directly in the daemon's image store with a
# local cache; ``push`` expects a "docker-container" builder, which supports
# registry cache import/export and multi-platform output.
_BUILD_DEFAULT_BUILDER = "default"
_PUSH_DEFAULT_BUILDER = "flowmesh-multiarch"
# Shown when the push builder cannot be inspected.
_PUSH_BUILDER_MISSING_HINT = "\n".join(
    (
        "Create the builder, then retry:",
        f"docker buildx create --name {_PUSH_DEFAULT_BUILDER}"
        " --driver docker-container --bootstrap",
    )
)
231
+
232
+
233
def _run_bake(
    mode: str,
    targets: list[str] | None,
    env_file: Path,
    builder: str,
    force: bool,
    no_builder: bool = False,
    image_tag: str | None = None,
    build_ref: str | None = None,
) -> None:
    """Run ``docker buildx bake`` for the requested targets.

    Args:
        mode: ``"push"`` publishes with ``--push``; any other value loads
            the images locally with ``--load``.
        targets: Explicit bake targets, or ``None`` for the default batches.
        env_file: Env file loaded before building (created if missing).
        builder: Buildx builder passed to ``--builder``.
        force: Skip the confirmation when switching the active builder.
        no_builder: Forwarded to ``_resolve_bake_batches``.
        image_tag: Overrides ``FLOWMESH_VERSION`` when non-empty.
        build_ref: Overrides ``FLOWMESH_BUILD_REF`` when non-empty.

    Raises:
        typer.Exit: When Docker/buildx is unavailable, the bake file is
            missing, the builder check fails, or a bake run returns a
            non-zero status (that status becomes the exit code).
    """
    ensure_env_file(env_file, stack_env_example())
    load_env(env_file, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS)

    # --- Preconditions: docker, buildx, bake file, builder -----------------
    try:
        ensure_docker_available()
    except DockerError as docker_problem:
        logging.error(str(docker_problem))
        raise typer.Exit(code=1)
    docker_bin = _require_bin("docker")

    # nosec B603: argv list, absolute binary path.
    version_probe = subprocess.run(  # nosec B603
        [docker_bin, "buildx", "version"],
        capture_output=True,
        text=True,
        check=False,
    )
    if version_probe.returncode != 0:
        if version_probe.stdout:
            logging.log(version_probe.stdout)
        if version_probe.stderr:
            logging.log(version_probe.stderr, err=True)
        logging.error("docker buildx is required for dev build/push")
        raise typer.Exit(code=version_probe.returncode)

    bake_file = stack_bake_file()
    if not bake_file.exists():
        logging.error(f"Bake file not found: {bake_file}")
        raise typer.Exit(code=1)

    pushing = mode == "push"
    if pushing:
        _ensure_buildx_builder_ready(
            docker_bin,
            builder,
            "docker-container",
            missing_hint=_PUSH_BUILDER_MISSING_HINT,
        )
    else:
        _ensure_buildx_builder_ready(docker_bin, builder, "docker")
    _switch_active_buildx_builder(docker_bin, builder, force)

    # --- Variables handed to the bake process -------------------------------
    build_created = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
    registry = os.getenv("FLOWMESH_REGISTRY", "ghcr.io/mlsys-io")
    version = image_tag or os.getenv("FLOWMESH_VERSION", "dev")
    cache_version = os.getenv("FLOWMESH_CACHE_VERSION", "").strip() or "cache"
    bake_env: dict[str, str] = {
        "REGISTRY": registry,
        "VERSION": version,
        "BUILD_REF": build_ref or os.getenv("FLOWMESH_BUILD_REF", "local"),
        "BUILD_CREATED": build_created,
    }
    output_flag = "--push" if pushing else "--load"

    # --- One bake invocation per batch --------------------------------------
    for batch in _resolve_bake_batches(targets, no_builder=no_builder):
        command = [
            docker_bin,
            "buildx",
            "bake",
            "-f",
            str(bake_file),
            "--builder",
            builder,
            output_flag,
        ]
        command.extend(batch)

        concrete_targets = _resolve_build_targets(batch)
        if pushing:
            # Registry cache import/export is only configured when pushing.
            for name in concrete_targets:
                cache_ref = get_cache_ref(registry, cache_version, name)
                command.extend(
                    [
                        "--set",
                        f"{name}.cache-from=type=registry,ref={cache_ref}",
                        "--set",
                        f"{name}.cache-to=type=registry,ref={cache_ref},mode=max",
                    ]
                )
        for name, platform in _platform_overrides(mode, concrete_targets):
            command.extend(["--set", f"{name}.platform={platform}"])

        process_env = dict(os.environ)
        process_env.update(bake_env)
        # nosec B603: argv list, absolute binary path.
        completed = subprocess.run(  # nosec B603
            command,
            env=process_env,
            check=False,
            text=True,
        )
        if completed.returncode != 0:
            raise typer.Exit(code=completed.returncode)
337
+
338
+
339
+ def _log_finding(fnd: DoctorFinding) -> None:
340
+ match fnd.level:
341
+ case "note":
342
+ logging.log(fnd.message)
343
+ case "warning":
344
+ logging.warning(fnd.message)
345
+ case "error":
346
+ logging.error(fnd.message)
347
+
348
+
349
@app.command()
def build(
    targets: list[str] | None = typer.Argument(
        None, help="Optional bake targets", metavar="[TARGETS]..."
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for stack compose/bake"
    ),
    no_builder: bool = typer.Option(
        False,
        "--no-builder",
        help="Skip exporting the standalone GPU builder image.",
    ),
    builder: str = typer.Option(
        _BUILD_DEFAULT_BUILDER,
        "--builder",
        help="Buildx builder to use; must use the native 'docker' driver.",
    ),
    force: bool = typer.Option(
        False,
        "-f",
        "--force",
        help="Skip the confirmation prompt when switching the active buildx builder.",
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
    build_ref: str | None = typer.Option(
        None, "--build-ref", help="Override FLOWMESH_BUILD_REF"
    ),
) -> None:
    """Build FlowMesh Docker images locally using buildx."""
    # "load" mode makes the bake run use --load; a failing bake exits inside
    # _run_bake, so the success message is only reached when every batch
    # succeeded.
    _run_bake(
        mode="load",
        targets=targets,
        env_file=env_file,
        builder=builder,
        force=force,
        no_builder=no_builder,
        image_tag=image_tag,
        build_ref=build_ref,
    )
    logging.success("Images built locally.")
392
+
393
+
394
@app.command()
def push(
    targets: list[str] | None = typer.Argument(
        None, help="Optional bake targets", metavar="[TARGETS]..."
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for stack compose/bake"
    ),
    no_builder: bool = typer.Option(
        False,
        "--no-builder",
        help="Skip publishing the standalone GPU builder image.",
    ),
    builder: str = typer.Option(
        _PUSH_DEFAULT_BUILDER,
        "--builder",
        help="Buildx builder to use; must use the 'docker-container' driver.",
    ),
    force: bool = typer.Option(
        False,
        "-f",
        "--force",
        help="Skip the confirmation prompt when switching the active buildx builder.",
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
    build_ref: str | None = typer.Option(
        None, "--build-ref", help="Override FLOWMESH_BUILD_REF"
    ),
) -> None:
    """Build FlowMesh Docker images and push them to the container registry."""
    # "push" mode makes the bake run use --push and registry cache settings;
    # a failing bake exits inside _run_bake before the success message.
    _run_bake(
        mode="push",
        targets=targets,
        env_file=env_file,
        builder=builder,
        force=force,
        no_builder=no_builder,
        image_tag=image_tag,
        build_ref=build_ref,
    )
    logging.success("Images pushed.")
437
+
438
+
439
@app.command()
def pull(
    services: list[str] | None = typer.Argument(
        None, help="Optional services to pull", metavar="[SERVICES]..."
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Pull Docker images for stack services from the registry."""
    compose_args = ["pull"]
    if services:
        compose_args = compose_args + services

    # The "root" compose profile is only enabled on root nodes.
    profile = None
    if _node_role(env_file) == NodeRole.ROOT:
        profile = "root"

    _compose(
        compose_args,
        env_file=env_file,
        env=image_env_overrides(image_tag),
        profile=profile,
    )
457
+
458
+
459
@app.command()
def pullall(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Pull all images required for the stack."""
    # Stack service images first, then every worker kind: once with
    # builder=True and once with builder=False.
    pull(services=None, env_file=env_file, image_tag=image_tag)
    for with_builder in (True, False):
        worker_pull(
            kinds=["all"],
            builder=with_builder,
            env_file=env_file,
            image_tag=image_tag,
        )
472
+
473
+
474
@app.command()
def up(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Start the stack.

    On root nodes (NODE_ROLE=root, the default), the local Redis services are
    started alongside the server. On worker nodes (NODE_ROLE=worker), Redis
    services are skipped — the worker is expected to connect to the root
    node's Redis via REDIS_CONTROL_URL / REDIS_TELEMETRY_URL.
    """
    # Only root nodes enable the "root" compose profile.
    profile = None
    if _node_role(env_file) == NodeRole.ROOT:
        profile = "root"

    start_args = ["up", "-d", "--wait"]
    _compose(
        start_args,
        env_file=env_file,
        env=image_env_overrides(image_tag),
        to_deploy=True,
        profile=profile,
    )
    logging.success("FlowMesh stack is up.")
499
+
500
+
501
def _drain_workers(env_file: Path) -> None:
    """Ask the node client to destroy all workers; never abort the caller.

    Draining is best-effort: any exception raised while building the client
    or destroying workers is logged as a warning so shutdown can continue.
    """
    try:
        stack_node_client(env_file, base_url=None, token=None).destroy_all_workers()
    except Exception as failure:
        logging.warning(f"Unable to drain workers; continuing shutdown. {failure}")
508
+
509
+
510
@app.command()
def down(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Drain workers and stop the stack."""
    logging.info("Draining workers...")
    _drain_workers(env_file)

    logging.info("Shutting down the FlowMesh stack...")
    # The "root" profile is always passed here, regardless of NODE_ROLE.
    overrides = image_env_overrides(image_tag)
    _compose(["down"], env_file=env_file, env=overrides, profile="root")
    logging.success("FlowMesh stack stopped.")
530
+
531
+
532
@app.command()
def restart(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Drain workers and restart the stack."""
    # Resolve NODE_ROLE before touching anything. An invalid role makes
    # _node_role exit; doing that only after "down" would leave the stack
    # stopped with no "up" to follow. The env file is ensured first because
    # the role is read from it (previously the "down" step created it).
    ensure_env_file(env_file, stack_env_example())
    profile = "root" if _node_role(env_file) == NodeRole.ROOT else None

    logging.info("Draining workers...")
    _drain_workers(env_file)
    # "down" always uses the root profile, regardless of NODE_ROLE.
    _compose(
        ["down"],
        env_file=env_file,
        env=image_env_overrides(image_tag),
        profile="root",
    )
    _compose(
        ["up", "-d", "--wait"],
        env_file=env_file,
        env=image_env_overrides(image_tag),
        to_deploy=True,
        profile=profile,
    )
    logging.success("FlowMesh stack is up.")
559
+
560
+
561
@app.command()
def logs(
    service: str | None = typer.Argument(None, help="Optional service name"),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
) -> None:
    """Stream logs from stack services or a specific service container."""
    stack = _stack()
    exit_code = stack.stream_logs(env_file=env_file, service=service, profile="root")
    if exit_code == 0:
        return
    # Propagate the streaming command's status as this command's exit code.
    raise typer.Exit(code=exit_code)
572
+
573
+
574
@app.command()
def ps(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
) -> None:
    """Display running status of stack containers and worker containers."""
    _compose(["ps"], env_file=env_file, env=None, profile="root")

    logging.log("\nWorkers:")
    docker_bin = _require_bin("docker")
    # Worker containers are selected by the flowmesh.group label.
    list_workers = [
        docker_bin,
        "ps",
        "-a",
        "--filter",
        "label=flowmesh.group=server-workers",
        "--format",
        " {{.Names}}\t{{.Status}}",
    ]
    # nosec B603: argv list, absolute binary path. The listing's exit status
    # is not checked.
    subprocess.run(list_workers, check=False)  # nosec B603
596
+
597
+
598
@app.command("status")
def status_cmd(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
) -> None:
    """Display running status of stack containers (alias for ps)."""
    # Delegates to the ps command function with the same env file.
    return ps(env_file=env_file)
606
+
607
+
608
@app.command()
def clean(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for compose"
    ),
    image_tag: str | None = typer.Option(
        None, "--image-tag", help="Override FLOWMESH_VERSION"
    ),
) -> None:
    """Drain workers, stop the stack, and remove all containers and volumes."""
    logging.info("Draining workers...")
    _drain_workers(env_file)

    logging.info("Removing stack containers and volumes...")
    # "-v" asks compose to remove volumes along with the containers.
    teardown_args = ["down", "-v"]
    overrides = image_env_overrides(image_tag)
    _compose(teardown_args, env_file=env_file, env=overrides, profile="root")
    logging.success("FlowMesh stack cleaned.")
628
+
629
+
630
def _write_env_example(package: str, filename: str, schema, errors: list[str]) -> None:
    """Render ``schema`` as an env example and write it to a package asset.

    Args:
        package: Package passed to ``asset_path``.
        filename: Asset file name inside ``package``.
        schema: Env schema handed to ``render_env_example``.
        errors: Collector list; a message is appended when the asset path
            cannot be resolved (nothing is written in that case).
    """
    try:
        destination = asset_path(package, filename)
    except AssetNotFoundError as lookup_error:
        failure = f"Unable to resolve asset {package}/{filename}: {lookup_error}"
        logging.error(failure)
        errors.append(failure)
    else:
        destination.write_text(render_env_example(schema))
        logging.success(f"Wrote {destination}")
640
+
641
+
642
@app.command("env-examples")
def env_examples() -> None:
    """Generate env example files from the shared schema."""
    failures: list[str] = []
    _write_env_example(
        package="flowmesh_cli_stack.assets",
        filename=".env.example",
        schema=STACK_ENV_SCHEMA,
        errors=failures,
    )
    # The helper records a message instead of raising; exit non-zero if any
    # were collected.
    if not failures:
        return
    raise typer.Exit(code=1)
654
+
655
+
656
@app.command()
def doctor(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file to validate"
    )
) -> None:
    """Verify Docker access and validate stack env configuration."""
    # Each finding is logged through _log_finding as the checks produce it.
    report = run_doctor_checks(env_file, STACK_ENV_SCHEMA, callback=_log_finding)
    if not report.errors:
        logging.success("Doctor checks passed.")
        return
    logging.error(f"Doctor found {len(report.errors)} issue(s).")
    raise typer.Exit(code=1)
668
+
669
+
670
@app.command("init")
def init(
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file to write"
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force initialization; overwrite existing files"
    ),
    role: str = typer.Option(
        NodeRole.ROOT.value,
        "--role",
        help="Target NODE_ROLE for the generated env file (root|worker).",
    ),
    deploy: bool = typer.Option(
        False,
        "--deploy",
        help=(
            # The trailing space keeps the two fragments from running together
            # in the rendered help text.
            "Pin FLOWMESH_VERSION to the installed flowmesh-cli-stack package version "
            "(falls back to 'latest' if package metadata is missing)."
        ),
    ),
) -> None:
    """Create or overwrite the stack env file rendered from the schema."""
    # Validate the role before prompting so a bad --role fails immediately.
    node_role = parse_node_role(role)

    if env_file.exists() and not force:
        if not typer.confirm(f"{env_file} exists. Overwrite?", default=False):
            logging.info("Keeping existing env file.")
            return

    deploy_version: str | None = None
    if deploy:
        package_version = resolve_package_version()
        if package_version is None:
            # Point at the file actually being written, which may not be
            # ".env" when --env-file is given.
            logging.warning(
                "Unable to resolve flowmesh-cli-stack version; "
                "falling back to FLOWMESH_VERSION=latest. "
                f"Edit {env_file} if you need a specific version."
            )
            deploy_version = "latest"
        else:
            # GHCR images for releases are pushed at v<version>.
            deploy_version = f"v{package_version}"

    # Deploy overrides are applied after (and so win over) role overrides.
    overrides = {
        **role_overrides(node_role),
        **deploy_overrides(deploy, deploy_version),
    }
    env_file.write_text(render_env_example(STACK_ENV_SCHEMA, overrides=overrides))
    logging.success(f"Wrote {env_file} (NODE_ROLE={node_role.value}).")
717
+
718
+
719
@app.command("purge")
def purge(
    version: str = typer.Argument(
        ..., help="FlowMesh version to purge from local Docker"
    ),
    targets: list[str] | None = typer.Option(
        None,
        "--target",
        "-t",
        help="Optional specific image targets to purge "
        "(e.g., flowmesh_server, flowmesh_worker_gpu, etc.)",
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="List images to be purged without deleting them"
    ),
    env_file: Path = typer.Option(
        DEFAULT_ENV_FILE, "--env-file", help="Env file for docker"
    ),
) -> None:
    """Purge FlowMesh Docker images for a specific version from local Docker."""
    try:
        ensure_docker_available()
    except DockerError as exc:
        logging.error(str(exc))
        raise typer.Exit(code=1)

    logging.info(f"Purging FlowMesh Docker images with version '{version}'...")
    load_env(env_file, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS)

    if not targets:
        # No --target given (None, or an empty container depending on the
        # Typer version): purge every known build target.
        selected = list(BUILD_TARGETS)
    else:
        # Drop repeated --target values while keeping their order. Otherwise
        # the same image would be queued twice and the second removal would
        # fail, turning a successful purge into exit code 1.
        selected = list(dict.fromkeys(targets))
        invalid = [t for t in selected if t not in BUILD_TARGETS]
        if invalid:
            logging.error(f"Invalid targets specified: {', '.join(invalid)}")
            raise typer.Exit(code=1)

    images_to_purge: list[str] = []
    registry = os.getenv("FLOWMESH_REGISTRY", "ghcr.io/mlsys-io")
    for target in selected:
        image_ref = get_image_ref(registry=registry, version=version, target=target)
        # A zero return code from inspect means the image exists locally.
        result = inspect_image(image_ref, capture_output=True)
        if result.returncode != 0:
            logging.warning(f"Image not found: {image_ref}")
        elif image_ref not in images_to_purge:
            # Guard against two targets resolving to the same reference.
            images_to_purge.append(image_ref)

    if not images_to_purge:
        logging.info("No images to purge.")
        return

    if dry_run:
        logging.info("Images to be purged:")
        for image in images_to_purge:
            logging.log(f" {image}")
        return

    # Attempt every removal; report failure at the end so one bad image does
    # not prevent the others from being removed.
    error = False
    for image in images_to_purge:
        result = remove_image(image, capture_output=True)
        if result.returncode == 0:
            logging.success(f"Removed image: {image}")
            continue
        logging.error(f"Failed to remove image: {image}")
        if result.stdout:
            logging.log(result.stdout)
        if result.stderr:
            logging.log(result.stderr, err=True)
        error = True
    if error:
        raise typer.Exit(code=1)