flowmesh-sdk-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ """Pure doctor checks shared by FlowMesh tooling."""
2
+
3
+ import shutil
4
+ import subprocess
5
+ from collections.abc import Callable, Iterable
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any, Literal
9
+
10
+ from flowmesh.config import DEFAULT_CONFIG_PATH, FlowMeshConfig
11
+ from flowmesh.exceptions import ConfigInvalidError, ConfigNotFoundError
12
+
13
+ from .docker import DockerError, ensure_docker_available
14
+ from .env import validate_env_file
15
+ from .env_schema import EnvSchema, schema_keys, validate_env_values
16
+
17
+ type FindingLevel = Literal["note", "warning", "error"]
18
+
19
+ _DEFAULT_CUDA_PROBE_IMAGE = "nvidia/cuda:12.9.1-base-ubuntu24.04"
20
+ _DEFAULT_DOCKER_GPU_RUNTIME = "nvidia"
21
+
22
+
23
@dataclass(frozen=True)
class DoctorFinding:
    """Immutable record of one doctor result: a severity level and its text."""

    # One of the ``FindingLevel`` literals ("note", "warning", "error").
    level: FindingLevel
    # Human-readable description of the finding.
    message: str
27
+
28
+
29
@dataclass
class DoctorReport:
    """Ordered collection of doctor findings with an optional per-finding hook.

    Every finding is appended to ``findings`` in the order it is reported.
    When ``callback`` is truthy it is invoked with each finding immediately
    after the finding has been recorded.
    """

    findings: list[DoctorFinding] = field(default_factory=list)
    callback: Callable[[DoctorFinding], Any] | None = None

    def _messages(self, level: FindingLevel) -> list[str]:
        """Return the messages of all recorded findings with the given level."""
        return [item.message for item in self.findings if item.level == level]

    @property
    def errors(self) -> list[str]:
        """Messages of all ``"error"`` findings, in insertion order."""
        return self._messages("error")

    @property
    def warnings(self) -> list[str]:
        """Messages of all ``"warning"`` findings, in insertion order."""
        return self._messages("warning")

    @property
    def notes(self) -> list[str]:
        """Messages of all ``"note"`` findings, in insertion order."""
        return self._messages("note")

    def error(self, message: str) -> None:
        """Record ``message`` as an error."""
        self._add_finding("error", message)

    def warning(self, message: str) -> None:
        """Record ``message`` as a warning."""
        self._add_finding("warning", message)

    def note(self, message: str) -> None:
        """Record ``message`` as a note."""
        self._add_finding("note", message)

    def extend_errors(self, messages: Iterable[str]) -> None:
        """Record every item of ``messages`` as an error."""
        for text in messages:
            self.error(text)

    def extend_warnings(self, messages: Iterable[str]) -> None:
        """Record every item of ``messages`` as a warning."""
        for text in messages:
            self.warning(text)

    def extend_notes(self, messages: Iterable[str]) -> None:
        """Record every item of ``messages`` as a note."""
        for text in messages:
            self.note(text)

    def _add_finding(self, level: FindingLevel, message: str) -> None:
        """Store a new finding, then hand it to the callback if one is set."""
        record = DoctorFinding(level, message)
        self.findings.append(record)
        hook = self.callback
        if hook:
            hook(record)
76
+
77
+
78
def run_doctor_checks(
    env_file: Path,
    schema: EnvSchema,
    callback: Callable[[DoctorFinding], Any] | None = None,
) -> DoctorReport:
    """Run every shared doctor check and collect the results.

    The env file is checked first (key set, then values against ``schema``),
    followed by the config file, Docker availability and GPU visibility.
    ``callback``, when given, is attached to the report so it sees each
    finding as it is recorded.
    """
    report = DoctorReport(callback=callback)

    wanted_keys = schema_keys(schema)
    parsed_env, file_errors = validate_env_file(env_file, expected_keys=wanted_keys)
    report.extend_errors(file_errors)

    # ``None`` means the env file could not be read at all; skip value checks.
    if parsed_env is not None:
        value_errors, value_warnings = validate_env_values(schema, parsed_env)
        report.extend_errors(value_errors)
        report.extend_warnings(value_warnings)

    validate_config_file(report)
    validate_docker_availability(report)
    validate_gpu_visibility(report, parsed_env if parsed_env else {})
    return report
97
+
98
+
99
def validate_config_file(report: DoctorReport) -> None:
    """Load the config at ``DEFAULT_CONFIG_PATH`` and report the outcome.

    A missing file is recorded as a warning, an invalid one as an error, and
    a successful load as a note.
    """
    try:
        FlowMeshConfig.from_file(DEFAULT_CONFIG_PATH)
    except ConfigNotFoundError as missing:
        report.warning(str(missing))
        return
    except ConfigInvalidError as invalid:
        report.error(str(invalid))
        return
    report.note(f"Config file found at {DEFAULT_CONFIG_PATH}")
109
+
110
+
111
def validate_docker_availability(report: DoctorReport) -> None:
    """Check that Docker is usable and record what was found.

    Steps, each recorded on ``report``:

    1. ``ensure_docker_available()``; a ``DockerError`` becomes an error and
       stops the check.
    2. ``docker --version``; its output (stdout, else stderr) becomes a note.
       If the ``docker`` binary cannot be located or executed, an error is
       recorded and the check stops.
    3. ``docker info``; exit code 0 is noted as a reachable daemon, anything
       else is an error.
    4. Presence of a ``docker-compose`` binary on ``PATH`` is flagged as a
       warning; otherwise a note says the compose plugin is used.
    """
    try:
        ensure_docker_available()
    except DockerError as exc:
        report.error(str(exc))
        return
    report.note("Docker is available")

    try:
        # Resolve the binary inside the ``try``: ``_require_bin`` signals a
        # missing executable with FileNotFoundError, which must be reported
        # as a finding rather than escaping and aborting the doctor run.
        docker_bin = _require_bin("docker")
        version = subprocess.run(
            [
                docker_bin,
                "--version",
            ],  # nosec B603: argv list, no shell, absolute path.
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        report.error("Docker CLI not found")
        return

    if version.stdout:
        report.note(version.stdout.strip())
    elif version.stderr:
        report.note(version.stderr.strip())

    docker_info = subprocess.run(
        [docker_bin, "info"],  # nosec B603: argv list, no shell, absolute path.
        capture_output=True,
        text=True,
        check=False,
    )
    if docker_info.returncode == 0:
        report.note("Docker daemon: reachable")
    else:
        report.error("Docker daemon: NOT reachable")

    if shutil.which("docker-compose"):
        report.warning("docker-compose detected (legacy).")
    else:
        report.note("Using docker compose plugin.")
154
+
155
+
156
def validate_gpu_visibility(report: DoctorReport, env_values: dict[str, str]) -> None:
    """Check GPU visibility on the host, then inside Docker.

    Without ``nvidia-smi`` on ``PATH`` only a warning is recorded. Otherwise
    the tool is run once: its stdout (if any) is noted, a non-zero exit is
    recorded as a warning, and a zero exit continues with the Docker GPU
    runtime probe.
    """
    smi_path = shutil.which("nvidia-smi")
    if not smi_path:
        report.warning("nvidia-smi not found; GPU visibility not verified.")
        return

    proc = subprocess.run(
        [smi_path],  # nosec B603: argv list, no shell, absolute path.
        capture_output=True,
        text=True,
        check=False,
    )
    if proc.stdout:
        report.note("nvidia-smi output:")
        report.note(proc.stdout)

    if proc.returncode == 0:
        validate_docker_gpu_runtime(report, env_values)
        return

    detail = (proc.stderr or proc.stdout).strip()
    report.warning(f"nvidia-smi failed on host: {detail or 'unknown error'}")
176
+
177
+
178
def validate_docker_gpu_runtime(
    report: DoctorReport, env_values: dict[str, str]
) -> None:
    """Run ``nvidia-smi`` in a probe container to verify Docker GPU access.

    Does nothing when ``docker`` is not on ``PATH``. The probe image comes
    from ``SERVER_CUDA_PROBE_IMAGE`` and the runtime from
    ``DOCKER_GPU_RUNTIME``; both fall back to module defaults when the key is
    absent. An empty ``DOCKER_GPU_RUNTIME`` is honoured and drops the
    ``--runtime`` flag. A zero exit is recorded as a note; any failure is
    recorded as a warning, with a dedicated hint when Docker rejects the
    runtime name.
    """
    docker_bin = shutil.which("docker")
    if docker_bin is None:
        return

    # An image name can never legitimately be empty, so a present-but-blank
    # value (``SERVER_CUDA_PROBE_IMAGE=``) falls back to the default instead
    # of handing ``docker run`` an empty image argument.
    probe_image = (
        env_values.get("SERVER_CUDA_PROBE_IMAGE", "").strip()
        or _DEFAULT_CUDA_PROBE_IMAGE
    )
    # Unlike the image, a blank runtime is meaningful: it disables --runtime.
    runtime = env_values.get("DOCKER_GPU_RUNTIME", _DEFAULT_DOCKER_GPU_RUNTIME).strip()
    command = [docker_bin, "run", "--rm"]
    if runtime:
        command += ["--runtime", runtime]
    command += [
        "--gpus",
        "all",
        probe_image,
        "nvidia-smi",
        "--query-gpu=index,name",
        "--format=csv,noheader",
    ]
    result = subprocess.run(
        command,  # nosec B603: argv list, no shell, absolute path.
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode == 0:
        report.note("Docker GPU probe succeeded.")
        return

    stderr = (result.stderr or "").strip()
    stdout = (result.stdout or "").strip()
    detail = stderr or stdout or f"exit code {result.returncode}"
    lowered = detail.lower()
    if runtime and "unknown or invalid runtime name" in lowered:
        report.warning(
            f"DOCKER_GPU_RUNTIME={runtime!r} is not available to Docker on this host. "
            "If `docker run --rm --gpus all ...` works without `--runtime`, set "
            "`DOCKER_GPU_RUNTIME=` in the stack env. This is common on DGX Spark."
        )
        return
    report.warning(f"Docker GPU probe failed: {detail}")
221
+
222
+
223
+ def _require_bin(name: str) -> str:
224
+ path = shutil.which(name)
225
+ if path is None:
226
+ raise FileNotFoundError(name)
227
+ return path
flowmesh_stack/env.py ADDED
@@ -0,0 +1,145 @@
1
+ """Environment file helpers shared by FlowMesh tooling."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from urllib.parse import urlparse
6
+
7
+
8
def parse_env_file(env_file: Path) -> dict[str, str]:
    """Read ``KEY=VALUE`` pairs from a .env file.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. A
    leading ``export `` is dropped, keys are stripped and values are passed
    through ``_normalize_env_value``. A missing file yields an empty dict;
    later duplicates of a key overwrite earlier ones.
    """
    parsed: dict[str, str] = {}
    if env_file.exists():
        for raw_line in env_file.read_text().splitlines():
            entry = raw_line.strip()
            if entry == "" or entry.startswith("#") or "=" not in entry:
                continue
            name, _, raw_value = entry.removeprefix("export ").strip().partition("=")
            parsed[name.strip()] = _normalize_env_value(raw_value)
    return parsed
21
+
22
+
23
+ def parse_bool(value: str) -> bool | None:
24
+ """Parse a string into a boolean value."""
25
+ lowered = value.strip().lower()
26
+ if lowered in {"1", "true", "yes", "on"}:
27
+ return True
28
+ if lowered in {"0", "false", "no", "off"}:
29
+ return False
30
+ return None
31
+
32
+
33
+ def parse_int(value: str) -> int | None:
34
+ """Parse a string into an integer value."""
35
+ stripped = value.strip()
36
+ if not stripped:
37
+ return None
38
+ try:
39
+ return int(stripped)
40
+ except ValueError:
41
+ return None
42
+
43
+
44
+ def parse_float(value: str) -> float | None:
45
+ """Parse a string into a float value."""
46
+ stripped = value.strip()
47
+ if not stripped:
48
+ return None
49
+ try:
50
+ return float(stripped)
51
+ except ValueError:
52
+ return None
53
+
54
+
55
+ def is_url(value: str, schemes: set[str] | None = None) -> bool:
56
+ """Check if a string is a valid URL with optional scheme restrictions."""
57
+ parsed = urlparse(value.strip())
58
+ if not (parsed.scheme and parsed.netloc):
59
+ return False
60
+ if schemes and parsed.scheme not in schemes:
61
+ return False
62
+ return True
63
+
64
+
65
+ def validate_env_file(
66
+ env_file: Path,
67
+ example: Path | None = None,
68
+ expected_keys: set[str] | None = None,
69
+ ) -> tuple[dict[str, str] | None, list[str]]:
70
+ """Validate an env file against an example template or key set."""
71
+ errors: list[str] = []
72
+ if not env_file.exists():
73
+ return None, [f"env file not found: {env_file}"]
74
+ if expected_keys is None:
75
+ if example is None or not example.exists():
76
+ return parse_env_file(env_file), errors
77
+ expected_keys = _parse_env_keys(example)
78
+
79
+ actual_keys = _parse_env_keys(env_file)
80
+ missing = sorted(expected_keys - actual_keys)
81
+ unexpected = sorted(actual_keys - expected_keys)
82
+ if missing:
83
+ errors.append(f"Missing required env vars in {env_file}: {', '.join(missing)}")
84
+ if unexpected:
85
+ errors.append(f"Unexpected env vars in {env_file}: {', '.join(unexpected)}")
86
+ return parse_env_file(env_file), errors
87
+
88
+
89
def ensure_env_file(env_file: Path, example: Path) -> bool:
    """Seed ``env_file`` with the text of ``example`` when it is absent.

    Returns ``True`` only when the file was written; an existing ``env_file``
    or a missing ``example`` leaves everything untouched and returns ``False``.
    """
    if not env_file.exists() and example.exists():
        env_file.write_text(example.read_text())
        return True
    return False
95
+
96
+
97
+ def load_env(
98
+ env_file: Path,
99
+ base_dir: Path | None = None,
100
+ path_keys: set[str] | None = None,
101
+ ) -> None:
102
+ """Load env vars from a file into ``os.environ``."""
103
+ env_key = (env_file, base_dir, path_keys)
104
+ if getattr(load_env, "_loaded", None) == env_key:
105
+ return
106
+ if not env_file.exists():
107
+ return
108
+ for line in env_file.read_text().splitlines():
109
+ stripped = line.strip()
110
+ if not stripped or stripped.startswith("#") or "=" not in stripped:
111
+ continue
112
+ key, value = stripped.split("=", 1)
113
+ if path_keys and key in path_keys and value:
114
+ expanded = Path(value).expanduser()
115
+ if expanded.is_absolute():
116
+ os.environ[key] = str(expanded)
117
+ elif base_dir is not None:
118
+ os.environ[key] = str((base_dir / expanded).resolve())
119
+ else:
120
+ os.environ[key] = value
121
+ else:
122
+ os.environ[key] = value
123
+ load_env._loaded = env_key # type: ignore[attr-defined]
124
+
125
+
126
+ def _parse_env_keys(path: Path) -> set[str]:
127
+ keys: set[str] = set()
128
+ if not path.exists():
129
+ return keys
130
+ for line in path.read_text().splitlines():
131
+ stripped = line.strip()
132
+ if not stripped or stripped.startswith("#") or "=" not in stripped:
133
+ continue
134
+ stripped = stripped.removeprefix("export ").strip()
135
+ key = stripped.split("=", 1)[0].strip()
136
+ if key:
137
+ keys.add(key)
138
+ return keys
139
+
140
+
141
+ def _normalize_env_value(value: str) -> str:
142
+ stripped = value.strip()
143
+ if len(stripped) >= 2 and stripped[0] == stripped[-1] and stripped[0] in "\"'":
144
+ stripped = stripped[1:-1]
145
+ return stripped.strip()
@@ -0,0 +1,238 @@
1
+ """Environment schema definitions and pure validation helpers."""
2
+
3
+ import enum
4
+ from collections.abc import Callable, Iterable, Mapping
5
+ from dataclasses import dataclass, field
6
+ from logging import _nameToLevel as LOG_LEVELS
7
+ from pathlib import Path
8
+ from typing import Literal
9
+
10
+ from .env import is_url, parse_bool, parse_float, parse_int
11
+
12
+
13
+ class EnvVarType(enum.StrEnum):
14
+ STRING = "string"
15
+ INT = "int"
16
+ FLOAT = "float"
17
+ BOOL = "bool"
18
+ FILE_PATH = "file_path"
19
+ DIR_PATH = "dir_path"
20
+ URL = "url"
21
+ LOG_LEVEL = "log_level"
22
+ ENUM = "enum"
23
+ CSV = "csv"
24
+ CSV_INTS_OR_ALL = "csv_ints_or_all"
25
+
26
+
27
@dataclass(frozen=True)
class EnvVar:
    """Declaration of a single environment variable and its validation rules."""

    # Variable name as written in the env file.
    key: str
    # Value rendered into the example file; also validated in place of an
    # empty value when ``use_default`` is set.
    default: str = ""
    # Comment text for the example file: one line, or a list of lines.
    description: str | list[str] | None = None
    # Selects which type-specific validation runs.
    var_type: EnvVarType = EnvVarType.STRING
    # An empty value is an error.
    required: bool = False
    # Validate ``default`` when the value is empty (ignored if ``required``).
    use_default: bool = False
    # Allowed values for ENUM variables.
    choices: Iterable[str] | None = None
    # Inclusive bounds for INT and FLOAT variables.
    min_value: float | None = None
    max_value: float | None = None
    # Minimum number of characters.
    min_length: int | None = None
    # How a non-existent path is reported; ``None`` skips the existence check.
    ensure_path: Literal["error", "warn", "create"] | None = None
    # Accepted URL schemes; ``None`` accepts any.
    url_schemes: set[str] | None = None
    # An empty, non-required value produces a warning.
    warn_if_empty: bool = False
    # Extra check called as ``validator(value, errors, warnings)``.
    validator: Callable[[str, list[str], list[str]], None] | None = None
43
+
44
+
45
@dataclass(frozen=True)
class EnvSection:
    """Titled group of env vars, rendered together in the example file."""

    # Heading text placed in the ``# ==== ... ====`` banner.
    title: str
    # Comment lines printed under the banner.
    description: list[str] = field(default_factory=list)
    # Variables belonging to this section, in output order.
    vars: list[EnvVar] = field(default_factory=list)
50
+
51
+
52
@dataclass(frozen=True)
class EnvSchema:
    """Complete env-file description: header, sections and cross-field checks."""

    name: str
    # Lines emitted verbatim at the top of the rendered example file.
    header: list[str]
    sections: list[EnvSection]
    # Schema-wide checks, each called as ``validator(env, errors, warnings)``
    # after all per-variable validation has run.
    validators: list[Callable[[dict[str, str], list[str], list[str]], None]] = field(
        default_factory=list
    )
60
+
61
+
62
def schema_keys(schema: EnvSchema) -> set[str]:
    """Collect the key of every variable declared in any section of ``schema``."""
    return {var.key for section in schema.sections for var in section.vars}
69
+
70
+
71
def render_env_example(
    schema: EnvSchema, overrides: Mapping[str, str] | None = None
) -> str:
    """Render the text of an example .env file for ``schema``.

    The output starts with the schema header, followed by one banner per
    section, the section's description lines as comments, and for each
    variable its description (as comments) and a ``KEY=value`` line.

    ``overrides`` replaces the default value of the listed keys (for example
    to produce a worker-shaped env without rebuilding the schema). Keys not
    present in ``overrides`` use their schema-declared default.
    """
    chosen = overrides or {}
    out: list[str] = list(schema.header)
    for section in schema.sections:
        out += ["", f"# ==== {section.title} ===="]
        out += [f"# {text}" for text in section.description]
        for var in section.vars:
            doc = var.description
            if doc:
                doc_lines = doc if isinstance(doc, list) else [doc]
                out += [f"# {text}" for text in doc_lines]
            out.append(f"{var.key}={chosen.get(var.key, var.default)}")
    # Trailing empty item so the rendered text ends with a newline.
    out.append("")
    return "\n".join(out)
99
+
100
+
101
+ def validate_env_values(
102
+ schema: EnvSchema, env: dict[str, str]
103
+ ) -> tuple[list[str], list[str]]:
104
+ """Validate environment variable values against the schema.
105
+
106
+ Returns a tuple of (errors, warnings) found during validation.
107
+ """
108
+ errors: list[str] = []
109
+ warnings: list[str] = []
110
+ for section in schema.sections:
111
+ for var in section.vars:
112
+ raw = env.get(var.key, "").strip()
113
+ if not raw:
114
+ use_default = var.use_default
115
+ if var.required:
116
+ errors.append(f"{var.key} must be set")
117
+ use_default = False
118
+ elif var.warn_if_empty:
119
+ message = f"{var.key} is empty"
120
+ if use_default:
121
+ message += f"; default value '{var.default}' will be used"
122
+ warnings.append(message)
123
+ if not use_default:
124
+ continue
125
+ raw = var.default
126
+
127
+ if var.min_length is not None and len(raw) < var.min_length:
128
+ errors.append(f"{var.key} must be at least {var.min_length} characters")
129
+
130
+ match var.var_type:
131
+ case EnvVarType.INT:
132
+ int_value = parse_int(raw)
133
+ if int_value is None:
134
+ errors.append(f"{var.key} must be an integer")
135
+ continue
136
+ if var.min_value is not None and int_value < var.min_value:
137
+ errors.append(f"{var.key} must be >= {int(var.min_value)}")
138
+ if var.max_value is not None and int_value > var.max_value:
139
+ errors.append(f"{var.key} must be <= {int(var.max_value)}")
140
+ case EnvVarType.FLOAT:
141
+ float_value = parse_float(raw)
142
+ if float_value is None:
143
+ errors.append(f"{var.key} must be a number")
144
+ continue
145
+ if var.min_value is not None and float_value < var.min_value:
146
+ errors.append(f"{var.key} must be >= {var.min_value}")
147
+ if var.max_value is not None and float_value > var.max_value:
148
+ errors.append(f"{var.key} must be <= {var.max_value}")
149
+ case EnvVarType.BOOL:
150
+ if parse_bool(raw) is None:
151
+ errors.append(
152
+ f"{var.key} must be a boolean (true/false or 1/0)"
153
+ )
154
+ case EnvVarType.FILE_PATH | EnvVarType.DIR_PATH:
155
+ _ensure_path(raw, var, errors, warnings)
156
+ case EnvVarType.URL:
157
+ if not is_url(raw, schemes=var.url_schemes):
158
+ errors.append(f"{var.key} must be a valid URL")
159
+ case EnvVarType.LOG_LEVEL:
160
+ if raw.upper() not in LOG_LEVELS:
161
+ errors.append(f"{var.key} must be a valid log level")
162
+ case EnvVarType.ENUM:
163
+ if var.choices and raw not in var.choices:
164
+ allowed = ", ".join(sorted(var.choices))
165
+ errors.append(f"{var.key} must be one of: {allowed}")
166
+ case EnvVarType.CSV:
167
+ parts = [part.strip() for part in raw.split(",")]
168
+ if any(not part for part in parts):
169
+ errors.append(f"{var.key} must not contain empty entries")
170
+ case EnvVarType.CSV_INTS_OR_ALL:
171
+ if raw.lower() != "all":
172
+ parts = [part.strip() for part in raw.split(",")]
173
+ if any(not part.isdigit() for part in parts if part):
174
+ errors.append(
175
+ f"{var.key} must be 'all' or a "
176
+ "comma-separated list of integers"
177
+ )
178
+
179
+ if var.validator:
180
+ var.validator(raw, errors, warnings)
181
+ for validator in schema.validators:
182
+ validator(env, errors, warnings)
183
+
184
+ return errors, warnings
185
+
186
+
187
def require_if_true(
    env: dict[str, str], flag_key: str, required_keys: list[str], errors: list[str]
) -> None:
    """Append an error for each blank key in ``required_keys`` when the flag is on.

    The flag counts as on only when ``parse_bool`` returns ``True`` for it; a
    missing, false or unparseable flag adds nothing.
    """
    if not parse_bool(env.get(flag_key, "")):
        return
    errors.extend(
        f"{key} must be set when {flag_key}=1"
        for key in required_keys
        if not env.get(key, "").strip()
    )
195
+
196
+
197
def require_pair(
    env: dict[str, str], key_a: str, key_b: str, errors: list[str]
) -> None:
    """Append an error when exactly one of the two keys has a non-blank value."""
    has_a = bool(env.get(key_a, "").strip())
    has_b = bool(env.get(key_b, "").strip())
    if has_a != has_b:
        errors.append(f"{key_a} and {key_b} must both be set")
205
+
206
+
207
def require_all_or_none(
    env: dict[str, str], keys: list[str], errors: list[str]
) -> None:
    """Append an error when only some of ``keys`` have non-blank values."""
    filled = sum(1 for key in keys if env.get(key, "").strip())
    if 0 < filled < len(keys):
        errors.append(f"Either all or none of {', '.join(keys)} must be set")
214
+
215
+
216
def _ensure_path(raw: str, var: EnvVar, errors: list[str], warnings: list[str]) -> None:
    """Check a FILE_PATH / DIR_PATH value according to ``var.ensure_path``.

    An empty value is always an error. With no ``ensure_path`` policy nothing
    else is checked. An existing path must match the declared kind (file or
    directory); a missing path is reported as an error or a warning depending
    on the policy.
    """
    if not raw:
        errors.append(f"{var.key} must be a non-empty path")
        return

    policy = var.ensure_path
    if policy is None:
        return

    target = Path(raw)
    if not target.exists():
        missing = f"{var.key} path does not exist: '{raw}'"
        if policy == "error":
            errors.append(missing)
        elif policy == "warn":
            warnings.append(missing)
        elif policy == "create":
            warnings.append(missing + "; it will be created at runtime")
        return

    if var.var_type == EnvVarType.FILE_PATH and not target.is_file():
        errors.append(f"{var.key} path should be a file: '{raw}'")
    elif var.var_type == EnvVarType.DIR_PATH and not target.is_dir():
        errors.append(f"{var.key} path should be a directory: '{raw}'")