flowmesh-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowmesh_cli/__init__.py +1 -0
- flowmesh_cli/cli.py +46 -0
- flowmesh_cli/commands/__init__.py +26 -0
- flowmesh_cli/commands/base.py +139 -0
- flowmesh_cli/commands/node.py +142 -0
- flowmesh_cli/commands/result.py +57 -0
- flowmesh_cli/commands/ssh.py +422 -0
- flowmesh_cli/commands/system.py +22 -0
- flowmesh_cli/commands/task.py +251 -0
- flowmesh_cli/commands/trace.py +382 -0
- flowmesh_cli/commands/worker.py +75 -0
- flowmesh_cli/commands/workflow.py +243 -0
- flowmesh_cli/core/__init__.py +1 -0
- flowmesh_cli/core/assets.py +34 -0
- flowmesh_cli/core/logging.py +43 -0
- flowmesh_cli/core/paths.py +32 -0
- flowmesh_cli/core/query.py +36 -0
- flowmesh_cli/core/task.py +30 -0
- flowmesh_cli/core/typer.py +12 -0
- flowmesh_cli-0.1.0.dist-info/METADATA +36 -0
- flowmesh_cli-0.1.0.dist-info/RECORD +25 -0
- flowmesh_cli-0.1.0.dist-info/WHEEL +5 -0
- flowmesh_cli-0.1.0.dist-info/entry_points.txt +2 -0
- flowmesh_cli-0.1.0.dist-info/licenses/LICENSE +202 -0
- flowmesh_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"""SSH session commands."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import shlex
|
|
7
|
+
import shutil
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import typer
|
|
14
|
+
import websockets
|
|
15
|
+
from flowmesh import FlowMesh
|
|
16
|
+
from flowmesh.exceptions import FlowMeshError, NotFoundError
|
|
17
|
+
from flowmesh.models.common import TERMINAL_TASK_STATUSES, TaskStatus
|
|
18
|
+
from flowmesh.params import append_param, extend_params
|
|
19
|
+
|
|
20
|
+
from ..core import logging
|
|
21
|
+
from ..core.query import parse_query_filters
|
|
22
|
+
from ..core.typer import get_typer
|
|
23
|
+
|
|
24
|
+
# Command group for this module: the SSH command functions defined below
# register themselves on this object through @app.command(...).
app = get_typer(help="Connect to SSH sessions on FlowMesh workers.")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _exec_ssh(
    ssh_info: dict[str, Any],
    task_id: str,
    extra_args: str | None,
    direct: bool = False,
) -> None:
    """Replace the current process with an ssh command.

    Args:
        ssh_info: SSH session details. The keys read here are ``mode``,
            ``username``, ``host``, ``port``, ``directHost`` and
            ``directPort``.
        task_id: Task identifier. In proxy mode it is passed to
            ``flowmesh ssh proxy`` and used as the ssh destination host.
        extra_args: Extra ssh arguments given as one shell-style string
            (quotes are honoured), or None.
        direct: When True, prefer ``directHost``/``directPort`` if present and
            connect straight to the host whatever the publish mode is.

    Raises:
        typer.Exit: With code 1 when ssh is not on PATH, no host is available,
            the publish mode is unknown, or ``extra_args`` cannot be parsed.

    On success this function does not return: ``os.execvp`` replaces the
    running process with ssh.
    """
    ssh_bin = shutil.which("ssh")
    if ssh_bin is None:
        logging.error("ssh not found in PATH")
        raise typer.Exit(code=1)

    mode = str(ssh_info.get("mode", "direct"))
    user = ssh_info.get("username", "flowmesh")
    host = ssh_info.get("host")
    port = ssh_info.get("port")
    if direct:
        # Direct endpoint values override the published ones only when set.
        if direct_host := ssh_info.get("directHost"):
            host = direct_host
        if direct_port := ssh_info.get("directPort"):
            port = direct_port

    # NOTE(review): host key checking is switched off and no known_hosts file
    # is kept, so the remote host identity is never verified. Confirm this is
    # acceptable for the session endpoints this CLI talks to.
    args = [
        ssh_bin,
        "-o",
        "StrictHostKeyChecking=no",
        "-o",
        "UserKnownHostsFile=/dev/null",
        "-o",
        "LogLevel=ERROR",
    ]

    if direct or mode in ("direct", "forward"):
        if not host:
            logging.error("SSH host is not available.")
            raise typer.Exit(code=1)
        if port:
            args += ["-p", str(port)]
        args.append(f"{user}@{host}")
    elif mode == "proxy":
        # ssh hands ProxyCommand to a shell and expands %-tokens in it, so the
        # task id is shell-quoted and literal '%' is doubled. Ordinary ids
        # come out unchanged.
        safe_task_id = shlex.quote(task_id).replace("%", "%%")
        proxy_cmd = f"flowmesh ssh proxy {safe_task_id}"
        args += ["-o", f"ProxyCommand={proxy_cmd}"]
        args.append(f"{user}@{task_id}")
    else:
        logging.error(f"Unknown SSH publish mode: {mode}")
        raise typer.Exit(code=1)

    if extra_args:
        # Shell-style splitting keeps quoted values such as
        # -o "ProxyCommand=a b" together; plain whitespace splitting did not.
        try:
            args.extend(shlex.split(extra_args))
        except ValueError as exc:
            logging.error(f"Invalid --ssh-args: {exc}")
            raise typer.Exit(code=1)

    # shlex.join shows the command so that it can be copy-pasted faithfully.
    logging.info(f"Connecting: {shlex.join(args)}")
    os.execvp(ssh_bin, args)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _exit_for_task_status(task_id: str, task_info: Any) -> None:
    """Log the task's status and exit the CLI with a matching code.

    Args:
        task_id: Task identifier, used only in the log messages.
        task_info: Object exposing ``status`` and, for failures, ``error``.

    Raises:
        typer.Exit: Code 0 for DONE, 1 for FAILED, 130 for CANCELLED, and 1
            for any other status.
    """
    status = task_info.status
    if status == TaskStatus.DONE:
        logging.info(f"Task {task_id} completed successfully.")
        raise typer.Exit(code=0)
    if status == TaskStatus.FAILED:
        logging.error(f"Task {task_id} failed: {task_info.error or 'unknown error'}")
        raise typer.Exit(code=1)
    if status == TaskStatus.CANCELLED:
        logging.info(f"Task {task_id} was cancelled.")
        raise typer.Exit(code=130)
    # Any other status used to fall through silently, which let callers finish
    # with exit code 0 or keep looping. Report it and fail explicitly instead.
    logging.error(f"Unexpected task status: {status}")
    raise typer.Exit(code=1)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _poll_status_and_exit(client: FlowMesh, task_id: str, interval: float) -> None:
    """Wait on the task via ``client.tasks.wait`` and exit based on the result.

    Ctrl-C exits with code 130 and a FlowMeshError with code 1. Otherwise the
    returned task info is handed to ``_exit_for_task_status``.
    """
    try:
        final_info = client.tasks.wait(task_id, interval=interval)
    except KeyboardInterrupt:
        logging.warning("Interrupted.")
        raise typer.Exit(code=130)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    else:
        _exit_for_task_status(task_id, final_info)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _stream_logs_and_exit(
    client: FlowMesh,
    task_id: str,
    interval: float,
    tail: bool = False,
) -> None:
    """Print task log entries until the task reaches a terminal status.

    Args:
        client: FlowMesh client used for log streaming and status lookups.
        task_id: Task whose logs are streamed.
        interval: Seconds to sleep after a pass that produced no log entries.
        tail: Start from cursor "$" instead of "0".

    Raises:
        typer.Exit: Code 130 on Ctrl-C, code 1 on FlowMeshError, otherwise
            whatever ``_exit_for_task_status`` raises.
    """
    logging.info("Streaming task logs...")
    position: str | None = "0"
    if tail:
        position = "$"
    try:
        while True:
            received_any = False
            try:
                for record in client.tasks.stream_logs(task_id, cursor=position):
                    received_any = True
                    # Keep the previous cursor when an entry carries none.
                    if new_position := record.cursor:
                        position = new_position
                    fields = record.event.model_dump(mode="json")
                    stamp = fields.get("ts", "")
                    text = str(fields.get("message", "")).rstrip("\n")
                    logging.log(f"[{stamp}] {text}" if stamp else text)
            except NotFoundError:
                # Deliberately ignored; the status check below still runs.
                pass

            snapshot = client.tasks.retrieve(task_id)
            if snapshot.status in TERMINAL_TASK_STATUSES:
                _exit_for_task_status(task_id, snapshot)
            if received_any:
                # Logs are flowing: resume streaming without a pause.
                continue
            time.sleep(interval)
    except KeyboardInterrupt:
        logging.warning("Interrupted.")
        raise typer.Exit(code=130)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ------------------------------------------------------------------ #
|
|
143
|
+
# Commands
|
|
144
|
+
# ------------------------------------------------------------------ #
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@app.command("connect")
def connect(
    task_id: str = typer.Argument(..., help="Task identifier"),
    interval: float = typer.Option(2.0, help="Polling interval in seconds"),
    direct: bool = typer.Option(
        False, "--direct", help="Use direct SSH instead of proxy/forward"
    ),
    ssh_args: str | None = typer.Option(
        None, "--ssh-args", help="Extra arguments passed to ssh"
    ),
) -> None:
    """Wait for an SSH session to be ready on a task and connect."""
    # The docstring above doubles as the CLI help text, so extra notes live in
    # comments. A FlowMeshError while waiting ends the command with exit code 1.
    logging.info(f"Waiting for SSH session on task {task_id}...")
    flowmesh_client = FlowMesh()
    try:
        session = flowmesh_client.tasks.wait_for_ssh(task_id, interval=interval)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    else:
        # Hands control to ssh; see _exec_ssh for how the command is built.
        _exec_ssh(session, task_id, ssh_args, direct)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _split_option_words(option: str, value: str | None) -> list[str] | None:
    """Shell-split an optional CLI string, exiting with code 1 on bad quoting."""
    if value is None:
        return None
    try:
        return shlex.split(value)
    except ValueError as exc:
        # shlex raises ValueError for input such as an unterminated quote.
        logging.error(f"Invalid {option} value: {exc}")
        raise typer.Exit(code=1)


def _resolve_public_key(client: FlowMesh, key: Path | None) -> Any:
    """Return the key file's stripped text, or the client's detected key."""
    if key is None:
        try:
            return client.ssh.detect_public_key()
        except FlowMeshError as exc:
            logging.error(str(exc))
            raise typer.Exit(code=1)
    try:
        # Read directly instead of checking exists() first: avoids the
        # check-then-read race and covers unreadable paths as well.
        return key.read_text().strip()
    except FileNotFoundError:
        logging.error(f"Public key not found: {key}")
        raise typer.Exit(code=1)
    except (OSError, UnicodeDecodeError) as exc:
        logging.error(f"Failed to read public key {key}: {exc}")
        raise typer.Exit(code=1)


@app.command("run")
def run(
    key: Path | None = typer.Option(
        None, "--key", "-k", help="Path to SSH public key file"
    ),
    gpu: int | None = typer.Option(None, "--gpu", "-g", help="Number of GPUs"),
    gpu_memory: str | None = typer.Option(
        None, "--gpu-memory", help="GPU memory requirement (e.g. 16GB)"
    ),
    memory: str | None = typer.Option(
        None, "--memory", "-m", help="System memory (e.g. 8Gi)"
    ),
    cpu: int | None = typer.Option(None, "--cpu", help="Number of CPU cores"),
    ttl: int = typer.Option(3600, "--ttl", help="Session TTL in seconds"),
    idle_timeout: int = typer.Option(
        900, "--idle-timeout", help="Idle timeout in seconds"
    ),
    image: str | None = typer.Option(None, "--image", help="Custom session image"),
    user: str = typer.Option("flowmesh", "--user", "-u", help="SSH username"),
    mode: str = typer.Option(
        "proxy", "--mode", help="Publish mode: direct|proxy|forward"
    ),
    worker: str | None = typer.Option(
        None, "--worker", "-w", help="Pin to a specific worker"
    ),
    name: str = typer.Option("ssh-session", "--name", "-n", help="Task name"),
    env: list[str] | None = typer.Option(
        None, "--env", "-e", help="Environment variable KEY=VALUE (repeatable)"
    ),
    command: str | None = typer.Option(
        None,
        "--command",
        "-c",
        help="Command to run non-interactively (e.g. 'python train.py')",
    ),
    entrypoint_override: str | None = typer.Option(
        None,
        "--entrypoint",
        help="Override image entrypoint (non-interactive mode)",
    ),
    interactive: bool | None = typer.Option(
        None,
        "--interactive/--non-interactive",
        help="Whether to use interactive SSH or non-interactive command mode.",
    ),
    logs: bool = typer.Option(
        False,
        "--logs/--no-logs",
        help="For non-interactive runs, stream logs instead of only polling status.",
    ),
    tail: bool = typer.Option(
        False,
        "--tail",
        help="With --logs, start from the latest log entry instead of the beginning.",
    ),
    interval: float = typer.Option(2.0, "--interval", help="Polling interval"),
    direct: bool = typer.Option(
        False, "--direct", help="Use direct SSH instead of proxy/forward"
    ),
    ssh_args: str | None = typer.Option(
        None, "--ssh-args", help="Extra arguments passed to ssh"
    ),
) -> None:
    """Submit an SSH task, wait for the session, and connect.

    When --command or --entrypoint is provided, the task runs non-interactively:
    the specified command executes in the container and the CLI polls until the
    task completes.
    """
    # The docstring above is the CLI help text, so implementation notes live
    # in comments. Every validation failure below exits with code 1.

    # --- Pure argument validation first, before any filesystem/client work ---
    inferred_interactive = command is None and entrypoint_override is None
    if interactive and not inferred_interactive:
        logging.error(
            "Interactive SSH mode cannot be combined with --command or --entrypoint."
        )
        raise typer.Exit(code=1)
    # An explicit --interactive/--non-interactive flag wins over the inference.
    resolved_interactive = (
        interactive if interactive is not None else inferred_interactive
    )

    if mode not in ("direct", "proxy", "forward"):
        logging.error(f"Invalid mode: {mode}. Use 'direct', 'proxy', or 'forward'.")
        raise typer.Exit(code=1)

    cmd_list = _split_option_words("--command", command)
    ep_list = _split_option_words("--entrypoint", entrypoint_override)

    # One client is shared by key detection, submission and waiting.
    client = FlowMesh()

    # A public key is only resolved for interactive sessions.
    public_key: str | None = None
    if resolved_interactive:
        public_key = _resolve_public_key(client, key)

    workflow_yaml = client.ssh.build_task_yaml(
        name=name,
        public_key=public_key,
        user=user,
        mode=mode,
        ttl=ttl,
        idle_timeout=idle_timeout,
        gpu=gpu,
        gpu_memory=gpu_memory,
        cpu=cpu,
        memory=memory,
        image=image,
        worker=worker,
        env_pairs=env,
        interactive=resolved_interactive,
        command=cmd_list,
        entrypoint=ep_list,
    )

    logging.info("Submitting SSH task...")
    try:
        result = client.workflows.submit(workflow_yaml)
    except FlowMeshError as exc:
        logging.error(f"Failed to submit SSH task: {exc}")
        raise typer.Exit(code=1)

    # Only the first task of the submission result is used.
    task_id = result.tasks[0].task_id if result.tasks else None
    if not task_id:
        logging.error("No task ID returned from submission.")
        logging.error(result.model_dump_json(indent=2))
        raise typer.Exit(code=1)

    logging.info(f"Task submitted: {task_id}")

    if not resolved_interactive:
        if logs:
            _stream_logs_and_exit(client, task_id, interval, tail)
        # Reached when --logs is off (or should the streamer ever return).
        _poll_status_and_exit(client, task_id, interval)
    else:
        # Interactive: wait for SSH session and connect.
        logging.info("Waiting for SSH session...")
        try:
            ssh_info = client.tasks.wait_for_ssh(task_id, interval=interval)
        except FlowMeshError as exc:
            logging.error(str(exc))
            raise typer.Exit(code=1)
        _exec_ssh(ssh_info, task_id, ssh_args, direct)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
@app.command("proxy")
def proxy(
    task_id: str = typer.Argument(..., help="Task identifier"),
) -> None:
    """Raw stdin/stdout proxy for SSH ProxyCommand (internal)."""
    # _exec_ssh builds "flowmesh ssh proxy <task_id>" as ssh's ProxyCommand,
    # which is how this command gets invoked. Ctrl-C ends it silently.
    relay = _run_proxy(task_id)
    try:
        asyncio.run(relay)
    except KeyboardInterrupt:
        pass
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
async def _run_proxy(task_id: str) -> None:
    """Async WebSocket <-> stdio proxy.

    Opens a WebSocket to the URL given by ``client.ssh.proxy_url(task_id)``
    (with a Bearer ``Authorization`` header when the client has an API key)
    and relays bytes both ways: stdin to the socket, socket messages to
    stdout. The relay ends as soon as either direction finishes, and the other
    direction is then cancelled.

    Args:
        task_id: Task identifier used to build the proxy URL.
    """
    client = FlowMesh()
    api_key = client.api_key
    ws_url = client.ssh.proxy_url(task_id)

    auth_header = {"Authorization": f"Bearer {api_key}"} if api_key else None
    # Look the descriptors up once instead of on every chunk.
    stdin_fd = sys.stdin.fileno()
    stdout_fd = sys.stdout.fileno()

    def write_all(data: bytes) -> None:
        # os.write may write fewer bytes than requested; loop so that no part
        # of the SSH stream is silently dropped.
        view = memoryview(data)
        offset = 0
        while offset < len(view):
            offset += os.write(stdout_fd, view[offset:])

    async with websockets.connect(ws_url, additional_headers=auth_header) as ws:
        loop = asyncio.get_running_loop()

        async def stdin_to_ws() -> None:
            try:
                while True:
                    # Blocking read runs in the default executor so the event
                    # loop stays responsive.
                    data = await loop.run_in_executor(None, os.read, stdin_fd, 4096)
                    if not data:
                        break  # EOF on stdin
                    await ws.send(data)
            except Exception:
                # Best effort: stdout carries the raw stream, so there is no
                # safe place to report errors; ending this direction stops
                # the proxy.
                pass

        async def ws_to_stdout() -> None:
            try:
                async for msg in ws:
                    if isinstance(msg, str):
                        msg = msg.encode()
                    await loop.run_in_executor(None, write_all, msg)
            except Exception:
                # Best effort, same reasoning as in stdin_to_ws.
                pass

        t1 = asyncio.create_task(stdin_to_ws())
        t2 = asyncio.create_task(ws_to_stdout())
        _, pending = await asyncio.wait([t1, t2], return_when=asyncio.FIRST_COMPLETED)
        for task in pending:
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
@app.command("connections")
def list_connections(
    connection_id: str | None = typer.Option(
        None, "--id", help="Filter by connection id"
    ),
    access_mode: list[str] | None = typer.Option(
        None, "--mode", help="Filter by access mode (repeatable)"
    ),
    task_id: str | None = typer.Option(None, "--task-id", help="Filter by task id"),
    workflow_id: str | None = typer.Option(
        None, "--workflow-id", help="Filter by workflow id"
    ),
    worker_id: str | None = typer.Option(
        None, "--worker-id", help="Filter by worker id"
    ),
    node_id: str | None = typer.Option(None, "--node-id", help="Filter by node id"),
    username: str | None = typer.Option(
        None, "--username", help="Filter by SSH username"
    ),
    source_ip: str | None = typer.Option(
        None, "--source-ip", help="Filter by source IP"
    ),
    source_port: int | None = typer.Option(
        None, "--source-port", help="Filter by source port"
    ),
    query: list[str] | None = typer.Option(
        None, "--query", "-q", help="Filter connections by key=value pairs"
    ),
) -> None:
    """List active SSH connections audited by the server."""
    # The docstring above is the CLI help text. Output is a JSON array;
    # a FlowMeshError ends the command with exit code 1.
    client = FlowMesh()
    filters = parse_query_filters(query)
    # Parameters are added in the same order as the options are declared.
    append_param(filters, "connection_id", connection_id)
    extend_params(filters, "access_mode", access_mode)
    remaining = (
        ("task_id", task_id),
        ("workflow_id", workflow_id),
        ("worker_id", worker_id),
        ("node_id", node_id),
        ("username", username),
        ("source_ip", source_ip),
        ("source_port", source_port),
    )
    for param_name, param_value in remaining:
        append_param(filters, param_name, param_value)
    try:
        found = client.ssh.list(query_params=filters)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    rows = [item.model_dump(mode="json") for item in found]
    logging.log(json.dumps(rows, indent=2))
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from flowmesh import FlowMesh
|
|
5
|
+
from flowmesh.exceptions import FlowMeshError
|
|
6
|
+
|
|
7
|
+
from ..core import logging
|
|
8
|
+
from ..core.typer import get_typer
|
|
9
|
+
|
|
10
|
+
# Command group for this module: the metrics command below registers itself
# on this object through @app.command().
app = get_typer(help="Query FlowMesh server system information.")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@app.command()
def metrics() -> None:
    """Retrieve and display system metrics from the FlowMesh server."""
    # The docstring above is the CLI help text. The result is printed as
    # indented JSON; a FlowMeshError ends the command with exit code 1.
    flowmesh_client = FlowMesh()
    try:
        snapshot = flowmesh_client.system.metrics()
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    rendered = json.dumps(snapshot, indent=2)
    logging.log(rendered)
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Public FlowMesh task commands."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from flowmesh import FlowMesh
|
|
9
|
+
from flowmesh.exceptions import FlowMeshError
|
|
10
|
+
from flowmesh.models.common import TERMINAL_TASK_STATUSES, TaskStatus
|
|
11
|
+
|
|
12
|
+
from ..core import logging
|
|
13
|
+
from ..core.query import parse_query_filters
|
|
14
|
+
from ..core.task import wait_for_task_completion
|
|
15
|
+
from ..core.typer import get_typer
|
|
16
|
+
|
|
17
|
+
# Command group for this module: the task command functions defined below
# register themselves on this object through @app.command(...).
app = get_typer(help="Query and monitor tasks executing on FlowMesh workers.")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _format_log_event(event: dict) -> str:
|
|
21
|
+
ts = event.get("ts", "")
|
|
22
|
+
message = str(event.get("message", "")).rstrip("\n")
|
|
23
|
+
prefix = f"[{ts}] " if ts else ""
|
|
24
|
+
return prefix + message
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _log_ssh_connection_instructions(
    task_id: str, latest_update: dict, client: FlowMesh
) -> None:
    """Print SSH connection commands when the update carries an ``ssh`` dict.

    Does nothing unless ``latest_update["ssh"]`` is a dict. The mode defaults
    to "direct" when missing or empty. Each (label, command) pair from
    ``client.ssh.connection_commands`` is printed, and proxy mode gets an
    extra note.
    """
    session = latest_update.get("ssh")
    if not isinstance(session, dict):
        return
    publish_mode = str(session.get("mode") or "direct")
    logging.log(f"[{task_id}] SSH connection instructions ({publish_mode}):")
    for title, cmdline in client.ssh.connection_commands(task_id, session):
        logging.log(f" {title}:")
        logging.log(f" {cmdline}")
    if publish_mode != "proxy":
        return
    logging.log(" note:")
    logging.log(" proxy ssh command requires `websocat` and $FLOWMESH_API_KEY")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@app.command()
def info(task_id: str = typer.Argument(..., help="Task identifier")) -> None:
    """Retrieve the current status and metadata for a specific task."""
    # The docstring above is the CLI help text. The task is printed as
    # indented JSON; a FlowMeshError ends the command with exit code 1.
    flowmesh_client = FlowMesh()
    try:
        record = flowmesh_client.tasks.retrieve(task_id)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    rendered = record.model_dump_json(indent=2)
    logging.log(rendered)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@app.command("list")
def list_tasks(
    task_id: str | None = typer.Option(None, "--id", help="Filter by task id"),
    workflow_id: str | None = typer.Option(
        None, "--workflow-id", help="Filter by workflow id"
    ),
    status: list[str] | None = typer.Option(
        None, "--status", "-s", help="Filter by status (repeatable)"
    ),
    category: str | None = typer.Option(None, "--category", help="Filter by category"),
    task_type: str | None = typer.Option(None, "--type", help="Filter by task type"),
    assigned_worker: str | None = typer.Option(
        None, "--assigned-worker", help="Filter by assigned worker id"
    ),
    graph_node_name: str | None = typer.Option(
        None, "--graph-node", help="Filter by graph node name"
    ),
    completed: bool | None = typer.Option(
        None,
        "--completed/--not-completed",
        help="Filter by completion state",
    ),
    failed: bool | None = typer.Option(
        None,
        "--failed/--not-failed",
        help="Filter by failure state",
    ),
    query: list[str] | None = typer.Option(
        None, "--query", "-q", help="Filter tasks by key=value pairs"
    ),
) -> None:
    """List all tasks registered in the FlowMesh server."""
    # The docstring above is the CLI help text. Output is a JSON array;
    # a FlowMeshError ends the command with exit code 1.
    client = FlowMesh()
    extra_filters = parse_query_filters(query)
    filters = {
        "task_id": task_id,
        "workflow_id": workflow_id,
        # An empty status list is passed on as None.
        "status": status or None,
        "category": category,
        "task_type": task_type,
        "assigned_worker": assigned_worker,
        "graph_node_name": graph_node_name,
        "completed": completed,
        "failed": failed,
        "query_params": extra_filters,
    }
    try:
        found = client.tasks.list(**filters)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    rows = [item.model_dump(mode="json") for item in found]
    logging.log(json.dumps(rows, indent=2))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@app.command()
def stop(
    task_id: str = typer.Argument(..., help="Task identifier"),
    interval: float = typer.Option(2.0, help="Polling interval in seconds"),
    no_wait: bool = typer.Option(
        False, "--no-wait", help="Return immediately after requesting stop"
    ),
) -> None:
    """Stop a running task."""
    # The docstring above is the CLI help text. After the stop request, the
    # command either returns at once (--no-wait) or waits for completion and
    # exits with code 1 unless the final status is DONE.
    flowmesh_client = FlowMesh()
    try:
        flowmesh_client.tasks.stop(task_id)
    except FlowMeshError as err:
        logging.error(f"Failed to stop task: {err}")
        raise typer.Exit(code=1)

    notice = f"Stop requested for task {task_id}."
    if no_wait:
        logging.log(notice)
        return

    logging.info(notice)
    status, error = wait_for_task_completion(task_id, interval)
    if status == TaskStatus.DONE:
        logging.success("Task stopped successfully.")
        return
    # NOTE(review): CANCELLED is reported as an error here, exactly as the
    # original did; confirm that is the intended outcome of a stop request.
    if status == TaskStatus.FAILED or status == TaskStatus.CANCELLED:
        logging.error(f"Task {status.lower()}: {error}")
    else:
        logging.error(f"Unexpected task status: {status}")
    raise typer.Exit(code=1)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@app.command()
def watch(
    task_id: str = typer.Argument(..., help="Task identifier"),
    interval: float = typer.Option(2.0, help="Polling interval in seconds"),
) -> None:
    """Monitor a task's status by polling until completion."""
    # The docstring above is the CLI help text. Status changes and new
    # "latest_update" payloads are printed as they appear, and the full task
    # is dumped once a terminal status is seen. Ctrl-C and FlowMeshError both
    # exit with code 1.
    client = FlowMesh()
    seen_status: TaskStatus | None = None
    seen_update: dict | None = None
    try:
        while True:
            task = client.tasks.retrieve(task_id)
            snapshot = task.model_dump(mode="json")
            current = task.status

            # Print the status only when it differs from the last one shown.
            if current != seen_status:
                logging.log(f"[{task_id}] status: {current}")
                seen_status = current

            # Same de-duplication for the latest_update payload.
            update = snapshot.get("latest_update")
            if isinstance(update, dict) and update != seen_update:
                logging.log(json.dumps(update, indent=2))
                _log_ssh_connection_instructions(task_id, update, client)
                seen_update = update

            if current in TERMINAL_TASK_STATUSES:
                logging.log(json.dumps(snapshot, indent=2))
                return
            time.sleep(interval)
    except KeyboardInterrupt:
        logging.warning("Cancelled by user.")
        raise typer.Exit(code=1)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Command group for the log commands (show/stream/download) defined below;
# the next statement attaches it to `app` under the name "logs".
logs_app = get_typer(help="Query and monitor task logs.")
|
|
177
|
+
app.add_typer(logs_app, name="logs")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@logs_app.command("show")
def show_logs(
    task_id: str = typer.Argument(..., help="Task identifier"),
    limit: int = typer.Option(200, help="Maximum number of entries to return"),
    before: str | None = typer.Option(None, help="Return entries before this cursor"),
    after: str | None = typer.Option(None, help="Return entries after this cursor"),
    json_output: bool = typer.Option(
        False, "--json", help="Print raw JSON response instead of formatted lines"
    ),
) -> None:
    """Query recent task logs from the server."""
    # The docstring above is the CLI help text. With --json the whole response
    # is dumped. Otherwise each entry's event is formatted on its own line,
    # followed by the paging cursors when at least one of them is set.
    client = FlowMesh()
    try:
        response = client.tasks.get_logs(
            task_id, limit=limit, before=before, after=after
        )
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)

    payload = response.model_dump(mode="json")
    if json_output:
        logging.log(json.dumps(payload, indent=2))
        return

    for item in payload.get("entries") or []:
        # Entries or events that are not dicts are skipped.
        event = item.get("event") if isinstance(item, dict) else None
        if isinstance(event, dict):
            logging.log(_format_log_event(event))

    cursors = {
        "next_cursor": payload.get("next_cursor"),
        "prev_cursor": payload.get("prev_cursor"),
    }
    if any(cursors.values()):
        logging.log(json.dumps(cursors))
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@logs_app.command("stream")
def stream_logs(
    task_id: str = typer.Argument(..., help="Task identifier"),
    cursor: str | None = typer.Option(None, help="Start streaming after this cursor"),
) -> None:
    """Stream task logs via SSE."""
    # The docstring above is the CLI help text. Each streamed entry is printed
    # as one formatted line; Ctrl-C and FlowMeshError both exit with code 1.
    flowmesh_client = FlowMesh()
    try:
        entries = flowmesh_client.tasks.stream_logs(task_id, cursor=cursor)
        for record in entries:
            fields = record.event.model_dump(mode="json")
            line = _format_log_event(fields)
            logging.log(line)
    except KeyboardInterrupt:
        logging.warning("Cancelled by user.")
        raise typer.Exit(code=1)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@logs_app.command("download")
def download_logs(
    task_id: str = typer.Argument(..., help="Task identifier"),
    output: Path = typer.Option(..., "--output", "-o", help="Path to save logs.jsonl"),
) -> None:
    """Download archived logs.jsonl for a task."""
    # The docstring above is the CLI help text. A FlowMeshError or OSError
    # ends the command with exit code 1; success prints the output path.
    flowmesh_client = FlowMesh()
    try:
        flowmesh_client.tasks.download_logs(task_id, output)
    except FlowMeshError as err:
        logging.error(str(err))
        raise typer.Exit(code=1)
    except OSError as err:
        logging.error(f"Failed to write {output}: {err}")
        raise typer.Exit(code=1)
    else:
        logging.log(f"Wrote logs to {output}")
|