mirrorneuron-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirrorneuron_cli-1.0.0.dist-info/METADATA +73 -0
- mirrorneuron_cli-1.0.0.dist-info/RECORD +19 -0
- mirrorneuron_cli-1.0.0.dist-info/WHEEL +5 -0
- mirrorneuron_cli-1.0.0.dist-info/entry_points.txt +2 -0
- mirrorneuron_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
- mirrorneuron_cli-1.0.0.dist-info/top_level.txt +1 -0
- mn_cli/__init__.py +0 -0
- mn_cli/config.py +43 -0
- mn_cli/error_handler.py +51 -0
- mn_cli/libs/__init__.py +1 -0
- mn_cli/libs/blueprint_cmds.py +598 -0
- mn_cli/libs/job_cmds.py +160 -0
- mn_cli/libs/run_cmds.py +780 -0
- mn_cli/libs/sys_cmds.py +52 -0
- mn_cli/libs/ui.py +162 -0
- mn_cli/logging_config.py +38 -0
- mn_cli/main.py +35 -0
- mn_cli/server_cmds.py +331 -0
- mn_cli/shared.py +13 -0
mn_cli/libs/run_cmds.py
ADDED
|
@@ -0,0 +1,780 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated, Any, Optional
|
|
8
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
|
|
9
|
+
from logging.handlers import RotatingFileHandler
|
|
10
|
+
from mn_cli.libs.ui import (
|
|
11
|
+
generate_detached_panel,
|
|
12
|
+
generate_live_layout,
|
|
13
|
+
generate_run_submitted_panel,
|
|
14
|
+
generate_summary_panel,
|
|
15
|
+
)
|
|
16
|
+
from mn_cli.shared import console, client, logger
|
|
17
|
+
from mn_cli.error_handler import handle_cli_error
|
|
18
|
+
|
|
19
|
+
# Event types this module treats as built-in runtime events. Any other type is
# logged as a custom event and is copied into the result stream file.
STANDARD_EVENTS = {
    # job lifecycle
    "init",
    "job_pending",
    "job_validated",
    "job_scheduled",
    "job_running",
    "job_completed",
    "job_failed",
    "job_paused",
    "job_resumed",
    "job_cancelled",
    # agent recovery
    "agent_recovery_started",
    "agent_recovered",
    # message routing
    "agent_message_received",
    "aggregator_received",
    "aggregator_duplicate_ignored",
    # executor leases
    "executor_lease_requested",
    "executor_lease_acquired",
    "executor_lease_released",
    # sandbox jobs
    "sandbox_job_started",
    "sandbox_job_completed",
    "sandbox_job_failed",
    # cluster membership
    "node_up",
    "node_down",
}

# Job statuses that stop the follow/poll loop in this module.
FINAL_STATUSES = {
    "completed",
    "failed",
    "cancelled",
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class JobLogWriter:
|
|
33
|
+
def __init__(self, job_id: str):
|
|
34
|
+
self.job_id = job_id
|
|
35
|
+
self.log_dir = Path(f"/tmp/mn_{job_id}")
|
|
36
|
+
self.log_dir.mkdir(parents=True, exist_ok=True)
|
|
37
|
+
self.events_file = self.log_dir / "events.log"
|
|
38
|
+
self.snapshot_file = self.log_dir / "job_snapshot.json"
|
|
39
|
+
self.seen = set()
|
|
40
|
+
self.web_ui_urls = set()
|
|
41
|
+
self.event_count = 0
|
|
42
|
+
self.max_bytes = int(
|
|
43
|
+
os.getenv("MN_RUN_EVENT_LOG_MAX_BYTES", str(10 * 1024 * 1024))
|
|
44
|
+
)
|
|
45
|
+
self.backup_count = int(os.getenv("MN_RUN_EVENT_LOG_BACKUP_COUNT", "5"))
|
|
46
|
+
self.run_logger = self._build_run_logger()
|
|
47
|
+
|
|
48
|
+
def _build_run_logger(self) -> logging.Logger:
|
|
49
|
+
run_logger = logging.getLogger(f"mn-cli.run.{self.job_id}")
|
|
50
|
+
run_logger.setLevel(os.getenv("MN_RUN_LOG_LEVEL", "INFO").upper())
|
|
51
|
+
run_logger.propagate = False
|
|
52
|
+
|
|
53
|
+
if run_logger.handlers:
|
|
54
|
+
return run_logger
|
|
55
|
+
|
|
56
|
+
handler = RotatingFileHandler(
|
|
57
|
+
self.log_dir / "run.log",
|
|
58
|
+
maxBytes=int(os.getenv("MN_RUN_LOG_MAX_BYTES", str(2 * 1024 * 1024))),
|
|
59
|
+
backupCount=int(os.getenv("MN_RUN_LOG_BACKUP_COUNT", "5")),
|
|
60
|
+
)
|
|
61
|
+
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
|
|
62
|
+
run_logger.addHandler(handler)
|
|
63
|
+
return run_logger
|
|
64
|
+
|
|
65
|
+
def write_event_json(self, event_json: str) -> bool:
|
|
66
|
+
try:
|
|
67
|
+
event = json.loads(event_json)
|
|
68
|
+
except Exception:
|
|
69
|
+
self.run_logger.warning("Skipping invalid event JSON: %r", event_json)
|
|
70
|
+
return False
|
|
71
|
+
return self.write_event(event)
|
|
72
|
+
|
|
73
|
+
def write_event(self, event: dict) -> bool:
|
|
74
|
+
key = self._event_key(event)
|
|
75
|
+
if key in self.seen:
|
|
76
|
+
return False
|
|
77
|
+
|
|
78
|
+
self.seen.add(key)
|
|
79
|
+
self._rotate_if_needed()
|
|
80
|
+
with open(self.events_file, "a") as f:
|
|
81
|
+
f.write(json.dumps(event, sort_keys=True) + "\n")
|
|
82
|
+
self.event_count += 1
|
|
83
|
+
|
|
84
|
+
event_type = event.get("type", "unknown")
|
|
85
|
+
if event_type in {"slow_event_processed", "stream_metrics_updated"}:
|
|
86
|
+
payload = event.get("payload", {})
|
|
87
|
+
self.run_logger.info(
|
|
88
|
+
"slow_agent_event=%s agent=%s payload=%s",
|
|
89
|
+
event_type,
|
|
90
|
+
event.get("agent_id") or payload.get("worker") or event.get("node"),
|
|
91
|
+
json.dumps(payload, sort_keys=True),
|
|
92
|
+
)
|
|
93
|
+
elif event_type in {"backpressure_state", "external_input_rejected"}:
|
|
94
|
+
self.run_logger.info(
|
|
95
|
+
"backpressure_event=%s agent=%s payload=%s",
|
|
96
|
+
event_type,
|
|
97
|
+
event.get("agent_id"),
|
|
98
|
+
json.dumps(event.get("payload", {}), sort_keys=True),
|
|
99
|
+
)
|
|
100
|
+
elif event_type in {"job_failed", "sandbox_job_failed"}:
|
|
101
|
+
self.run_logger.error(
|
|
102
|
+
"event=%s payload=%s", event_type, json.dumps(event, sort_keys=True)
|
|
103
|
+
)
|
|
104
|
+
elif event_type not in STANDARD_EVENTS:
|
|
105
|
+
self.run_logger.info(
|
|
106
|
+
"custom_event=%s payload=%s",
|
|
107
|
+
event_type,
|
|
108
|
+
json.dumps(event, sort_keys=True),
|
|
109
|
+
)
|
|
110
|
+
else:
|
|
111
|
+
self.run_logger.info("event=%s", event_type)
|
|
112
|
+
return True
|
|
113
|
+
|
|
114
|
+
def write_snapshot(self, data: dict):
|
|
115
|
+
with open(self.snapshot_file, "w") as f:
|
|
116
|
+
json.dump(data, f, indent=2, sort_keys=True)
|
|
117
|
+
|
|
118
|
+
def _rotate_if_needed(self):
|
|
119
|
+
if not self.events_file.exists() or self.events_file.stat().st_size < self.max_bytes:
|
|
120
|
+
return
|
|
121
|
+
|
|
122
|
+
for index in range(self.backup_count - 1, 0, -1):
|
|
123
|
+
src = self.log_dir / f"events.log.{index}"
|
|
124
|
+
dst = self.log_dir / f"events.log.{index + 1}"
|
|
125
|
+
if src.exists():
|
|
126
|
+
if dst.exists():
|
|
127
|
+
dst.unlink()
|
|
128
|
+
src.rename(dst)
|
|
129
|
+
|
|
130
|
+
first_backup = self.log_dir / "events.log.1"
|
|
131
|
+
if first_backup.exists():
|
|
132
|
+
first_backup.unlink()
|
|
133
|
+
self.events_file.rename(first_backup)
|
|
134
|
+
|
|
135
|
+
@staticmethod
|
|
136
|
+
def _event_key(event: dict):
|
|
137
|
+
payload = event.get("payload", {})
|
|
138
|
+
if not isinstance(payload, dict):
|
|
139
|
+
payload = {}
|
|
140
|
+
return (
|
|
141
|
+
event.get("timestamp"),
|
|
142
|
+
event.get("type"),
|
|
143
|
+
event.get("agent_id"),
|
|
144
|
+
event.get("node"),
|
|
145
|
+
event.get("message_id") or payload.get("message_id"),
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
def record_web_ui_url(self, event: dict) -> Optional[str]:
|
|
149
|
+
url = _extract_web_ui_url(event)
|
|
150
|
+
if not url or url in self.web_ui_urls:
|
|
151
|
+
return None
|
|
152
|
+
self.web_ui_urls.add(url)
|
|
153
|
+
return url
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _extract_web_ui_url(event: dict) -> Optional[str]:
    """Return the web UI URL carried by *event* as a string, or None.

    The URL is looked up in the event's ``payload`` dict (or in the event
    itself when there is no dict payload), preferring a nested ``web_ui``
    dict when one is present. The keys ``url``, ``web_ui_url`` and
    ``local_url`` are tried in that order and the first truthy value wins.
    """
    if not isinstance(event, dict):
        return None

    container = event.get("payload")
    if not isinstance(container, dict):
        container = event

    nested = container.get("web_ui")
    source = nested if isinstance(nested, dict) else container

    for field in ("url", "web_ui_url", "local_url"):
        candidate = source.get(field)
        if candidate:
            return str(candidate)
    return None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def fetch_and_save_results(job_id: str, data: Optional[dict] = None):
    """Save a job's final result and custom stream events under ``/tmp/mn_<job_id>``.

    Args:
        job_id: Identifier of the job whose results are saved.
        data: Already-fetched job document. When None it is fetched with
            ``client.get_job``; if that fails the error is logged and nothing
            is written.

    ``result.txt`` is written only when the job status is ``completed`` and a
    result is present. ``result_stream.txt`` is rewritten with the payloads of
    all events whose type is not in ``STANDARD_EVENTS``.
    """
    log_dir = Path(f"/tmp/mn_{job_id}")
    log_dir.mkdir(parents=True, exist_ok=True)

    if data is None:
        try:
            data = json.loads(client.get_job(job_id))
        except Exception:
            logger.exception("Failed to fetch job result for %s", job_id)
            return

    # "job" may be missing or null (or data may be an error document).
    job = data.get("job") if isinstance(data, dict) else None
    if not isinstance(job, dict):
        job = {}

    # Save final result if completed
    if job.get("status") == "completed":
        result = job.get("result")
        # "is not None" rather than truthiness: 0, "", [] and {} are valid
        # results, and the other result writers in this module keep them too.
        if result is not None:
            with open(log_dir / "result.txt", "w") as f:
                json.dump(result, f, indent=2)

    # Save stream results (progressive)
    stream_events = []
    try:
        # Read the whole stream before filtering, so a stream that fails part
        # way through never replaces the existing file with partial content.
        full_events = []
        for ev_str in client.stream_events(job_id):
            try:
                full_events.append(json.loads(ev_str))
            except Exception:
                logger.exception("Failed to decode event while saving results for %s", job_id)

        for ev in full_events:
            # Valid JSON that is not an object cannot carry a type or payload.
            if not isinstance(ev, dict):
                continue
            if ev.get("type") not in STANDARD_EVENTS:
                stream_events.append(ev.get("payload", ev))
    except Exception:
        logger.exception("Failed to stream events while saving results for %s", job_id)

    if stream_events:
        with open(log_dir / "result_stream.txt", "w") as f:
            for se in stream_events:
                f.write(json.dumps(se) + "\n")
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _stream_and_format_events(
    job_id: str,
    log_writer: Optional[JobLogWriter] = None,
    follow_seconds: Optional[float] = None,
):
    """Stream a job's events with a progress spinner, then summarise or detach.

    Every streamed event is logged through *log_writer*, copied to the result
    stream file when applicable, and reflected in the spinner text. The stream
    loop stops on ``job_completed`` or ``job_failed`` and a summary panel is
    printed. If the stream ends without either, the job is polled through
    ``_follow_job_events`` for *follow_seconds* and a detached panel is
    printed. Ctrl-C prints a detach notice, polls once and prints the
    detached panel.

    *follow_seconds* defaults to ``MN_RUN_DETACH_LOG_SECONDS`` (30 when unset).
    """
    if not log_writer:
        log_writer = JobLogWriter(job_id)
    log_dir = log_writer.log_dir
    if follow_seconds is None:
        follow_seconds = float(os.getenv("MN_RUN_DETACH_LOG_SECONDS", "30"))

    def make_progress() -> Progress:
        # Both phases (streaming and following) use the same spinner layout.
        return Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            TimeElapsedColumn(),
            console=console,
        )

    # Spinner text for lifecycle events that only change the description.
    phase_descriptions = {
        "job_pending": "[cyan]Preparing: job accepted, waiting for validation...",
        "job_validated": "[cyan]Preparing: manifest validated, scheduling agents...",
        "job_scheduled": "[cyan]Starting: agents scheduled, waiting for runtime to report running...",
        "job_running": "[green]Running: streaming live job events...",
    }

    # Stays None unless the stream itself reports how the job ended.
    final_status = None
    msg_count = 0

    try:
        with make_progress() as progress:
            job_task = progress.add_task("[cyan]Submitting job bundle...", total=None)

            for event_json in client.stream_events(job_id):
                log_writer.write_event_json(event_json)
                try:
                    event = json.loads(event_json)
                    event_type = event.get("type")

                    _write_result_stream_event(log_dir, event)
                    web_ui_url = log_writer.record_web_ui_url(event)
                    if web_ui_url:
                        progress.console.print(f"[green]Blueprint Web UI:[/green] {web_ui_url}")

                    if event_type in phase_descriptions:
                        progress.update(job_task, description=phase_descriptions[event_type])
                    elif event_type in ("agent_message_received", "aggregator_received"):
                        msg_count += 1
                        progress.update(
                            job_task,
                            description=f"[green]Running: {msg_count} routed messages, {log_writer.event_count} events logged...",
                        )
                    elif event_type == "job_completed":
                        result = event.get("result")
                        if result is not None:
                            with open(log_dir / "result.txt", "w") as f_res:
                                json.dump(result, f_res, indent=2)

                        progress.update(job_task, description="[green]Completed successfully.")
                        final_status = "completed"
                        break
                    elif event_type == "job_failed":
                        progress.update(job_task, description="[red]Job failed.")
                        final_status = "failed"
                        break
                    else:
                        progress.update(
                            job_task,
                            description=f"[cyan]Observing: latest event {event_type}, {log_writer.event_count} events logged...",
                        )
                except Exception:
                    # One bad event is logged and skipped; streaming continues.
                    log_writer.run_logger.exception("Failed to process streamed event")

        if final_status is not None:
            console.print(
                generate_summary_panel(
                    job_id=job_id,
                    status=final_status,
                    log_dir=log_dir,
                )
            )
        else:
            # The stream ended without a terminal event: keep polling for a
            # while before reporting the job as detached.
            with make_progress() as progress:
                follow_task = progress.add_task(
                    f"[cyan]Following job for {follow_seconds:g}s before detach...",
                    total=None,
                )
                status, _data = _follow_job_events(
                    job_id,
                    log_writer,
                    follow_seconds,
                    progress=progress,
                    task_id=follow_task,
                )
            console.print(generate_detached_panel(job_id, log_dir, status, log_writer.event_count))

    except KeyboardInterrupt:
        console.print("[yellow]Detached from log stream.[/yellow]")
        # follow_seconds=0: take a single snapshot, do not keep polling.
        status, _data = _follow_job_events(job_id, log_writer, 0)
        console.print(generate_detached_panel(job_id, log_dir, status, log_writer.event_count))
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _write_result_stream_event(log_dir: Path, event: dict):
    """Append a non-standard event's payload to ``result_stream.txt``.

    Events whose type is in ``STANDARD_EVENTS`` are ignored. For any other
    event the payload (or the whole event when it has no ``payload`` key) is
    first handed to ``_materialize_sent_email_copy`` and then written as one
    JSON line.
    """
    is_standard = event.get("type") in STANDARD_EVENTS
    if not is_standard:
        body = event.get("payload", event)
        _materialize_sent_email_copy(log_dir, body)
        stream_path = log_dir / "result_stream.txt"
        with stream_path.open("a") as stream:
            stream.write(json.dumps(body, sort_keys=True) + "\n")
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _materialize_sent_email_copy(log_dir: Path, payload: dict):
    """Write the ``sent_email_copy`` carried by *payload* into ``<log_dir>/sent_emails``.

    Nothing is written unless *payload* is a dict whose ``sent_email_copy`` is
    a dict holding at least one of ``html_content``, ``text_content`` or a
    dict ``metadata``. File names come from the base name of the matching
    ``*_path`` field when it is usable, otherwise from ``provider_id``,
    ``subject`` or a nanosecond timestamp. The metadata file additionally
    records the three host paths.
    """
    if not isinstance(payload, dict):
        return
    sent_copy = payload.get("sent_email_copy")
    if not isinstance(sent_copy, dict):
        return
    html_content = sent_copy.get("html_content")
    text_content = sent_copy.get("text_content")
    metadata = sent_copy.get("metadata")
    if html_content is None and text_content is None and not isinstance(metadata, dict):
        return

    email_dir = log_dir / "sent_emails"
    email_dir.mkdir(parents=True, exist_ok=True)

    def resolve_path(raw_path: Optional[str], suffix: str) -> Path:
        # Keep only the base name so the file always lands inside email_dir.
        if raw_path:
            name = Path(str(raw_path)).name
            # "..", "." and "/" have no usable base name; joining them would
            # point at a directory and make the write fail, so fall through
            # to a generated name instead.
            if name and name not in (".", ".."):
                return email_dir / name
        stem = str(payload.get("provider_id") or payload.get("subject") or time.time_ns())
        safe_stem = "".join(ch if ch.isalnum() or ch in "._-" else "-" for ch in stem)[:96]
        return email_dir / f"{safe_stem or time.time_ns()}.{suffix}"

    html_path = resolve_path(sent_copy.get("html_path"), "html")
    text_path = resolve_path(sent_copy.get("text_path"), "txt")
    metadata_path = resolve_path(sent_copy.get("metadata_path"), "json")

    if html_content is not None:
        html_path.write_text(str(html_content), encoding="utf-8")
    if text_content is not None:
        text_path.write_text(str(text_content), encoding="utf-8")
    if isinstance(metadata, dict):
        host_metadata = {
            **metadata,
            "host_html_path": str(html_path),
            "host_text_path": str(text_path),
            "host_metadata_path": str(metadata_path),
        }
        metadata_path.write_text(json.dumps(host_metadata, indent=2, sort_keys=True), encoding="utf-8")
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _follow_job_events(
    job_id: str,
    log_writer: JobLogWriter,
    follow_seconds: float,
    progress: Optional[Progress] = None,
    task_id=None,
):
    """Poll a job until it reaches a final status or *follow_seconds* elapse.

    Each poll fetches the job with ``client.get_job``, saves it as the
    snapshot, records any new ``recent_events`` and, when *progress* and
    *task_id* are given, refreshes the spinner text. At least one poll is
    made even when *follow_seconds* is 0. A failed poll is logged and ends
    the loop.

    Returns:
        ``(last_status, data)`` where ``last_status`` is the most recent
        status seen (``"unknown"`` if none) and ``data`` is the last decoded
        job document, or None when nothing could be fetched.
    """
    deadline = time.monotonic() + max(follow_seconds, 0)
    last_status = "unknown"
    data = None

    while True:
        try:
            data = json.loads(client.get_job(job_id))
            log_writer.write_snapshot(data)
        except Exception:
            log_writer.run_logger.exception("Failed to poll job status")
            break

        # dict.get() defaults only apply to missing keys; a key holding null
        # (or any other non-container value) has to be normalised here.
        snapshot = data if isinstance(data, dict) else {}
        job = snapshot.get("job")
        if not isinstance(job, dict):
            job = {}
        summary = snapshot.get("summary")
        if not isinstance(summary, dict):
            summary = {}
        recent_events = snapshot.get("recent_events")
        if not isinstance(recent_events, list):
            recent_events = []

        last_status = summary.get("status") or job.get("status") or last_status

        for event in reversed(recent_events):
            if not isinstance(event, dict):
                continue
            # write_event() returns False for events that were already logged.
            if log_writer.write_event(event):
                _write_result_stream_event(log_writer.log_dir, event)
                web_ui_url = log_writer.record_web_ui_url(event)
                if web_ui_url and progress is not None:
                    progress.console.print(f"[green]Blueprint Web UI:[/green] {web_ui_url}")

        if progress is not None and task_id is not None:
            remaining = max(deadline - time.monotonic(), 0)
            progress.update(
                task_id,
                description=(
                    f"[cyan]Following: status {last_status}, "
                    f"{log_writer.event_count} events logged, detach in {remaining:0.1f}s..."
                ),
            )

        if last_status in FINAL_STATUSES:
            result = job.get("result")
            if result is not None:
                with open(log_writer.log_dir / "result.txt", "w") as f_res:
                    json.dump(result, f_res, indent=2, sort_keys=True)
            break

        if time.monotonic() >= deadline:
            break

        # Read lazily so a single-shot call (follow_seconds=0) never parses
        # the variable; clamp because time.sleep() rejects negative values.
        time.sleep(max(float(os.getenv("MN_RUN_LOG_POLL_INTERVAL_SECONDS", "0.5")), 0.0))

    return last_status, data
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def validate(bundle_path: str):
    """Check if a job bundle in a local folder is valid to run"""

    def abort(message: str):
        # Show the problem and stop the command with exit code 1.
        console.print(message)
        raise typer.Exit(1)

    try:
        bundle_dir = Path(bundle_path)
        if not bundle_dir.is_dir():
            abort(f"[red]Error: '{bundle_path}' is not a directory. Expected a bundle folder.[/red]")

        manifest_file = bundle_dir / "manifest.json"
        if not manifest_file.exists():
            abort(f"[red]Error: manifest.json not found in '{bundle_path}'[/red]")

        with open(manifest_file, "r") as f:
            try:
                manifest = json.load(f)
            except json.JSONDecodeError as e:
                abort(f"[red]Error: manifest.json is not valid JSON. {e}[/red]")

        # Every manifest must declare these top-level keys.
        missing = [
            key
            for key in ("manifest_version", "graph_id", "job_name", "entrypoints", "nodes")
            if key not in manifest
        ]
        if missing:
            abort(f"[red]Error: manifest.json is missing required keys: {', '.join(missing)}[/red]")

        if not isinstance(manifest.get("nodes"), list):
            abort("[red]Error: 'nodes' must be a list in manifest.json[/red]")

        # Optional key, but when present it has to be a real boolean.
        has_engine_flag = "requiredContextEngine" in manifest
        if has_engine_flag and not isinstance(manifest.get("requiredContextEngine"), bool):
            abort("[red]Error: 'requiredContextEngine' must be true or false in manifest.json[/red]")

        console.print(f"[green]✓ Job bundle at '{bundle_path}' is valid.[/green]")
        console.print(f" - Job Name: {manifest.get('job_name')}")
        console.print(f" - Graph ID: {manifest.get('graph_id')}")
        console.print(f" - Nodes count: {len(manifest.get('nodes'))}")

    except typer.Exit:
        # Validation failures above already printed their own message.
        raise
    except Exception as exc:
        handle_cli_error(exc, console, 'validate')
        raise typer.Exit(1)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def run(
    bundle_path: str,
    follow_seconds: Annotated[
        Optional[float],
        typer.Option(
            "--follow-seconds",
            help="Seconds to keep polling job events after the submit stream detaches. Defaults to MN_RUN_DETACH_LOG_SECONDS or 30.",
        ),
    ] = None,
):
    """Run a job bundle from a local folder directly"""
    # Thin CLI wrapper: the work happens in run_bundle(), which also accepts
    # environment overrides and submission metadata that are not passed here.
    run_bundle(
        bundle_path,
        follow_seconds=follow_seconds,
    )
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def run_bundle(
    bundle_path: str,
    *,
    follow_seconds: Optional[float] = None,
    env_overrides: Optional[dict[str, str]] = None,
    submission_metadata: Optional[dict[str, Any]] = None,
):
    """Run a bundle after applying optional runtime metadata and environment.

    Steps: load ``manifest.json`` from *bundle_path*; when the manifest sets
    ``require_config`` to true, run the bundle's ``config.py`` and reload the
    manifest; prepare the manifest for submission; read every file under
    ``payloads/``; submit the job; then stream and follow its events.

    Any failure is reported to the console and ends with ``typer.Exit(1)``.
    """
    try:
        bundle_dir = Path(bundle_path)
        if not bundle_dir.is_dir():
            console.print(
                f"[red]Error: '{bundle_path}' is not a directory. Expected a bundle folder.[/red]"
            )
            raise typer.Exit(1)

        manifest_file = bundle_dir / "manifest.json"
        if not manifest_file.exists():
            console.print(
                f"[red]Error: manifest.json not found in '{bundle_path}'[/red]"
            )
            raise typer.Exit(1)

        def read_manifest():
            with open(manifest_file, "r") as handle:
                return json.load(handle)

        manifest_dict = read_manifest()

        if manifest_dict.get("require_config") is True:
            config_script = bundle_dir / "config.py"
            if not config_script.exists():
                console.print("[red]Bundle requires configuration, but config.py was not found.[/red]")
                raise typer.Exit(1)

            import subprocess
            import sys
            console.print(f"[yellow]Bundle requires configuration. Auto-running {config_script.name}...[/yellow]")
            # Run the script with the current interpreter, from inside the bundle.
            completed = subprocess.run([sys.executable, config_script.name], cwd=bundle_dir)
            if completed.returncode != 0:
                console.print("[red]Configuration failed or cancelled. Aborting run.[/red]")
                raise typer.Exit(1)

            # Reload manifest after configuration
            manifest_dict = read_manifest()

        manifest_dict = prepare_manifest_for_submission(
            bundle_dir,
            manifest_dict,
            env_overrides=env_overrides,
            submission_metadata=submission_metadata,
        )
        manifest = json.dumps(manifest_dict)

        # Payload files are keyed by their POSIX path relative to payloads/.
        payloads = {}
        payloads_dir = bundle_dir / "payloads"
        if payloads_dir.is_dir():
            payloads = {
                entry.relative_to(payloads_dir).as_posix(): entry.read_bytes()
                for entry in payloads_dir.rglob("*")
                if entry.is_file()
            }

        job_id = client.submit_job(manifest, payloads)
        log_writer = JobLogWriter(job_id)

        meta = submission_metadata or {}
        blueprint_run_id = meta.get("blueprint_run_id") or (env_overrides or {}).get("MN_RUN_ID")
        if blueprint_run_id:
            _write_blueprint_job_mapping(blueprint_run_id, job_id, meta)

        if follow_seconds is None:
            resolved_follow_seconds = float(os.getenv("MN_RUN_DETACH_LOG_SECONDS", "30"))
        else:
            resolved_follow_seconds = follow_seconds

        submitted_panel = generate_run_submitted_panel(
            bundle_name=bundle_dir.name,
            job_id=job_id,
            payload_count=len(payloads),
            log_dir=log_writer.log_dir,
            follow_seconds=resolved_follow_seconds,
            run_mode=_run_mode_label(manifest_dict),
            blueprint_run_id=blueprint_run_id,
            blueprint_revision=meta.get("blueprint_revision"),
        )
        console.print(submitted_panel)
        _stream_and_format_events(job_id, log_writer, resolved_follow_seconds)
    except typer.Exit:
        # Deliberate exits above already printed their message.
        raise
    except Exception as exc:
        handle_cli_error(exc, console, 'run bundle')
        raise typer.Exit(1)
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def prepare_manifest_for_submission(
    bundle_dir: Path,
    manifest_dict: dict[str, Any],
    *,
    env_overrides: Optional[dict[str, str]] = None,
    submission_metadata: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Return a copy of *manifest_dict* with runtime environment and metadata applied.

    The copy is made through a JSON round trip, so the input is not modified.
    The environment read from the bundle is merged with the non-None entries
    of *env_overrides* (values converted to ``str``) and injected into the
    nodes. Non-empty *submission_metadata* is merged into
    ``metadata["mn_cli"]``.
    """
    prepared = json.loads(json.dumps(manifest_dict))

    runtime_env = _blueprint_runtime_environment(bundle_dir)
    for name, value in (env_overrides or {}).items():
        if value is not None:
            runtime_env[name] = str(value)
    if runtime_env:
        _inject_node_environment(prepared, runtime_env)

    extra = dict(submission_metadata) if submission_metadata else {}
    if extra:
        cli_section = prepared.setdefault("metadata", {}).setdefault("mn_cli", {})
        cli_section.update(extra)

    return prepared
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _blueprint_runtime_environment(bundle_dir: Path) -> dict[str, str]:
    """Return environment variables built from the bundle's optional JSON files.

    ``config/default.json`` is exposed as ``MN_BLUEPRINT_CONFIG_JSON`` and
    ``scenario.json`` as ``MN_BLUEPRINT_SCENARIO_JSON``; each value is the
    file's text read as UTF-8. Files that do not exist are left out.
    """
    sources = {
        "MN_BLUEPRINT_CONFIG_JSON": bundle_dir / "config/default.json",
        "MN_BLUEPRINT_SCENARIO_JSON": bundle_dir / "scenario.json",
    }
    return {
        env_name: source.read_text(encoding="utf-8")
        for env_name, source in sources.items()
        if source.exists()
    }
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
def _inject_node_environment(manifest: dict[str, Any], env: dict[str, str]) -> None:
    """Merge *env* into ``config["environment"]`` of every node in *manifest*.

    Missing ``config`` / ``environment`` entries are created. Nodes whose
    node, ``config`` or ``environment`` value is not a dict are skipped.
    Values from *env* replace existing keys, after which the ``MN_LLM_*``
    aliases are filled in.
    """

    def environment_of(node):
        # Return the node's environment dict (creating it if absent), or None
        # when the node does not have the expected dict structure.
        if not isinstance(node, dict):
            return None
        config = node.setdefault("config", {})
        if not isinstance(config, dict):
            return None
        environment = config.setdefault("environment", {})
        return environment if isinstance(environment, dict) else None

    for node in manifest.get("nodes") or []:
        environment = environment_of(node)
        if environment is None:
            continue
        environment.update(env)
        _add_mn_llm_aliases(environment)
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def _add_mn_llm_aliases(environment: dict[str, Any]) -> None:
    """Copy each ``LITELLM_*`` value to its ``MN_LLM_*`` name when that name is unset.

    *environment* is modified in place; ``MN_LLM_*`` keys that already exist
    are left untouched.
    """
    # primary name -> legacy name it falls back to
    aliases = {
        "MN_LLM_MODEL": "LITELLM_MODEL",
        "MN_LLM_API_BASE": "LITELLM_API_BASE",
        "MN_LLM_API_KEY": "LITELLM_API_KEY",
        "MN_LLM_TIMEOUT_SECONDS": "LITELLM_TIMEOUT_SECONDS",
        "MN_LLM_MAX_TOKENS": "LITELLM_MAX_TOKENS",
        "MN_LLM_NUM_RETRIES": "LITELLM_NUM_RETRIES",
        "MN_LLM_RETRY_BACKOFF_SECONDS": "LITELLM_RETRY_BACKOFF_SECONDS",
    }
    for primary, legacy in aliases.items():
        if legacy in environment:
            environment.setdefault(primary, environment[legacy])
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _write_blueprint_job_mapping(blueprint_run_id: str, job_id: str, metadata: dict[str, Any]) -> None:
    """Record which job a blueprint run was submitted as.

    Writes ``job.json`` into ``<MN_RUNS_ROOT>/<blueprint_run_id>`` (root
    defaults to ``~/.mn/runs``) with the run id, job id, blueprint revision
    and a UTC submission timestamp. ``OSError`` is logged, not raised.
    """
    runs_root = Path(os.getenv("MN_RUNS_ROOT", "~/.mn/runs")).expanduser()
    run_dir = runs_root / blueprint_run_id
    try:
        run_dir.mkdir(parents=True, exist_ok=True)
        record = {
            "run_id": blueprint_run_id,
            "job_id": job_id,
            "blueprint_revision": metadata.get("blueprint_revision"),
            "submitted_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        }
        serialized = json.dumps(record, indent=2, sort_keys=True) + "\n"
        # Write to a per-process temporary name first, then move it into
        # place, so job.json is only ever replaced by a fully written file.
        staging = run_dir / f".job.json.{os.getpid()}.tmp"
        staging.write_text(serialized, encoding="utf-8")
        staging.replace(run_dir / "job.json")
    except OSError:
        logger.exception("Failed to write blueprint job mapping for run_id=%s job_id=%s", blueprint_run_id, job_id)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _run_mode_label(manifest: dict) -> str:
    """Return the run-mode label for *manifest*.

    ``"Live daemon"`` when ``daemon`` is exactly True, ``"Live"`` when
    ``policies["stream_mode"]`` is ``"live"``, otherwise ``"Batch"``.
    """
    if manifest.get("daemon") is True:
        return "Live daemon"
    stream_mode = manifest.get("policies", {}).get("stream_mode")
    return "Live" if stream_mode == "live" else "Batch"
|
|
678
|
+
def _live_monitor(job_id: str):
    """Show a live view of a job until it finishes or the user quits.

    The job is polled with ``client.get_job`` and rendered through a Rich
    ``Live`` display. When stdin is a terminal it is switched to cbreak mode
    so that ``q`` (or Ctrl-C) leaves the monitor, and the previous terminal
    settings are restored on exit. When stdin is not a terminal a single
    snapshot is taken.

    After the display closes the results are saved with
    ``fetch_and_save_results``; a summary panel is printed when the job
    reached a final status.
    """
    import sys
    import select
    import time
    from rich.live import Live
    from rich.panel import Panel

    is_tty = sys.stdin.isatty()
    old_settings = None
    fd = None
    if is_tty:
        import tty
        import termios
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)

    class MonitorView:
        """Renderable that always draws the most recent poll result."""

        def __init__(self):
            self.data = None

        def __rich__(self):
            if not self.data:
                return Panel("Connecting...", style="cyan")
            if "error" in self.data:
                return Panel(f"Error fetching job: {self.data['error']}", style="red")
            return generate_live_layout(job_id, self.data)

    final_status = "unknown"
    view = MonitorView()
    # Bound before the loop: an interrupt that arrives before the first poll
    # finishes must not leave the name undefined for the save step below.
    data = None

    try:
        if is_tty:
            # Inside the try so the finally clause always undoes it.
            tty.setcbreak(fd)

        with Live(view, refresh_per_second=12, console=console):
            while True:
                try:
                    data = json.loads(client.get_job(job_id))
                except Exception as exc:
                    # Shown in the view as an error panel; polling continues.
                    data = {"error": str(exc)}

                view.data = data

                if data and "error" not in data:
                    status = data.get("summary", {}).get("status", "unknown")
                    if status in ("completed", "failed", "cancelled"):
                        final_status = status
                        break

                if is_tty:
                    # Wait up to 0.5s for a key press; this is also the poll delay.
                    readable, _, _ = select.select([sys.stdin], [], [], 0.5)
                    if readable:
                        key = sys.stdin.read(1)
                        if key.lower() == 'q' or key == '\x03':  # \x03 is Ctrl-C
                            break
                else:
                    # No keyboard to quit with: give the display time to draw
                    # one frame, then stop.
                    time.sleep(0.5)
                    break

    except KeyboardInterrupt:
        pass
    finally:
        if is_tty and old_settings:
            import termios
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)

    if final_status in ("completed", "failed", "cancelled"):
        # Save results and print final summary
        fetch_and_save_results(job_id, data)
        log_dir = Path(f"/tmp/mn_{job_id}")
        panel = generate_summary_panel(job_id, final_status, log_dir)
        console.print(panel)
    else:
        console.print(f"\n[yellow]Exited live monitor for {job_id}[/yellow]")
        # data is None when no poll completed; the callee then fetches the job itself.
        fetch_and_save_results(job_id, data)
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def monitor(job_id: str):
    """Stream live events for a job"""
    # CLI boundary: any error from the live monitor is reported through the
    # shared error handler and is not re-raised.
    try:
        _live_monitor(job_id)
    except Exception as exc:
        handle_cli_error(exc, console, 'monitor stream')
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def result(job_id: str):
    """Fetch and save the final and progressive results for a job"""
    try:
        console.print(f"Fetching results for {job_id}...")
        fetch_and_save_results(job_id)

        # Report which of the two result files now exist in the job's log directory.
        log_dir = Path(f"/tmp/mn_{job_id}")
        res_file = log_dir / "result.txt"
        stream_file = log_dir / "result_stream.txt"

        if not res_file.exists():
            console.print("[yellow]No final result found (job might not be completed).[/yellow]")
        else:
            console.print(f"[green]Final result saved to: {res_file}[/green]")

        if stream_file.exists():
            console.print(f"[green]Stream results saved to: {stream_file}[/green]")

    except Exception as exc:
        handle_cli_error(exc, console, 'fetch results')
|