experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/cli/__init__.py +177 -31
- experimaestro/experiments/cli.py +6 -2
- experimaestro/scheduler/base.py +21 -0
- experimaestro/scheduler/experiment.py +64 -34
- experimaestro/scheduler/interfaces.py +27 -0
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/client.py +874 -0
- experimaestro/scheduler/remote/protocol.py +467 -0
- experimaestro/scheduler/remote/server.py +423 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +158 -32
- experimaestro/scheduler/state_db.py +58 -9
- experimaestro/scheduler/state_provider.py +512 -91
- experimaestro/scheduler/state_sync.py +65 -8
- experimaestro/tests/test_cli_jobs.py +3 -3
- experimaestro/tests/test_remote_state.py +671 -0
- experimaestro/tests/test_state_db.py +8 -8
- experimaestro/tui/app.py +100 -8
- experimaestro/version.py +2 -2
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +4 -4
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/RECORD +24 -18
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
experimaestro/cli/__init__.py
CHANGED
|
@@ -309,11 +309,98 @@ def experiments(ctx, workdir, workspace):
|
|
|
309
309
|
@experiments.command()
|
|
310
310
|
@pass_cfg
|
|
311
311
|
def list(workdir: Path):
|
|
312
|
+
"""List experiments in the workspace"""
|
|
313
|
+
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
314
|
+
|
|
315
|
+
# Get experiments from state provider for detailed info
|
|
316
|
+
state_provider = WorkspaceStateProvider.get_instance(
|
|
317
|
+
workdir, read_only=True, sync_on_start=True
|
|
318
|
+
)
|
|
319
|
+
experiments_list = state_provider.get_experiments()
|
|
320
|
+
|
|
321
|
+
# Build lookup by experiment_id
|
|
322
|
+
exp_info = {exp.experiment_id: exp for exp in experiments_list}
|
|
323
|
+
|
|
312
324
|
for p in (workdir / "xp").iterdir():
|
|
325
|
+
exp_id = p.name
|
|
326
|
+
exp = exp_info.get(exp_id)
|
|
327
|
+
|
|
328
|
+
# Build display string
|
|
329
|
+
display_parts = []
|
|
330
|
+
|
|
331
|
+
if (p / "jobs.bak").exists():
|
|
332
|
+
display_parts.append("[unfinished]")
|
|
333
|
+
|
|
334
|
+
display_parts.append(exp_id)
|
|
335
|
+
|
|
336
|
+
# Add hostname if available
|
|
337
|
+
if exp and getattr(exp, "hostname", None):
|
|
338
|
+
display_parts.append(f"[{exp.hostname}]")
|
|
339
|
+
|
|
340
|
+
# Add job stats if available
|
|
341
|
+
if exp:
|
|
342
|
+
display_parts.append(f"({exp.finished_jobs}/{exp.total_jobs} jobs)")
|
|
343
|
+
|
|
344
|
+
display_str = " ".join(display_parts)
|
|
345
|
+
|
|
313
346
|
if (p / "jobs.bak").exists():
|
|
314
|
-
cprint(
|
|
347
|
+
cprint(display_str, "yellow")
|
|
315
348
|
else:
|
|
316
|
-
cprint(
|
|
349
|
+
cprint(display_str, "cyan")
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _run_monitor_ui(
|
|
353
|
+
state_provider, workdir: Path, console: bool, port: int, title: str = ""
|
|
354
|
+
):
|
|
355
|
+
"""Shared code for running monitor UI (TUI or web)
|
|
356
|
+
|
|
357
|
+
Args:
|
|
358
|
+
state_provider: StateProvider instance (local or remote)
|
|
359
|
+
workdir: Local workspace/cache directory
|
|
360
|
+
console: If True, use TUI; otherwise use web UI
|
|
361
|
+
port: Port for web server
|
|
362
|
+
title: Optional title for status messages
|
|
363
|
+
"""
|
|
364
|
+
try:
|
|
365
|
+
if console:
|
|
366
|
+
# Use Textual TUI
|
|
367
|
+
from experimaestro.tui import ExperimentTUI
|
|
368
|
+
|
|
369
|
+
app = ExperimentTUI(
|
|
370
|
+
workdir, state_provider=state_provider, watch=True, show_logs=True
|
|
371
|
+
)
|
|
372
|
+
app.run()
|
|
373
|
+
else:
|
|
374
|
+
# Use React web server
|
|
375
|
+
from experimaestro.server import Server
|
|
376
|
+
|
|
377
|
+
if title:
|
|
378
|
+
cprint(
|
|
379
|
+
f"Starting experiment monitor for {title} on http://localhost:{port}",
|
|
380
|
+
"green",
|
|
381
|
+
)
|
|
382
|
+
else:
|
|
383
|
+
cprint(
|
|
384
|
+
f"Starting experiment monitor on http://localhost:{port}", "green"
|
|
385
|
+
)
|
|
386
|
+
cprint("Press Ctrl+C to stop", "yellow")
|
|
387
|
+
|
|
388
|
+
settings = ServerSettings()
|
|
389
|
+
settings.port = port
|
|
390
|
+
server = Server.instance(settings, state_provider=state_provider)
|
|
391
|
+
server.start()
|
|
392
|
+
|
|
393
|
+
try:
|
|
394
|
+
import time
|
|
395
|
+
|
|
396
|
+
while True:
|
|
397
|
+
time.sleep(1)
|
|
398
|
+
except KeyboardInterrupt:
|
|
399
|
+
pass
|
|
400
|
+
finally:
|
|
401
|
+
cprint("\nShutting down...", "yellow")
|
|
402
|
+
if state_provider:
|
|
403
|
+
state_provider.close()
|
|
317
404
|
|
|
318
405
|
|
|
319
406
|
@experiments.command()
|
|
@@ -326,7 +413,7 @@ def list(workdir: Path):
|
|
|
326
413
|
)
|
|
327
414
|
@pass_cfg
|
|
328
415
|
def monitor(workdir: Path, console: bool, port: int, sync: bool):
|
|
329
|
-
"""Monitor experiments with web UI or console TUI"""
|
|
416
|
+
"""Monitor local experiments with web UI or console TUI"""
|
|
330
417
|
# Force sync from disk if requested
|
|
331
418
|
if sync:
|
|
332
419
|
from experimaestro.scheduler.state_sync import sync_workspace_from_disk
|
|
@@ -335,37 +422,96 @@ def monitor(workdir: Path, console: bool, port: int, sync: bool):
|
|
|
335
422
|
sync_workspace_from_disk(workdir, write_mode=True, force=True)
|
|
336
423
|
cprint("Sync complete", "green")
|
|
337
424
|
|
|
338
|
-
|
|
339
|
-
# Use Textual TUI
|
|
340
|
-
from experimaestro.tui import ExperimentTUI
|
|
425
|
+
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
341
426
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
347
|
-
from experimaestro.server import Server
|
|
348
|
-
|
|
349
|
-
cprint(f"Starting experiment monitor on http://localhost:{port}", "green")
|
|
350
|
-
cprint("Press Ctrl+C to stop", "yellow")
|
|
351
|
-
|
|
352
|
-
state_provider = WorkspaceStateProvider.get_instance(
|
|
353
|
-
workdir,
|
|
354
|
-
sync_on_start=not sync, # Skip auto-sync if we just did a forced one
|
|
355
|
-
)
|
|
356
|
-
settings = ServerSettings()
|
|
357
|
-
settings.port = port
|
|
358
|
-
server = Server.instance(settings, state_provider=state_provider)
|
|
359
|
-
server.start()
|
|
427
|
+
state_provider = WorkspaceStateProvider.get_instance(
|
|
428
|
+
workdir,
|
|
429
|
+
sync_on_start=not sync, # Skip auto-sync if we just did a forced one
|
|
430
|
+
)
|
|
360
431
|
|
|
361
|
-
|
|
362
|
-
import time
|
|
432
|
+
_run_monitor_ui(state_provider, workdir, console, port)
|
|
363
433
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
434
|
+
|
|
435
|
+
@experiments.command("ssh-monitor")
|
|
436
|
+
@click.argument("host", type=str)
|
|
437
|
+
@click.argument("remote_workdir", type=str)
|
|
438
|
+
@click.option("--console", is_flag=True, help="Use console TUI instead of web UI")
|
|
439
|
+
@click.option(
|
|
440
|
+
"--port", type=int, default=12345, help="Port for web server (default: 12345)"
|
|
441
|
+
)
|
|
442
|
+
@click.option(
|
|
443
|
+
"--remote-xpm",
|
|
444
|
+
type=str,
|
|
445
|
+
default=None,
|
|
446
|
+
help="Path to experimaestro on remote host (default: use 'uv tool run')",
|
|
447
|
+
)
|
|
448
|
+
@click.option(
|
|
449
|
+
"--ssh-option",
|
|
450
|
+
"-o",
|
|
451
|
+
multiple=True,
|
|
452
|
+
help="Additional SSH options (can be repeated, e.g., -o '-p 2222')",
|
|
453
|
+
)
|
|
454
|
+
def ssh_monitor(
|
|
455
|
+
host: str,
|
|
456
|
+
remote_workdir: str,
|
|
457
|
+
console: bool,
|
|
458
|
+
port: int,
|
|
459
|
+
remote_xpm: str,
|
|
460
|
+
ssh_option: tuple,
|
|
461
|
+
):
|
|
462
|
+
"""Monitor experiments on a remote server via SSH
|
|
463
|
+
|
|
464
|
+
HOST is the SSH host (e.g., user@server)
|
|
465
|
+
REMOTE_WORKDIR is the workspace path on the remote server
|
|
466
|
+
|
|
467
|
+
Examples:
|
|
468
|
+
experimaestro experiments ssh-monitor myserver /path/to/workspace
|
|
469
|
+
experimaestro experiments ssh-monitor user@host /workspace --console
|
|
470
|
+
experimaestro experiments ssh-monitor host /workspace --remote-xpm /opt/xpm/bin/experimaestro
|
|
471
|
+
"""
|
|
472
|
+
from experimaestro.scheduler.remote.client import SSHStateProviderClient
|
|
473
|
+
|
|
474
|
+
cprint(f"Connecting to {host}...", "yellow")
|
|
475
|
+
state_provider = SSHStateProviderClient(
|
|
476
|
+
host=host,
|
|
477
|
+
remote_workspace=remote_workdir,
|
|
478
|
+
ssh_options=list(ssh_option) if ssh_option else None,
|
|
479
|
+
remote_xpm_path=remote_xpm,
|
|
480
|
+
)
|
|
481
|
+
try:
|
|
482
|
+
state_provider.connect()
|
|
483
|
+
cprint(f"Connected to {host}", "green")
|
|
484
|
+
except Exception as e:
|
|
485
|
+
cprint(f"Failed to connect: {e}", "red")
|
|
486
|
+
raise click.Abort()
|
|
487
|
+
|
|
488
|
+
_run_monitor_ui(
|
|
489
|
+
state_provider,
|
|
490
|
+
state_provider.local_cache_dir,
|
|
491
|
+
console,
|
|
492
|
+
port,
|
|
493
|
+
title=host,
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
@experiments.command("monitor-server")
|
|
498
|
+
@pass_cfg
|
|
499
|
+
def monitor_server(workdir: Path):
|
|
500
|
+
"""Start monitoring server for SSH connections (JSON-RPC over stdio)
|
|
501
|
+
|
|
502
|
+
This command is intended to be run over SSH to provide remote monitoring.
|
|
503
|
+
Communication is via JSON-RPC over stdin/stdout.
|
|
504
|
+
|
|
505
|
+
Example:
|
|
506
|
+
ssh host 'experimaestro experiments --workdir /path monitor-server'
|
|
507
|
+
"""
|
|
508
|
+
from experimaestro.scheduler.remote.server import SSHStateProviderServer
|
|
509
|
+
|
|
510
|
+
server = SSHStateProviderServer(workdir)
|
|
511
|
+
try:
|
|
512
|
+
server.start()
|
|
513
|
+
except KeyboardInterrupt:
|
|
514
|
+
server.stop()
|
|
369
515
|
|
|
370
516
|
|
|
371
517
|
@experiments.command()
|
experimaestro/experiments/cli.py
CHANGED
|
@@ -360,7 +360,12 @@ def experiments_cli( # noqa: C901
|
|
|
360
360
|
except HandledException:
|
|
361
361
|
sys.exit(1)
|
|
362
362
|
|
|
363
|
-
|
|
363
|
+
# Console mode is only available in NORMAL run mode
|
|
364
|
+
use_console = console and run_mode == RunMode.NORMAL
|
|
365
|
+
if console and not use_console:
|
|
366
|
+
logging.warning("--console is ignored when run_mode is not NORMAL")
|
|
367
|
+
|
|
368
|
+
if use_console:
|
|
364
369
|
# Run experiment in background thread, console UI in main thread
|
|
365
370
|
import threading
|
|
366
371
|
from experimaestro.tui import ExperimentTUI
|
|
@@ -375,7 +380,6 @@ def experiments_cli( # noqa: C901
|
|
|
375
380
|
run_experiment_code(xp_holder, xp_ready, register_signals=False)
|
|
376
381
|
# Add a test message after experiment completes
|
|
377
382
|
logging.info("Experiment thread completed")
|
|
378
|
-
print("Experiment thread print test")
|
|
379
383
|
except Exception as e:
|
|
380
384
|
exception_holder["exception"] = e
|
|
381
385
|
xp_ready.set() # Signal even on error
|
experimaestro/scheduler/base.py
CHANGED
|
@@ -197,6 +197,27 @@ class Scheduler(threading.Thread):
|
|
|
197
197
|
with self._listeners_lock:
|
|
198
198
|
self._listeners.clear()
|
|
199
199
|
|
|
200
|
+
def wait_for_notifications(self, timeout: float = 5.0) -> bool:
|
|
201
|
+
"""Wait for all pending notifications to be processed.
|
|
202
|
+
|
|
203
|
+
This submits a sentinel task and waits for it to complete,
|
|
204
|
+
ensuring all previously submitted notifications have been processed.
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
timeout: Maximum time to wait in seconds
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
True if all notifications were processed, False if timeout occurred
|
|
211
|
+
"""
|
|
212
|
+
try:
|
|
213
|
+
# Submit a no-op and wait for it to complete
|
|
214
|
+
future = self._notification_executor.submit(lambda: None)
|
|
215
|
+
future.result(timeout=timeout)
|
|
216
|
+
return True
|
|
217
|
+
except concurrent.futures.TimeoutError:
|
|
218
|
+
logger.warning("Timeout waiting for notification queue to drain")
|
|
219
|
+
return False
|
|
220
|
+
|
|
200
221
|
def getJobState(self, job: Job) -> "concurrent.futures.Future[JobState]":
|
|
201
222
|
# Check if the job belongs to this scheduler
|
|
202
223
|
if job.identifier not in self.jobs:
|
|
experimaestro/scheduler/experiment.py
CHANGED
|
@@ -43,26 +43,22 @@ class DatabaseListener:
|
|
|
43
43
|
self.state_provider.update_job_state(job, self.experiment_id, self.run_id)
|
|
44
44
|
|
|
45
45
|
def service_add(self, service):
|
|
46
|
-
"""
|
|
47
|
-
|
|
46
|
+
"""Register service in database"""
|
|
47
|
+
from experimaestro.scheduler.services import Service
|
|
48
|
+
|
|
49
|
+
state_dict = Service.serialize_state_dict(service._full_state_dict())
|
|
50
|
+
self.state_provider.register_service(
|
|
48
51
|
service.id,
|
|
49
52
|
self.experiment_id,
|
|
50
53
|
self.run_id,
|
|
51
54
|
service.description(),
|
|
52
|
-
|
|
53
|
-
state_dict=json.dumps(service.state_dict()),
|
|
55
|
+
state_dict=json.dumps(state_dict),
|
|
54
56
|
)
|
|
55
57
|
|
|
56
58
|
def service_state_changed(self, service):
|
|
57
|
-
"""
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
self.experiment_id,
|
|
61
|
-
self.run_id,
|
|
62
|
-
service.description(),
|
|
63
|
-
service.state.name,
|
|
64
|
-
state_dict=json.dumps(service.state_dict()),
|
|
65
|
-
)
|
|
59
|
+
"""Called when service state changes (runtime only, not persisted)"""
|
|
60
|
+
# Service state is managed at runtime, not persisted to DB
|
|
61
|
+
pass
|
|
66
62
|
|
|
67
63
|
|
|
68
64
|
class experiment:
|
|
@@ -224,10 +220,13 @@ class experiment:
|
|
|
224
220
|
|
|
225
221
|
def _write_services_json(self):
|
|
226
222
|
"""Write all services to services.json file"""
|
|
223
|
+
from experimaestro.scheduler.services import Service
|
|
224
|
+
|
|
227
225
|
services_data = {}
|
|
228
226
|
for service_id, service in self.services.items():
|
|
229
227
|
# Get state_dict from service (includes __class__ for recreation)
|
|
230
|
-
|
|
228
|
+
# and serialize paths to JSON-compatible format
|
|
229
|
+
service_state = Service.serialize_state_dict(service._full_state_dict())
|
|
231
230
|
# Add runtime state info
|
|
232
231
|
service_state.update(
|
|
233
232
|
{
|
|
@@ -281,9 +280,10 @@ class experiment:
|
|
|
281
280
|
with self.jobs_jsonl_path.open("a") as f:
|
|
282
281
|
f.write(json.dumps(record) + "\n")
|
|
283
282
|
|
|
284
|
-
# Also register in database for TUI/monitoring
|
|
285
|
-
|
|
286
|
-
|
|
283
|
+
# Also register in database for TUI/monitoring (only in NORMAL mode)
|
|
284
|
+
if self._db_listener is not None:
|
|
285
|
+
experiment_id = self.workdir.name
|
|
286
|
+
self.state_provider.update_job_submitted(job, experiment_id, self.run_id)
|
|
287
287
|
|
|
288
288
|
def stop(self):
|
|
289
289
|
"""Stop the experiment as soon as possible"""
|
|
@@ -403,24 +403,31 @@ class experiment:
|
|
|
403
403
|
(self.workspace.path / ".__experimaestro__").touch()
|
|
404
404
|
|
|
405
405
|
# Initialize workspace state provider (singleton per workspace path)
|
|
406
|
+
# Use read_only mode when not in NORMAL run mode to prevent DB changes
|
|
406
407
|
from .state_provider import WorkspaceStateProvider
|
|
407
408
|
|
|
409
|
+
is_normal_mode = self.workspace.run_mode == RunMode.NORMAL
|
|
408
410
|
self.state_provider = WorkspaceStateProvider.get_instance(
|
|
409
411
|
self.workspace.path,
|
|
410
|
-
read_only=
|
|
412
|
+
read_only=not is_normal_mode,
|
|
411
413
|
sync_on_start=False, # Experiments don't sync on start
|
|
412
414
|
)
|
|
413
415
|
|
|
414
|
-
# Register experiment in database and create a run
|
|
416
|
+
# Register experiment in database and create a run (only in NORMAL mode)
|
|
415
417
|
experiment_id = self.workdir.name
|
|
416
|
-
self.
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
418
|
+
self._db_listener = None
|
|
419
|
+
if is_normal_mode:
|
|
420
|
+
self.state_provider.ensure_experiment(experiment_id)
|
|
421
|
+
self.run_id = self.state_provider.create_run(experiment_id)
|
|
422
|
+
|
|
423
|
+
# Add database listener to update job state in database
|
|
424
|
+
self._db_listener = DatabaseListener(
|
|
425
|
+
self.state_provider, experiment_id, self.run_id
|
|
426
|
+
)
|
|
427
|
+
self.scheduler.addlistener(self._db_listener)
|
|
428
|
+
else:
|
|
429
|
+
# In non-NORMAL modes, use a placeholder run_id
|
|
430
|
+
self.run_id = None
|
|
424
431
|
|
|
425
432
|
# Number of unfinished jobs
|
|
426
433
|
self.unfinishedJobs = 0
|
|
@@ -461,6 +468,10 @@ class experiment:
|
|
|
461
468
|
)
|
|
462
469
|
else:
|
|
463
470
|
self.wait()
|
|
471
|
+
|
|
472
|
+
# Wait for all pending notifications to be processed
|
|
473
|
+
# before removing listeners
|
|
474
|
+
self.scheduler.wait_for_notifications()
|
|
464
475
|
finally:
|
|
465
476
|
if self._register_signals:
|
|
466
477
|
SIGNAL_HANDLER.remove(self)
|
|
@@ -473,13 +484,14 @@ class experiment:
|
|
|
473
484
|
# Unregister experiment from scheduler
|
|
474
485
|
self.scheduler.unregister_experiment(self)
|
|
475
486
|
|
|
476
|
-
# Remove database listener
|
|
477
|
-
self.
|
|
487
|
+
# Remove database listener and mark run as completed (only in NORMAL mode)
|
|
488
|
+
if self._db_listener is not None:
|
|
489
|
+
self.scheduler.removelistener(self._db_listener)
|
|
478
490
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
491
|
+
# Mark run as completed in database
|
|
492
|
+
experiment_id = self.workdir.name
|
|
493
|
+
status = "failed" if exc_type else "completed"
|
|
494
|
+
self.state_provider.complete_run(experiment_id, self.run_id, status)
|
|
483
495
|
|
|
484
496
|
# Note: Don't stop scheduler - it's shared!
|
|
485
497
|
# Note: Don't stop server - it runs in daemon mode until program exit
|
|
@@ -526,10 +538,28 @@ class experiment:
|
|
|
526
538
|
"""Adds a service (e.g. tensorboard viewer) to the experiment
|
|
527
539
|
|
|
528
540
|
:param service: A service instance
|
|
529
|
-
:return: The same service instance
|
|
541
|
+
:return: The same service instance (or existing service if already added)
|
|
530
542
|
"""
|
|
543
|
+
existing = self.services.get(service.id)
|
|
544
|
+
if existing is not None:
|
|
545
|
+
if existing is service:
|
|
546
|
+
# Same service instance added twice - just return it
|
|
547
|
+
logger.debug("Service %s already added, ignoring duplicate", service.id)
|
|
548
|
+
return service
|
|
549
|
+
else:
|
|
550
|
+
# Different service with same id - warn and replace
|
|
551
|
+
logger.warning(
|
|
552
|
+
"Replacing service %s (old id=%s, new id=%s)",
|
|
553
|
+
service.id,
|
|
554
|
+
id(existing),
|
|
555
|
+
id(service),
|
|
556
|
+
)
|
|
557
|
+
|
|
531
558
|
self.services[service.id] = service
|
|
532
559
|
|
|
560
|
+
# Allow service to access experiment context
|
|
561
|
+
service.set_experiment(self)
|
|
562
|
+
|
|
533
563
|
# Register database listener for state changes
|
|
534
564
|
service.add_listener(self._db_listener)
|
|
535
565
|
|
|
experimaestro/scheduler/interfaces.py
CHANGED
|
@@ -472,3 +472,30 @@ class BaseExperiment:
|
|
|
472
472
|
def experiment_id(self) -> str:
|
|
473
473
|
"""Experiment identifier derived from workdir name"""
|
|
474
474
|
return self.workdir.name
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
class BaseService:
|
|
478
|
+
"""Base interface for service information
|
|
479
|
+
|
|
480
|
+
This class defines the interface for service data. Both live Service instances
|
|
481
|
+
and MockService instances should provide these attributes and methods.
|
|
482
|
+
|
|
483
|
+
Attributes:
|
|
484
|
+
id: Unique identifier for the service
|
|
485
|
+
state: Current service state (ServiceState enum or compatible)
|
|
486
|
+
"""
|
|
487
|
+
|
|
488
|
+
id: str
|
|
489
|
+
|
|
490
|
+
@property
|
|
491
|
+
def state(self):
|
|
492
|
+
"""Current service state"""
|
|
493
|
+
raise NotImplementedError
|
|
494
|
+
|
|
495
|
+
def description(self) -> str:
|
|
496
|
+
"""Human-readable description of the service"""
|
|
497
|
+
raise NotImplementedError
|
|
498
|
+
|
|
499
|
+
def state_dict(self) -> dict:
|
|
500
|
+
"""Return dictionary representation for serialization"""
|
|
501
|
+
raise NotImplementedError
|
|
experimaestro/scheduler/remote/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Remote monitoring support for experimaestro
|
|
2
|
+
|
|
3
|
+
This package provides SSH-based remote monitoring capabilities for experiments.
|
|
4
|
+
|
|
5
|
+
Main components:
|
|
6
|
+
- SSHStateProviderServer: JSON-RPC server that wraps WorkspaceStateProvider
|
|
7
|
+
- SSHStateProviderClient: Client that connects via SSH and implements StateProvider interface
|
|
8
|
+
- RemoteFileSynchronizer: Rsync-based file synchronization
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
# On remote host (run via SSH):
|
|
12
|
+
from experimaestro.scheduler.remote.server import SSHStateProviderServer
|
|
13
|
+
server = SSHStateProviderServer(workspace_path)
|
|
14
|
+
server.start()
|
|
15
|
+
|
|
16
|
+
# On local host:
|
|
17
|
+
from experimaestro.scheduler.remote.client import SSHStateProviderClient
|
|
18
|
+
client = SSHStateProviderClient(host="server", remote_workspace="/path")
|
|
19
|
+
client.connect()
|
|
20
|
+
experiments = client.get_experiments()
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from experimaestro.scheduler.remote.server import SSHStateProviderServer
|
|
24
|
+
from experimaestro.scheduler.remote.client import SSHStateProviderClient
|
|
25
|
+
from experimaestro.scheduler.remote.sync import RemoteFileSynchronizer
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"SSHStateProviderServer",
|
|
29
|
+
"SSHStateProviderClient",
|
|
30
|
+
"RemoteFileSynchronizer",
|
|
31
|
+
]
|