experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (154)
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/__init__.py CHANGED
@@ -40,11 +40,11 @@ from .core.arguments import (
40
40
  help,
41
41
  )
42
42
  from .generators import pathgenerator, PathGenerator
43
- from .core.subparameters import (
44
- subparameters,
43
+ from .core.partial import (
44
+ partial,
45
45
  param_group,
46
46
  ParameterGroup,
47
- Subparameters,
47
+ Partial,
48
48
  )
49
49
  from .core.objects import (
50
50
  Config,
@@ -61,16 +61,23 @@ from .core.context import SerializationContext
61
61
  from .core.serializers import SerializationLWTask, PathSerializationLWTask
62
62
  from .core.types import Any, SubmitHook
63
63
  from .launchers import Launcher
64
- from .scheduler import Scheduler, experiment, FailedExperiment
64
+ from .scheduler import (
65
+ Scheduler,
66
+ experiment,
67
+ FailedExperiment,
68
+ DirtyGitError,
69
+ GracefulExperimentExit,
70
+ )
65
71
  from .exceptions import GracefulTimeout
66
72
  from .scheduler.workspace import Workspace, RunMode
67
- from .scheduler.state import get_experiment
73
+ from .scheduler.transient import TransientMode
68
74
  from .notifications import progress, tqdm
69
75
  from .checkers import Choices
70
76
  from .xpmutils import DirectoryContext
71
77
  from .mkdocs.annotations import documentation
72
78
  from .scheduler.base import Job
73
79
  from .launcherfinder.registry import LauncherRegistry
80
+ from .experiments.configuration import DirtyGitAction
74
81
 
75
82
 
76
83
  def set_launcher(launcher: Launcher):
experimaestro/cli/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
  # flake8: noqa: T201
2
2
  import sys
3
3
  from typing import Set, Optional
4
- from itertools import chain
5
4
  from shutil import rmtree
6
5
  import click
7
6
  import logging
@@ -164,27 +163,23 @@ def diff(path: Path):
164
163
  check(".", job, new_job, set())
165
164
 
166
165
 
167
- @click.option("--show-all", is_flag=True, help="Show even not orphans")
168
- @click.option(
169
- "--ignore-old", is_flag=True, help="Ignore old jobs for unfinished experiments"
170
- )
171
166
  @click.option("--clean", is_flag=True, help="Prune the orphan folders")
172
167
  @click.option("--size", is_flag=True, help="Show size of each folder")
173
168
  @click.argument("path", type=Path, callback=check_xp_path)
174
169
  @cli.command()
175
- def orphans(path: Path, clean: bool, size: bool, show_all: bool, ignore_old: bool):
176
- """Check for tasks that are not part of an experimental plan"""
170
+ def orphans(path: Path, clean: bool, size: bool):
171
+ """Check for tasks that are not part of an experimental plan
177
172
 
178
- jobspath = path / "jobs"
179
-
180
- def getjobs(path: Path):
181
- return ((str(p.relative_to(path)), p) for p in path.glob("*/*") if p.is_dir())
173
+ Uses the same orphan detection as the TUI (WorkspaceStateProvider.get_orphan_jobs).
174
+ """
175
+ from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
182
176
 
183
- def show(key: str, prefix=""):
177
+ def show(job, prefix=""):
178
+ key = f"{job.task_id}/{job.identifier}"
184
179
  if size:
185
180
  print(
186
181
  prefix,
187
- subprocess.check_output(["du", "-hs", key], cwd=jobspath)
182
+ subprocess.check_output(["du", "-hs", str(job.path)])
188
183
  .decode("utf-8")
189
184
  .strip(),
190
185
  sep=None,
@@ -192,35 +187,21 @@ def orphans(path: Path, clean: bool, size: bool, show_all: bool, ignore_old: boo
192
187
  else:
193
188
  print(prefix, key, sep=None)
194
189
 
195
- for p in (path / "xp").glob("*/jobs.bak"):
196
- logging.warning("Experiment %s has not completed successfully", p.parent.name)
197
-
198
- # Retrieve the jobs within expedriments (jobs and jobs.bak folder within experiments)
199
- xpjobs = set()
200
- if ignore_old:
201
- paths = (path / "xp").glob("*/jobs")
202
- else:
203
- paths = chain((path / "xp").glob("*/jobs"), (path / "xp").glob("*/jobs.bak"))
204
-
205
- for p in paths:
206
- if p.is_dir():
207
- for relpath, path in getjobs(p):
208
- xpjobs.add(relpath)
209
-
210
- # Now, look at stored jobs
211
- found = 0
212
- for key, jobpath in getjobs(jobspath):
213
- if key not in xpjobs:
214
- show(key)
215
- if clean:
216
- logging.info("Removing data in %s", jobpath)
217
- rmtree(jobpath)
218
- else:
219
- if show_all:
220
- show(key, prefix="[not orphan] ")
221
- found += 1
190
+ # Use WorkspaceStateProvider.get_orphan_jobs() - same as TUI
191
+ provider = WorkspaceStateProvider.get_instance(path)
192
+ orphan_jobs = provider.get_orphan_jobs()
193
+
194
+ if not orphan_jobs:
195
+ print("No orphan jobs found.")
196
+ return
222
197
 
223
- print(f"{found} jobs are not orphans")
198
+ print(f"Found {len(orphan_jobs)} orphan job(s):")
199
+ for job in orphan_jobs:
200
+ show(job)
201
+ if clean:
202
+ logging.info("Removing data in %s", job.path)
203
+ if job.path and job.path.exists():
204
+ rmtree(job.path)
224
205
 
225
206
 
226
207
  def arg_split(ctx, param, value):
@@ -279,22 +260,140 @@ cli.add_command(Launchers("launchers", help="Launcher specific commands"))
279
260
  cli.add_command(Launchers("connectors", help="Connector specific commands"))
280
261
  cli.add_command(Launchers("tokens", help="Token specific commands"))
281
262
 
282
- # Import and add progress commands
283
- from .progress import progress as progress_cli
284
-
285
- cli.add_command(progress_cli)
286
-
287
263
  # Import and add jobs commands
288
- from .jobs import jobs as jobs_cli
264
+ from .jobs import jobs as jobs_cli # noqa: E402
289
265
 
290
266
  cli.add_command(jobs_cli)
291
267
 
292
268
  # Import and add refactor commands
293
- from .refactor import refactor as refactor_cli
269
+ from .refactor import refactor as refactor_cli # noqa: E402
294
270
 
295
271
  cli.add_command(refactor_cli)
296
272
 
297
273
 
274
+ @cli.group()
275
+ def migrate():
276
+ """Migration commands for experimaestro workspace upgrades"""
277
+ pass
278
+
279
+
280
+ @migrate.command("v1-to-v2")
281
+ @click.argument("workdir", type=Path, callback=check_xp_path)
282
+ @click.option(
283
+ "--dry-run", is_flag=True, help="Show what would be done without making changes"
284
+ )
285
+ @click.option(
286
+ "--keep-old", is_flag=True, help="Keep the old xp directory after migration"
287
+ )
288
+ def migrate_v1_to_v2(workdir: Path, dry_run: bool, keep_old: bool):
289
+ """Migrate workspace from v1 (xp/) to v2 (experiments/) layout
290
+
291
+ This command migrates experiment directories from the old layout:
292
+ workdir/xp/{experiment-id}/
293
+ to the new layout:
294
+ workdir/experiments/{experiment-id}/{run-id}/
295
+
296
+ Each old experiment directory becomes a single run directory with the
297
+ run ID based on its modification time.
298
+ """
299
+ from datetime import datetime
300
+
301
+ old_xp_dir = workdir / "xp"
302
+ new_experiments_dir = workdir / "experiments"
303
+
304
+ if not old_xp_dir.exists():
305
+ cprint(f"No old 'xp' directory found at {old_xp_dir}", "yellow")
306
+ return
307
+
308
+ # List all experiments in the old directory
309
+ old_experiments = [d for d in old_xp_dir.iterdir() if d.is_dir()]
310
+
311
+ if not old_experiments:
312
+ cprint("No experiments found in xp/ directory", "yellow")
313
+ return
314
+
315
+ cprint(f"Found {len(old_experiments)} experiment(s) to migrate:", "cyan")
316
+ for exp_dir in old_experiments:
317
+ cprint(f" - {exp_dir.name}", "white")
318
+
319
+ if dry_run:
320
+ cprint("\nDRY RUN MODE - showing what would be done:", "yellow")
321
+
322
+ migrated = 0
323
+ for exp_dir in old_experiments:
324
+ exp_id = exp_dir.name
325
+
326
+ # Generate run_id from directory modification time
327
+ mtime = exp_dir.stat().st_mtime
328
+ mtime_dt = datetime.fromtimestamp(mtime)
329
+ run_id = mtime_dt.strftime("%Y%m%d_%H%M%S")
330
+
331
+ # Target path
332
+ new_exp_base = new_experiments_dir / exp_id
333
+ new_run_dir = new_exp_base / run_id
334
+
335
+ # Handle collision
336
+ suffix = 1
337
+ while new_run_dir.exists():
338
+ run_id = f"{mtime_dt.strftime('%Y%m%d_%H%M%S')}.{suffix}"
339
+ new_run_dir = new_exp_base / run_id
340
+ suffix += 1
341
+
342
+ if dry_run:
343
+ cprint(f" {exp_dir} -> {new_run_dir}", "white")
344
+ else:
345
+ # Create the parent directory
346
+ new_exp_base.mkdir(parents=True, exist_ok=True)
347
+
348
+ # Move the experiment directory
349
+ import shutil
350
+
351
+ try:
352
+ shutil.move(str(exp_dir), str(new_run_dir))
353
+ cprint(
354
+ f" Migrated: {exp_id} -> {new_run_dir.relative_to(workdir)}",
355
+ "green",
356
+ )
357
+ migrated += 1
358
+ except Exception as e:
359
+ cprint(f" Failed to migrate {exp_id}: {e}", "red")
360
+
361
+ if not dry_run:
362
+ cprint(f"\nMigrated {migrated}/{len(old_experiments)} experiment(s)", "cyan")
363
+
364
+ # Handle old xp directory
365
+ remaining = list(old_xp_dir.iterdir())
366
+ if remaining:
367
+ if keep_old:
368
+ # Keep remaining files, rename directory
369
+ renamed_xp_dir = workdir / "xp_MIGRATED_TO_V2"
370
+ old_xp_dir.rename(renamed_xp_dir)
371
+ cprint(
372
+ f"Renamed 'xp' -> 'xp_MIGRATED_TO_V2' ({len(remaining)} item(s))",
373
+ "yellow",
374
+ )
375
+ else:
376
+ cprint(
377
+ f"'xp' directory still contains {len(remaining)} item(s), not removing",
378
+ "yellow",
379
+ )
380
+ cprint("Remove manually or use --keep-old to rename", "yellow")
381
+ return
382
+ else:
383
+ # Empty directory - remove it
384
+ old_xp_dir.rmdir()
385
+ cprint("Removed empty 'xp' directory", "green")
386
+
387
+ # Create a broken symlink to prevent v1 from recreating xp/
388
+ # v1 will find the symlink but fail when trying to use it
389
+ broken_link = workdir / "xp"
390
+ if not broken_link.exists() and not broken_link.is_symlink():
391
+ broken_link.symlink_to("/experimaestro_v2_migrated_workspace_do_not_use_v1")
392
+ cprint(
393
+ "Created broken 'xp' symlink to prevent experimaestro v1 usage", "green"
394
+ )
395
+
396
+
298
397
  @cli.group()
299
398
  @click.option("--workdir", type=Path, default=None)
300
399
  @click.option("--workspace", type=str, default=None)
@@ -309,11 +408,100 @@ def experiments(ctx, workdir, workspace):
309
408
  @experiments.command()
310
409
  @pass_cfg
311
410
  def list(workdir: Path):
312
- for p in (workdir / "xp").iterdir():
313
- if (p / "jobs.bak").exists():
314
- cprint(f"[unfinished] {p.name}", "yellow")
411
+ """List experiments in the workspace"""
412
+ from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
413
+
414
+ # Get experiments from state provider for detailed info
415
+ state_provider = WorkspaceStateProvider.get_instance(workdir)
416
+ experiments_list = state_provider.get_experiments()
417
+
418
+ # Build lookup by experiment_id
419
+ exp_info = {exp.experiment_id: exp for exp in experiments_list}
420
+
421
+ # New layout: experiments/{exp-id}/{run-id}/
422
+ experiments_dir = workdir / "experiments"
423
+ if not experiments_dir.exists():
424
+ cprint("No experiments found", "yellow")
425
+ return
426
+
427
+ for exp_dir in experiments_dir.iterdir():
428
+ if not exp_dir.is_dir():
429
+ continue
430
+
431
+ exp_id = exp_dir.name
432
+ exp = exp_info.get(exp_id)
433
+
434
+ # Build display string
435
+ display_parts = [exp_id]
436
+
437
+ # Add current run_id if available
438
+ if exp and getattr(exp, "current_run_id", None):
439
+ display_parts.append(f"[run: {exp.current_run_id}]")
440
+
441
+ # Add hostname if available
442
+ if exp and getattr(exp, "hostname", None):
443
+ display_parts.append(f"[{exp.hostname}]")
444
+
445
+ # Add job stats if available
446
+ if exp:
447
+ display_parts.append(f"({exp.finished_jobs}/{exp.total_jobs} jobs)")
448
+
449
+ display_str = " ".join(display_parts)
450
+ cprint(display_str, "cyan")
451
+
452
+
453
+ def _run_monitor_ui(
454
+ state_provider, workdir: Path, console: bool, port: int, title: str = ""
455
+ ):
456
+ """Shared code for running monitor UI (TUI or web)
457
+
458
+ Args:
459
+ state_provider: StateProvider instance (local or remote)
460
+ workdir: Local workspace/cache directory
461
+ console: If True, use TUI; otherwise use web UI
462
+ port: Port for web server
463
+ title: Optional title for status messages
464
+ """
465
+ try:
466
+ if console:
467
+ # Use Textual TUI
468
+ from experimaestro.tui import ExperimentTUI
469
+
470
+ app = ExperimentTUI(
471
+ workdir, state_provider=state_provider, watch=True, show_logs=True
472
+ )
473
+ app.run()
315
474
  else:
316
- cprint(p.name, "cyan")
475
+ # Use React web server
476
+ from experimaestro.webui import WebUIServer
477
+
478
+ if title:
479
+ cprint(
480
+ f"Starting experiment monitor for {title} on http://localhost:{port}",
481
+ "green",
482
+ )
483
+ else:
484
+ cprint(
485
+ f"Starting experiment monitor on http://localhost:{port}", "green"
486
+ )
487
+ cprint("Press Ctrl+C to stop", "yellow")
488
+
489
+ settings = ServerSettings()
490
+ settings.port = port
491
+ server = WebUIServer.instance(settings, state_provider=state_provider)
492
+ server.start()
493
+
494
+ try:
495
+ import time
496
+
497
+ while True:
498
+ time.sleep(1)
499
+ except KeyboardInterrupt:
500
+ pass
501
+ finally:
502
+ cprint("\nShutting down...", "yellow")
503
+ if state_provider:
504
+ state_provider.close()
317
505
 
318
506
 
319
507
  @experiments.command()
@@ -322,115 +510,186 @@ def list(workdir: Path):
322
510
  "--port", type=int, default=12345, help="Port for web server (default: 12345)"
323
511
  )
324
512
  @click.option(
325
- "--sync", is_flag=True, help="Force sync from disk before starting monitor"
513
+ "--watcher",
514
+ type=click.Choice(["auto", "polling", "inotify", "fsevents", "kqueue", "windows"]),
515
+ default="auto",
516
+ help="Filesystem watcher type (auto=platform default, polling=network mounts)",
517
+ )
518
+ @click.option(
519
+ "--polling-interval",
520
+ type=float,
521
+ default=1.0,
522
+ help="Polling interval in seconds (only for --watcher=polling)",
523
+ )
524
+ @click.option(
525
+ "--sync",
526
+ is_flag=True,
527
+ hidden=True,
528
+ help="Deprecated: no longer needed (filesystem state is always current)",
326
529
  )
327
530
  @pass_cfg
328
- def monitor(workdir: Path, console: bool, port: int, sync: bool):
329
- """Monitor experiments with web UI or console TUI"""
330
- # Force sync from disk if requested
531
+ def monitor(
532
+ workdir: Path,
533
+ console: bool,
534
+ port: int,
535
+ watcher: str,
536
+ polling_interval: float,
537
+ sync: bool,
538
+ ):
539
+ """Monitor local experiments with web UI or console TUI"""
540
+ # --sync is deprecated (kept for backwards compatibility)
331
541
  if sync:
332
- from experimaestro.scheduler.state_sync import sync_workspace_from_disk
542
+ cprint(
543
+ "Note: --sync is deprecated and no longer needed "
544
+ "(filesystem state is always current)",
545
+ "yellow",
546
+ )
333
547
 
334
- cprint("Syncing workspace from disk...", "yellow")
335
- sync_workspace_from_disk(workdir, write_mode=True, force=True)
336
- cprint("Sync complete", "green")
548
+ # Configure filesystem watcher type
549
+ from experimaestro.ipc import IPCom, WatcherType
337
550
 
338
- if console:
339
- # Use Textual TUI
340
- from experimaestro.tui import ExperimentTUI
551
+ if watcher != "auto":
552
+ IPCom.set_watcher_type(WatcherType(watcher), polling_interval)
553
+ elif polling_interval != 1.0:
554
+ IPCom.set_watcher_type(WatcherType.POLLING, polling_interval)
341
555
 
342
- app = ExperimentTUI(workdir, watch=True)
343
- app.run()
344
- else:
345
- # Use React web server
346
- from experimaestro.scheduler.state_provider import WorkspaceStateProvider
347
- from experimaestro.server import Server
556
+ from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
348
557
 
349
- cprint(f"Starting experiment monitor on http://localhost:{port}", "green")
350
- cprint("Press Ctrl+C to stop", "yellow")
558
+ state_provider = WorkspaceStateProvider.get_instance(workdir)
351
559
 
352
- state_provider = WorkspaceStateProvider.get_instance(
353
- workdir,
354
- sync_on_start=not sync, # Skip auto-sync if we just did a forced one
355
- )
356
- settings = ServerSettings()
357
- settings.port = port
358
- server = Server.instance(settings, state_provider=state_provider)
359
- server.start()
560
+ _run_monitor_ui(state_provider, workdir, console, port)
360
561
 
361
- try:
362
- import time
363
562
 
364
- while True:
365
- time.sleep(1)
366
- except KeyboardInterrupt:
367
- cprint("\nShutting down...", "yellow")
368
- state_provider.close()
563
+ @experiments.command("ssh-monitor")
564
+ @click.argument("host", type=str)
565
+ @click.argument("remote_workdir", type=str)
566
+ @click.option("--console", is_flag=True, help="Use console TUI instead of web UI")
567
+ @click.option(
568
+ "--port", type=int, default=12345, help="Port for web server (default: 12345)"
569
+ )
570
+ @click.option(
571
+ "--watcher",
572
+ type=click.Choice(["auto", "polling", "inotify", "fsevents", "kqueue", "windows"]),
573
+ default="auto",
574
+ help="Filesystem watcher type (auto=platform default, polling=network mounts)",
575
+ )
576
+ @click.option(
577
+ "--polling-interval",
578
+ type=float,
579
+ default=1.0,
580
+ help="Polling interval in seconds (only for --watcher=polling)",
581
+ )
582
+ @click.option(
583
+ "--remote-xpm",
584
+ type=str,
585
+ default=None,
586
+ help="Path to experimaestro on remote host (default: use 'uv tool run')",
587
+ )
588
+ @click.option(
589
+ "--ssh-option",
590
+ "-o",
591
+ multiple=True,
592
+ help="Additional SSH options (can be repeated, e.g., -o '-p 2222')",
593
+ )
594
+ def ssh_monitor(
595
+ host: str,
596
+ remote_workdir: str,
597
+ console: bool,
598
+ port: int,
599
+ watcher: str,
600
+ polling_interval: float,
601
+ remote_xpm: str,
602
+ ssh_option: tuple,
603
+ ):
604
+ """Monitor experiments on a remote server via SSH
605
+
606
+ HOST is the SSH host (e.g., user@server)
607
+ REMOTE_WORKDIR is the workspace path on the remote server
608
+
609
+ Examples:
610
+ experimaestro experiments ssh-monitor myserver /path/to/workspace
611
+ experimaestro experiments ssh-monitor user@host /workspace --console
612
+ experimaestro experiments ssh-monitor host /workspace --remote-xpm /opt/xpm/bin/experimaestro
613
+ """
614
+ # Configure filesystem watcher type
615
+ from experimaestro.ipc import IPCom, WatcherType
616
+
617
+ if watcher != "auto":
618
+ IPCom.set_watcher_type(WatcherType(watcher), polling_interval)
619
+ elif polling_interval != 1.0:
620
+ IPCom.set_watcher_type(WatcherType.POLLING, polling_interval)
621
+
622
+ from experimaestro.scheduler.remote.client import SSHStateProviderClient
623
+
624
+ cprint(f"Connecting to {host}...", "yellow")
625
+ state_provider = SSHStateProviderClient(
626
+ host=host,
627
+ remote_workspace=remote_workdir,
628
+ ssh_options=list(ssh_option) if ssh_option else None,
629
+ remote_xpm_path=remote_xpm,
630
+ )
631
+ try:
632
+ state_provider.connect()
633
+ cprint(f"Connected to {host}", "green")
634
+ except Exception as e:
635
+ cprint(f"Failed to connect: {e}", "red")
636
+ raise click.Abort()
637
+
638
+ _run_monitor_ui(
639
+ state_provider,
640
+ state_provider.local_cache_dir,
641
+ console,
642
+ port,
643
+ title=host,
644
+ )
645
+
646
+
647
+ @experiments.command("monitor-server")
648
+ @pass_cfg
649
+ def monitor_server(workdir: Path):
650
+ """Start monitoring server for SSH connections (JSON-RPC over stdio)
651
+
652
+ This command is intended to be run over SSH to provide remote monitoring.
653
+ Communication is via JSON-RPC over stdin/stdout.
654
+
655
+ Example:
656
+ ssh host 'experimaestro experiments --workdir /path monitor-server'
657
+ """
658
+ from experimaestro.scheduler.remote.server import SSHStateProviderServer
659
+
660
+ server = SSHStateProviderServer(workdir)
661
+ try:
662
+ server.start()
663
+ except KeyboardInterrupt:
664
+ server.stop()
369
665
 
370
666
 
371
667
  @experiments.command()
372
668
  @click.option(
373
669
  "--dry-run",
374
670
  is_flag=True,
375
- help="Don't write to database, only show what would be synced",
671
+ help="[DEPRECATED] No longer needed with filesystem-based state tracking",
376
672
  )
377
673
  @click.option(
378
674
  "--force",
379
675
  is_flag=True,
380
- help="Force sync even if recently synced (bypasses time throttling)",
676
+ help="[DEPRECATED] No longer needed with filesystem-based state tracking",
381
677
  )
382
678
  @click.option(
383
679
  "--no-wait",
384
680
  is_flag=True,
385
- help="Don't wait for lock, fail immediately if unavailable",
681
+ help="[DEPRECATED] No longer needed with filesystem-based state tracking",
386
682
  )
387
683
  @pass_cfg
388
684
  def sync(workdir: Path, dry_run: bool, force: bool, no_wait: bool):
389
- """Synchronize workspace database from disk state
685
+ """[DEPRECATED] Synchronize workspace database from disk state
390
686
 
391
- Scans experiment directories and job marker files to update the workspace
392
- database. Uses exclusive locking to prevent conflicts with running experiments.
687
+ This command is deprecated. With the new filesystem-based state tracking,
688
+ state is read directly from status.json and events files. No synchronization
689
+ is needed.
393
690
  """
394
- from experimaestro.scheduler.state_sync import sync_workspace_from_disk
395
- from experimaestro.scheduler.workspace import Workspace
396
- from experimaestro.settings import Settings
397
-
398
- # Get settings and workspace settings
399
- settings = Settings.instance()
400
- ws_settings = find_workspace(workdir=workdir)
401
-
402
- # Create workspace instance (manages database lifecycle)
403
- workspace = Workspace(
404
- settings=settings,
405
- workspace_settings=ws_settings,
406
- sync_on_init=False, # Don't sync on init since we're explicitly syncing
691
+ cprint(
692
+ "Warning: 'sync' command is deprecated. "
693
+ "State is now tracked via filesystem (status.json) - no sync needed.",
694
+ "yellow",
407
695
  )
408
-
409
- try:
410
- # Enter workspace context to initialize database
411
- with workspace:
412
- cprint(f"Syncing workspace: {workspace.path}", "cyan")
413
- if dry_run:
414
- cprint("DRY RUN MODE: No changes will be written", "yellow")
415
- if force:
416
- cprint("FORCE MODE: Bypassing time throttling", "yellow")
417
-
418
- # Run sync
419
- sync_workspace_from_disk(
420
- workspace=workspace,
421
- write_mode=not dry_run,
422
- force=force,
423
- blocking=not no_wait,
424
- )
425
-
426
- cprint("Sync completed successfully", "green")
427
-
428
- except RuntimeError as e:
429
- cprint(f"Sync failed: {e}", "red")
430
- sys.exit(1)
431
- except Exception as e:
432
- cprint(f"Unexpected error during sync: {e}", "red")
433
- import traceback
434
-
435
- traceback.print_exc()
436
- sys.exit(1)