experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +393 -134
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +223 -52
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +650 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +764 -169
- experimaestro/scheduler/interfaces.py +338 -96
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +928 -0
- experimaestro/scheduler/remote/protocol.py +282 -0
- experimaestro/scheduler/remote/server.py +447 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +186 -35
- experimaestro/scheduler/state_provider.py +811 -2157
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +1132 -0
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +459 -1895
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -388
- experimaestro/scheduler/state_sync.py +0 -834
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b4.dist-info/RECORD +0 -181
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/__init__.py
CHANGED
|
@@ -40,11 +40,11 @@ from .core.arguments import (
|
|
|
40
40
|
help,
|
|
41
41
|
)
|
|
42
42
|
from .generators import pathgenerator, PathGenerator
|
|
43
|
-
from .core.
|
|
44
|
-
|
|
43
|
+
from .core.partial import (
|
|
44
|
+
partial,
|
|
45
45
|
param_group,
|
|
46
46
|
ParameterGroup,
|
|
47
|
-
|
|
47
|
+
Partial,
|
|
48
48
|
)
|
|
49
49
|
from .core.objects import (
|
|
50
50
|
Config,
|
|
@@ -61,16 +61,23 @@ from .core.context import SerializationContext
|
|
|
61
61
|
from .core.serializers import SerializationLWTask, PathSerializationLWTask
|
|
62
62
|
from .core.types import Any, SubmitHook
|
|
63
63
|
from .launchers import Launcher
|
|
64
|
-
from .scheduler import
|
|
64
|
+
from .scheduler import (
|
|
65
|
+
Scheduler,
|
|
66
|
+
experiment,
|
|
67
|
+
FailedExperiment,
|
|
68
|
+
DirtyGitError,
|
|
69
|
+
GracefulExperimentExit,
|
|
70
|
+
)
|
|
65
71
|
from .exceptions import GracefulTimeout
|
|
66
72
|
from .scheduler.workspace import Workspace, RunMode
|
|
67
|
-
from .scheduler.
|
|
73
|
+
from .scheduler.transient import TransientMode
|
|
68
74
|
from .notifications import progress, tqdm
|
|
69
75
|
from .checkers import Choices
|
|
70
76
|
from .xpmutils import DirectoryContext
|
|
71
77
|
from .mkdocs.annotations import documentation
|
|
72
78
|
from .scheduler.base import Job
|
|
73
79
|
from .launcherfinder.registry import LauncherRegistry
|
|
80
|
+
from .experiments.configuration import DirtyGitAction
|
|
74
81
|
|
|
75
82
|
|
|
76
83
|
def set_launcher(launcher: Launcher):
|
experimaestro/cli/__init__.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# flake8: noqa: T201
|
|
2
2
|
import sys
|
|
3
3
|
from typing import Set, Optional
|
|
4
|
-
from itertools import chain
|
|
5
4
|
from shutil import rmtree
|
|
6
5
|
import click
|
|
7
6
|
import logging
|
|
@@ -164,27 +163,23 @@ def diff(path: Path):
|
|
|
164
163
|
check(".", job, new_job, set())
|
|
165
164
|
|
|
166
165
|
|
|
167
|
-
@click.option("--show-all", is_flag=True, help="Show even not orphans")
|
|
168
|
-
@click.option(
|
|
169
|
-
"--ignore-old", is_flag=True, help="Ignore old jobs for unfinished experiments"
|
|
170
|
-
)
|
|
171
166
|
@click.option("--clean", is_flag=True, help="Prune the orphan folders")
|
|
172
167
|
@click.option("--size", is_flag=True, help="Show size of each folder")
|
|
173
168
|
@click.argument("path", type=Path, callback=check_xp_path)
|
|
174
169
|
@cli.command()
|
|
175
|
-
def orphans(path: Path, clean: bool, size: bool
|
|
176
|
-
"""Check for tasks that are not part of an experimental plan
|
|
170
|
+
def orphans(path: Path, clean: bool, size: bool):
|
|
171
|
+
"""Check for tasks that are not part of an experimental plan
|
|
177
172
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
return ((str(p.relative_to(path)), p) for p in path.glob("*/*") if p.is_dir())
|
|
173
|
+
Uses the same orphan detection as the TUI (WorkspaceStateProvider.get_orphan_jobs).
|
|
174
|
+
"""
|
|
175
|
+
from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
|
|
182
176
|
|
|
183
|
-
def show(
|
|
177
|
+
def show(job, prefix=""):
|
|
178
|
+
key = f"{job.task_id}/{job.identifier}"
|
|
184
179
|
if size:
|
|
185
180
|
print(
|
|
186
181
|
prefix,
|
|
187
|
-
subprocess.check_output(["du", "-hs",
|
|
182
|
+
subprocess.check_output(["du", "-hs", str(job.path)])
|
|
188
183
|
.decode("utf-8")
|
|
189
184
|
.strip(),
|
|
190
185
|
sep=None,
|
|
@@ -192,35 +187,21 @@ def orphans(path: Path, clean: bool, size: bool, show_all: bool, ignore_old: boo
|
|
|
192
187
|
else:
|
|
193
188
|
print(prefix, key, sep=None)
|
|
194
189
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
else:
|
|
203
|
-
paths = chain((path / "xp").glob("*/jobs"), (path / "xp").glob("*/jobs.bak"))
|
|
204
|
-
|
|
205
|
-
for p in paths:
|
|
206
|
-
if p.is_dir():
|
|
207
|
-
for relpath, path in getjobs(p):
|
|
208
|
-
xpjobs.add(relpath)
|
|
209
|
-
|
|
210
|
-
# Now, look at stored jobs
|
|
211
|
-
found = 0
|
|
212
|
-
for key, jobpath in getjobs(jobspath):
|
|
213
|
-
if key not in xpjobs:
|
|
214
|
-
show(key)
|
|
215
|
-
if clean:
|
|
216
|
-
logging.info("Removing data in %s", jobpath)
|
|
217
|
-
rmtree(jobpath)
|
|
218
|
-
else:
|
|
219
|
-
if show_all:
|
|
220
|
-
show(key, prefix="[not orphan] ")
|
|
221
|
-
found += 1
|
|
190
|
+
# Use WorkspaceStateProvider.get_orphan_jobs() - same as TUI
|
|
191
|
+
provider = WorkspaceStateProvider.get_instance(path)
|
|
192
|
+
orphan_jobs = provider.get_orphan_jobs()
|
|
193
|
+
|
|
194
|
+
if not orphan_jobs:
|
|
195
|
+
print("No orphan jobs found.")
|
|
196
|
+
return
|
|
222
197
|
|
|
223
|
-
print(f"{
|
|
198
|
+
print(f"Found {len(orphan_jobs)} orphan job(s):")
|
|
199
|
+
for job in orphan_jobs:
|
|
200
|
+
show(job)
|
|
201
|
+
if clean:
|
|
202
|
+
logging.info("Removing data in %s", job.path)
|
|
203
|
+
if job.path and job.path.exists():
|
|
204
|
+
rmtree(job.path)
|
|
224
205
|
|
|
225
206
|
|
|
226
207
|
def arg_split(ctx, param, value):
|
|
@@ -279,22 +260,140 @@ cli.add_command(Launchers("launchers", help="Launcher specific commands"))
|
|
|
279
260
|
cli.add_command(Launchers("connectors", help="Connector specific commands"))
|
|
280
261
|
cli.add_command(Launchers("tokens", help="Token specific commands"))
|
|
281
262
|
|
|
282
|
-
# Import and add progress commands
|
|
283
|
-
from .progress import progress as progress_cli
|
|
284
|
-
|
|
285
|
-
cli.add_command(progress_cli)
|
|
286
|
-
|
|
287
263
|
# Import and add jobs commands
|
|
288
|
-
from .jobs import jobs as jobs_cli
|
|
264
|
+
from .jobs import jobs as jobs_cli # noqa: E402
|
|
289
265
|
|
|
290
266
|
cli.add_command(jobs_cli)
|
|
291
267
|
|
|
292
268
|
# Import and add refactor commands
|
|
293
|
-
from .refactor import refactor as refactor_cli
|
|
269
|
+
from .refactor import refactor as refactor_cli # noqa: E402
|
|
294
270
|
|
|
295
271
|
cli.add_command(refactor_cli)
|
|
296
272
|
|
|
297
273
|
|
|
274
|
+
@cli.group()
|
|
275
|
+
def migrate():
|
|
276
|
+
"""Migration commands for experimaestro workspace upgrades"""
|
|
277
|
+
pass
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@migrate.command("v1-to-v2")
|
|
281
|
+
@click.argument("workdir", type=Path, callback=check_xp_path)
|
|
282
|
+
@click.option(
|
|
283
|
+
"--dry-run", is_flag=True, help="Show what would be done without making changes"
|
|
284
|
+
)
|
|
285
|
+
@click.option(
|
|
286
|
+
"--keep-old", is_flag=True, help="Keep the old xp directory after migration"
|
|
287
|
+
)
|
|
288
|
+
def migrate_v1_to_v2(workdir: Path, dry_run: bool, keep_old: bool):
|
|
289
|
+
"""Migrate workspace from v1 (xp/) to v2 (experiments/) layout
|
|
290
|
+
|
|
291
|
+
This command migrates experiment directories from the old layout:
|
|
292
|
+
workdir/xp/{experiment-id}/
|
|
293
|
+
to the new layout:
|
|
294
|
+
workdir/experiments/{experiment-id}/{run-id}/
|
|
295
|
+
|
|
296
|
+
Each old experiment directory becomes a single run directory with the
|
|
297
|
+
run ID based on its modification time.
|
|
298
|
+
"""
|
|
299
|
+
from datetime import datetime
|
|
300
|
+
|
|
301
|
+
old_xp_dir = workdir / "xp"
|
|
302
|
+
new_experiments_dir = workdir / "experiments"
|
|
303
|
+
|
|
304
|
+
if not old_xp_dir.exists():
|
|
305
|
+
cprint(f"No old 'xp' directory found at {old_xp_dir}", "yellow")
|
|
306
|
+
return
|
|
307
|
+
|
|
308
|
+
# List all experiments in the old directory
|
|
309
|
+
old_experiments = [d for d in old_xp_dir.iterdir() if d.is_dir()]
|
|
310
|
+
|
|
311
|
+
if not old_experiments:
|
|
312
|
+
cprint("No experiments found in xp/ directory", "yellow")
|
|
313
|
+
return
|
|
314
|
+
|
|
315
|
+
cprint(f"Found {len(old_experiments)} experiment(s) to migrate:", "cyan")
|
|
316
|
+
for exp_dir in old_experiments:
|
|
317
|
+
cprint(f" - {exp_dir.name}", "white")
|
|
318
|
+
|
|
319
|
+
if dry_run:
|
|
320
|
+
cprint("\nDRY RUN MODE - showing what would be done:", "yellow")
|
|
321
|
+
|
|
322
|
+
migrated = 0
|
|
323
|
+
for exp_dir in old_experiments:
|
|
324
|
+
exp_id = exp_dir.name
|
|
325
|
+
|
|
326
|
+
# Generate run_id from directory modification time
|
|
327
|
+
mtime = exp_dir.stat().st_mtime
|
|
328
|
+
mtime_dt = datetime.fromtimestamp(mtime)
|
|
329
|
+
run_id = mtime_dt.strftime("%Y%m%d_%H%M%S")
|
|
330
|
+
|
|
331
|
+
# Target path
|
|
332
|
+
new_exp_base = new_experiments_dir / exp_id
|
|
333
|
+
new_run_dir = new_exp_base / run_id
|
|
334
|
+
|
|
335
|
+
# Handle collision
|
|
336
|
+
suffix = 1
|
|
337
|
+
while new_run_dir.exists():
|
|
338
|
+
run_id = f"{mtime_dt.strftime('%Y%m%d_%H%M%S')}.{suffix}"
|
|
339
|
+
new_run_dir = new_exp_base / run_id
|
|
340
|
+
suffix += 1
|
|
341
|
+
|
|
342
|
+
if dry_run:
|
|
343
|
+
cprint(f" {exp_dir} -> {new_run_dir}", "white")
|
|
344
|
+
else:
|
|
345
|
+
# Create the parent directory
|
|
346
|
+
new_exp_base.mkdir(parents=True, exist_ok=True)
|
|
347
|
+
|
|
348
|
+
# Move the experiment directory
|
|
349
|
+
import shutil
|
|
350
|
+
|
|
351
|
+
try:
|
|
352
|
+
shutil.move(str(exp_dir), str(new_run_dir))
|
|
353
|
+
cprint(
|
|
354
|
+
f" Migrated: {exp_id} -> {new_run_dir.relative_to(workdir)}",
|
|
355
|
+
"green",
|
|
356
|
+
)
|
|
357
|
+
migrated += 1
|
|
358
|
+
except Exception as e:
|
|
359
|
+
cprint(f" Failed to migrate {exp_id}: {e}", "red")
|
|
360
|
+
|
|
361
|
+
if not dry_run:
|
|
362
|
+
cprint(f"\nMigrated {migrated}/{len(old_experiments)} experiment(s)", "cyan")
|
|
363
|
+
|
|
364
|
+
# Handle old xp directory
|
|
365
|
+
remaining = list(old_xp_dir.iterdir())
|
|
366
|
+
if remaining:
|
|
367
|
+
if keep_old:
|
|
368
|
+
# Keep remaining files, rename directory
|
|
369
|
+
renamed_xp_dir = workdir / "xp_MIGRATED_TO_V2"
|
|
370
|
+
old_xp_dir.rename(renamed_xp_dir)
|
|
371
|
+
cprint(
|
|
372
|
+
f"Renamed 'xp' -> 'xp_MIGRATED_TO_V2' ({len(remaining)} item(s))",
|
|
373
|
+
"yellow",
|
|
374
|
+
)
|
|
375
|
+
else:
|
|
376
|
+
cprint(
|
|
377
|
+
f"'xp' directory still contains {len(remaining)} item(s), not removing",
|
|
378
|
+
"yellow",
|
|
379
|
+
)
|
|
380
|
+
cprint("Remove manually or use --keep-old to rename", "yellow")
|
|
381
|
+
return
|
|
382
|
+
else:
|
|
383
|
+
# Empty directory - remove it
|
|
384
|
+
old_xp_dir.rmdir()
|
|
385
|
+
cprint("Removed empty 'xp' directory", "green")
|
|
386
|
+
|
|
387
|
+
# Create a broken symlink to prevent v1 from recreating xp/
|
|
388
|
+
# v1 will find the symlink but fail when trying to use it
|
|
389
|
+
broken_link = workdir / "xp"
|
|
390
|
+
if not broken_link.exists() and not broken_link.is_symlink():
|
|
391
|
+
broken_link.symlink_to("/experimaestro_v2_migrated_workspace_do_not_use_v1")
|
|
392
|
+
cprint(
|
|
393
|
+
"Created broken 'xp' symlink to prevent experimaestro v1 usage", "green"
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
|
|
298
397
|
@cli.group()
|
|
299
398
|
@click.option("--workdir", type=Path, default=None)
|
|
300
399
|
@click.option("--workspace", type=str, default=None)
|
|
@@ -309,11 +408,100 @@ def experiments(ctx, workdir, workspace):
|
|
|
309
408
|
@experiments.command()
|
|
310
409
|
@pass_cfg
|
|
311
410
|
def list(workdir: Path):
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
411
|
+
"""List experiments in the workspace"""
|
|
412
|
+
from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
|
|
413
|
+
|
|
414
|
+
# Get experiments from state provider for detailed info
|
|
415
|
+
state_provider = WorkspaceStateProvider.get_instance(workdir)
|
|
416
|
+
experiments_list = state_provider.get_experiments()
|
|
417
|
+
|
|
418
|
+
# Build lookup by experiment_id
|
|
419
|
+
exp_info = {exp.experiment_id: exp for exp in experiments_list}
|
|
420
|
+
|
|
421
|
+
# New layout: experiments/{exp-id}/{run-id}/
|
|
422
|
+
experiments_dir = workdir / "experiments"
|
|
423
|
+
if not experiments_dir.exists():
|
|
424
|
+
cprint("No experiments found", "yellow")
|
|
425
|
+
return
|
|
426
|
+
|
|
427
|
+
for exp_dir in experiments_dir.iterdir():
|
|
428
|
+
if not exp_dir.is_dir():
|
|
429
|
+
continue
|
|
430
|
+
|
|
431
|
+
exp_id = exp_dir.name
|
|
432
|
+
exp = exp_info.get(exp_id)
|
|
433
|
+
|
|
434
|
+
# Build display string
|
|
435
|
+
display_parts = [exp_id]
|
|
436
|
+
|
|
437
|
+
# Add current run_id if available
|
|
438
|
+
if exp and getattr(exp, "current_run_id", None):
|
|
439
|
+
display_parts.append(f"[run: {exp.current_run_id}]")
|
|
440
|
+
|
|
441
|
+
# Add hostname if available
|
|
442
|
+
if exp and getattr(exp, "hostname", None):
|
|
443
|
+
display_parts.append(f"[{exp.hostname}]")
|
|
444
|
+
|
|
445
|
+
# Add job stats if available
|
|
446
|
+
if exp:
|
|
447
|
+
display_parts.append(f"({exp.finished_jobs}/{exp.total_jobs} jobs)")
|
|
448
|
+
|
|
449
|
+
display_str = " ".join(display_parts)
|
|
450
|
+
cprint(display_str, "cyan")
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def _run_monitor_ui(
|
|
454
|
+
state_provider, workdir: Path, console: bool, port: int, title: str = ""
|
|
455
|
+
):
|
|
456
|
+
"""Shared code for running monitor UI (TUI or web)
|
|
457
|
+
|
|
458
|
+
Args:
|
|
459
|
+
state_provider: StateProvider instance (local or remote)
|
|
460
|
+
workdir: Local workspace/cache directory
|
|
461
|
+
console: If True, use TUI; otherwise use web UI
|
|
462
|
+
port: Port for web server
|
|
463
|
+
title: Optional title for status messages
|
|
464
|
+
"""
|
|
465
|
+
try:
|
|
466
|
+
if console:
|
|
467
|
+
# Use Textual TUI
|
|
468
|
+
from experimaestro.tui import ExperimentTUI
|
|
469
|
+
|
|
470
|
+
app = ExperimentTUI(
|
|
471
|
+
workdir, state_provider=state_provider, watch=True, show_logs=True
|
|
472
|
+
)
|
|
473
|
+
app.run()
|
|
315
474
|
else:
|
|
316
|
-
|
|
475
|
+
# Use React web server
|
|
476
|
+
from experimaestro.webui import WebUIServer
|
|
477
|
+
|
|
478
|
+
if title:
|
|
479
|
+
cprint(
|
|
480
|
+
f"Starting experiment monitor for {title} on http://localhost:{port}",
|
|
481
|
+
"green",
|
|
482
|
+
)
|
|
483
|
+
else:
|
|
484
|
+
cprint(
|
|
485
|
+
f"Starting experiment monitor on http://localhost:{port}", "green"
|
|
486
|
+
)
|
|
487
|
+
cprint("Press Ctrl+C to stop", "yellow")
|
|
488
|
+
|
|
489
|
+
settings = ServerSettings()
|
|
490
|
+
settings.port = port
|
|
491
|
+
server = WebUIServer.instance(settings, state_provider=state_provider)
|
|
492
|
+
server.start()
|
|
493
|
+
|
|
494
|
+
try:
|
|
495
|
+
import time
|
|
496
|
+
|
|
497
|
+
while True:
|
|
498
|
+
time.sleep(1)
|
|
499
|
+
except KeyboardInterrupt:
|
|
500
|
+
pass
|
|
501
|
+
finally:
|
|
502
|
+
cprint("\nShutting down...", "yellow")
|
|
503
|
+
if state_provider:
|
|
504
|
+
state_provider.close()
|
|
317
505
|
|
|
318
506
|
|
|
319
507
|
@experiments.command()
|
|
@@ -322,115 +510,186 @@ def list(workdir: Path):
|
|
|
322
510
|
"--port", type=int, default=12345, help="Port for web server (default: 12345)"
|
|
323
511
|
)
|
|
324
512
|
@click.option(
|
|
325
|
-
"--
|
|
513
|
+
"--watcher",
|
|
514
|
+
type=click.Choice(["auto", "polling", "inotify", "fsevents", "kqueue", "windows"]),
|
|
515
|
+
default="auto",
|
|
516
|
+
help="Filesystem watcher type (auto=platform default, polling=network mounts)",
|
|
517
|
+
)
|
|
518
|
+
@click.option(
|
|
519
|
+
"--polling-interval",
|
|
520
|
+
type=float,
|
|
521
|
+
default=1.0,
|
|
522
|
+
help="Polling interval in seconds (only for --watcher=polling)",
|
|
523
|
+
)
|
|
524
|
+
@click.option(
|
|
525
|
+
"--sync",
|
|
526
|
+
is_flag=True,
|
|
527
|
+
hidden=True,
|
|
528
|
+
help="Deprecated: no longer needed (filesystem state is always current)",
|
|
326
529
|
)
|
|
327
530
|
@pass_cfg
|
|
328
|
-
def monitor(
|
|
329
|
-
|
|
330
|
-
|
|
531
|
+
def monitor(
|
|
532
|
+
workdir: Path,
|
|
533
|
+
console: bool,
|
|
534
|
+
port: int,
|
|
535
|
+
watcher: str,
|
|
536
|
+
polling_interval: float,
|
|
537
|
+
sync: bool,
|
|
538
|
+
):
|
|
539
|
+
"""Monitor local experiments with web UI or console TUI"""
|
|
540
|
+
# --sync is deprecated (kept for backwards compatibility)
|
|
331
541
|
if sync:
|
|
332
|
-
|
|
542
|
+
cprint(
|
|
543
|
+
"Note: --sync is deprecated and no longer needed "
|
|
544
|
+
"(filesystem state is always current)",
|
|
545
|
+
"yellow",
|
|
546
|
+
)
|
|
333
547
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
cprint("Sync complete", "green")
|
|
548
|
+
# Configure filesystem watcher type
|
|
549
|
+
from experimaestro.ipc import IPCom, WatcherType
|
|
337
550
|
|
|
338
|
-
if
|
|
339
|
-
|
|
340
|
-
|
|
551
|
+
if watcher != "auto":
|
|
552
|
+
IPCom.set_watcher_type(WatcherType(watcher), polling_interval)
|
|
553
|
+
elif polling_interval != 1.0:
|
|
554
|
+
IPCom.set_watcher_type(WatcherType.POLLING, polling_interval)
|
|
341
555
|
|
|
342
|
-
|
|
343
|
-
app.run()
|
|
344
|
-
else:
|
|
345
|
-
# Use React web server
|
|
346
|
-
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
347
|
-
from experimaestro.server import Server
|
|
556
|
+
from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
|
|
348
557
|
|
|
349
|
-
|
|
350
|
-
cprint("Press Ctrl+C to stop", "yellow")
|
|
558
|
+
state_provider = WorkspaceStateProvider.get_instance(workdir)
|
|
351
559
|
|
|
352
|
-
|
|
353
|
-
workdir,
|
|
354
|
-
sync_on_start=not sync, # Skip auto-sync if we just did a forced one
|
|
355
|
-
)
|
|
356
|
-
settings = ServerSettings()
|
|
357
|
-
settings.port = port
|
|
358
|
-
server = Server.instance(settings, state_provider=state_provider)
|
|
359
|
-
server.start()
|
|
560
|
+
_run_monitor_ui(state_provider, workdir, console, port)
|
|
360
561
|
|
|
361
|
-
try:
|
|
362
|
-
import time
|
|
363
562
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
563
|
+
@experiments.command("ssh-monitor")
|
|
564
|
+
@click.argument("host", type=str)
|
|
565
|
+
@click.argument("remote_workdir", type=str)
|
|
566
|
+
@click.option("--console", is_flag=True, help="Use console TUI instead of web UI")
|
|
567
|
+
@click.option(
|
|
568
|
+
"--port", type=int, default=12345, help="Port for web server (default: 12345)"
|
|
569
|
+
)
|
|
570
|
+
@click.option(
|
|
571
|
+
"--watcher",
|
|
572
|
+
type=click.Choice(["auto", "polling", "inotify", "fsevents", "kqueue", "windows"]),
|
|
573
|
+
default="auto",
|
|
574
|
+
help="Filesystem watcher type (auto=platform default, polling=network mounts)",
|
|
575
|
+
)
|
|
576
|
+
@click.option(
|
|
577
|
+
"--polling-interval",
|
|
578
|
+
type=float,
|
|
579
|
+
default=1.0,
|
|
580
|
+
help="Polling interval in seconds (only for --watcher=polling)",
|
|
581
|
+
)
|
|
582
|
+
@click.option(
|
|
583
|
+
"--remote-xpm",
|
|
584
|
+
type=str,
|
|
585
|
+
default=None,
|
|
586
|
+
help="Path to experimaestro on remote host (default: use 'uv tool run')",
|
|
587
|
+
)
|
|
588
|
+
@click.option(
|
|
589
|
+
"--ssh-option",
|
|
590
|
+
"-o",
|
|
591
|
+
multiple=True,
|
|
592
|
+
help="Additional SSH options (can be repeated, e.g., -o '-p 2222')",
|
|
593
|
+
)
|
|
594
|
+
def ssh_monitor(
|
|
595
|
+
host: str,
|
|
596
|
+
remote_workdir: str,
|
|
597
|
+
console: bool,
|
|
598
|
+
port: int,
|
|
599
|
+
watcher: str,
|
|
600
|
+
polling_interval: float,
|
|
601
|
+
remote_xpm: str,
|
|
602
|
+
ssh_option: tuple,
|
|
603
|
+
):
|
|
604
|
+
"""Monitor experiments on a remote server via SSH
|
|
605
|
+
|
|
606
|
+
HOST is the SSH host (e.g., user@server)
|
|
607
|
+
REMOTE_WORKDIR is the workspace path on the remote server
|
|
608
|
+
|
|
609
|
+
Examples:
|
|
610
|
+
experimaestro experiments ssh-monitor myserver /path/to/workspace
|
|
611
|
+
experimaestro experiments ssh-monitor user@host /workspace --console
|
|
612
|
+
experimaestro experiments ssh-monitor host /workspace --remote-xpm /opt/xpm/bin/experimaestro
|
|
613
|
+
"""
|
|
614
|
+
# Configure filesystem watcher type
|
|
615
|
+
from experimaestro.ipc import IPCom, WatcherType
|
|
616
|
+
|
|
617
|
+
if watcher != "auto":
|
|
618
|
+
IPCom.set_watcher_type(WatcherType(watcher), polling_interval)
|
|
619
|
+
elif polling_interval != 1.0:
|
|
620
|
+
IPCom.set_watcher_type(WatcherType.POLLING, polling_interval)
|
|
621
|
+
|
|
622
|
+
from experimaestro.scheduler.remote.client import SSHStateProviderClient
|
|
623
|
+
|
|
624
|
+
cprint(f"Connecting to {host}...", "yellow")
|
|
625
|
+
state_provider = SSHStateProviderClient(
|
|
626
|
+
host=host,
|
|
627
|
+
remote_workspace=remote_workdir,
|
|
628
|
+
ssh_options=list(ssh_option) if ssh_option else None,
|
|
629
|
+
remote_xpm_path=remote_xpm,
|
|
630
|
+
)
|
|
631
|
+
try:
|
|
632
|
+
state_provider.connect()
|
|
633
|
+
cprint(f"Connected to {host}", "green")
|
|
634
|
+
except Exception as e:
|
|
635
|
+
cprint(f"Failed to connect: {e}", "red")
|
|
636
|
+
raise click.Abort()
|
|
637
|
+
|
|
638
|
+
_run_monitor_ui(
|
|
639
|
+
state_provider,
|
|
640
|
+
state_provider.local_cache_dir,
|
|
641
|
+
console,
|
|
642
|
+
port,
|
|
643
|
+
title=host,
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
@experiments.command("monitor-server")
|
|
648
|
+
@pass_cfg
|
|
649
|
+
def monitor_server(workdir: Path):
|
|
650
|
+
"""Start monitoring server for SSH connections (JSON-RPC over stdio)
|
|
651
|
+
|
|
652
|
+
This command is intended to be run over SSH to provide remote monitoring.
|
|
653
|
+
Communication is via JSON-RPC over stdin/stdout.
|
|
654
|
+
|
|
655
|
+
Example:
|
|
656
|
+
ssh host 'experimaestro experiments --workdir /path monitor-server'
|
|
657
|
+
"""
|
|
658
|
+
from experimaestro.scheduler.remote.server import SSHStateProviderServer
|
|
659
|
+
|
|
660
|
+
server = SSHStateProviderServer(workdir)
|
|
661
|
+
try:
|
|
662
|
+
server.start()
|
|
663
|
+
except KeyboardInterrupt:
|
|
664
|
+
server.stop()
|
|
369
665
|
|
|
370
666
|
|
|
371
667
|
@experiments.command()
|
|
372
668
|
@click.option(
|
|
373
669
|
"--dry-run",
|
|
374
670
|
is_flag=True,
|
|
375
|
-
help="
|
|
671
|
+
help="[DEPRECATED] No longer needed with filesystem-based state tracking",
|
|
376
672
|
)
|
|
377
673
|
@click.option(
|
|
378
674
|
"--force",
|
|
379
675
|
is_flag=True,
|
|
380
|
-
help="
|
|
676
|
+
help="[DEPRECATED] No longer needed with filesystem-based state tracking",
|
|
381
677
|
)
|
|
382
678
|
@click.option(
|
|
383
679
|
"--no-wait",
|
|
384
680
|
is_flag=True,
|
|
385
|
-
help="
|
|
681
|
+
help="[DEPRECATED] No longer needed with filesystem-based state tracking",
|
|
386
682
|
)
|
|
387
683
|
@pass_cfg
|
|
388
684
|
def sync(workdir: Path, dry_run: bool, force: bool, no_wait: bool):
|
|
389
|
-
"""Synchronize workspace database from disk state
|
|
685
|
+
"""[DEPRECATED] Synchronize workspace database from disk state
|
|
390
686
|
|
|
391
|
-
|
|
392
|
-
|
|
687
|
+
This command is deprecated. With the new filesystem-based state tracking,
|
|
688
|
+
state is read directly from status.json and events files. No synchronization
|
|
689
|
+
is needed.
|
|
393
690
|
"""
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
# Get settings and workspace settings
|
|
399
|
-
settings = Settings.instance()
|
|
400
|
-
ws_settings = find_workspace(workdir=workdir)
|
|
401
|
-
|
|
402
|
-
# Create workspace instance (manages database lifecycle)
|
|
403
|
-
workspace = Workspace(
|
|
404
|
-
settings=settings,
|
|
405
|
-
workspace_settings=ws_settings,
|
|
406
|
-
sync_on_init=False, # Don't sync on init since we're explicitly syncing
|
|
691
|
+
cprint(
|
|
692
|
+
"Warning: 'sync' command is deprecated. "
|
|
693
|
+
"State is now tracked via filesystem (status.json) - no sync needed.",
|
|
694
|
+
"yellow",
|
|
407
695
|
)
|
|
408
|
-
|
|
409
|
-
try:
|
|
410
|
-
# Enter workspace context to initialize database
|
|
411
|
-
with workspace:
|
|
412
|
-
cprint(f"Syncing workspace: {workspace.path}", "cyan")
|
|
413
|
-
if dry_run:
|
|
414
|
-
cprint("DRY RUN MODE: No changes will be written", "yellow")
|
|
415
|
-
if force:
|
|
416
|
-
cprint("FORCE MODE: Bypassing time throttling", "yellow")
|
|
417
|
-
|
|
418
|
-
# Run sync
|
|
419
|
-
sync_workspace_from_disk(
|
|
420
|
-
workspace=workspace,
|
|
421
|
-
write_mode=not dry_run,
|
|
422
|
-
force=force,
|
|
423
|
-
blocking=not no_wait,
|
|
424
|
-
)
|
|
425
|
-
|
|
426
|
-
cprint("Sync completed successfully", "green")
|
|
427
|
-
|
|
428
|
-
except RuntimeError as e:
|
|
429
|
-
cprint(f"Sync failed: {e}", "red")
|
|
430
|
-
sys.exit(1)
|
|
431
|
-
except Exception as e:
|
|
432
|
-
cprint(f"Unexpected error during sync: {e}", "red")
|
|
433
|
-
import traceback
|
|
434
|
-
|
|
435
|
-
traceback.print_exc()
|
|
436
|
-
sys.exit(1)
|