experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +239 -126
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +217 -50
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +629 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +732 -167
- experimaestro/scheduler/interfaces.py +316 -101
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +171 -117
- experimaestro/scheduler/remote/protocol.py +8 -193
- experimaestro/scheduler/remote/server.py +95 -71
- experimaestro/scheduler/services.py +53 -28
- experimaestro/scheduler/state_provider.py +663 -2430
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +560 -99
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +438 -1966
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -437
- experimaestro/scheduler/state_sync.py +0 -891
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b8.dist-info/RECORD +0 -187
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
|
@@ -27,10 +27,7 @@ class CudaSpecification:
|
|
|
27
27
|
return (self.memory >= spec.memory) and (self.min_memory <= spec.memory)
|
|
28
28
|
|
|
29
29
|
def __repr__(self):
|
|
30
|
-
return (
|
|
31
|
-
f"CUDA({self.model} "
|
|
32
|
-
f"max={format_size(self.memory, binary=True)}/min={format_size(self.min_memory, binary=True)})"
|
|
33
|
-
)
|
|
30
|
+
return f"CUDA({self.model} max={format_size(self.memory, binary=True)}/min={format_size(self.min_memory, binary=True)})"
|
|
34
31
|
|
|
35
32
|
|
|
36
33
|
@dataclass
|
|
@@ -48,11 +45,7 @@ class CPUSpecification:
|
|
|
48
45
|
"""Number of CPU per GPU (0 if not defined)"""
|
|
49
46
|
|
|
50
47
|
def __repr__(self):
|
|
51
|
-
return (
|
|
52
|
-
f"CPU("
|
|
53
|
-
f"mem={format_size(self.memory, binary=True)}, cores={self.cores}"
|
|
54
|
-
")"
|
|
55
|
-
)
|
|
48
|
+
return f"CPU(mem={format_size(self.memory, binary=True)}, cores={self.cores})"
|
|
56
49
|
|
|
57
50
|
def match(self, other: "CPUSpecification"):
|
|
58
51
|
return (self.memory >= other.memory) and (self.cores >= other.cores)
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
#
|
|
2
|
-
from .base import *
|
|
1
|
+
# ruff: noqa: F401
|
|
2
|
+
from .base import * # noqa: F403
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import logging
|
|
2
3
|
import threading
|
|
3
4
|
from pathlib import Path
|
|
@@ -219,6 +220,13 @@ class SlurmProcessWatcher(threading.Thread):
|
|
|
219
220
|
self.cv = ThreadingCondition()
|
|
220
221
|
self.fetched_event = threading.Event()
|
|
221
222
|
self.updating_jobs = threading.Lock()
|
|
223
|
+
|
|
224
|
+
# Async waiters: jobid -> list of (asyncio.Future, event_loop)
|
|
225
|
+
self.async_waiters: Dict[
|
|
226
|
+
str, List[Tuple[asyncio.Event, asyncio.AbstractEventLoop]]
|
|
227
|
+
] = {}
|
|
228
|
+
self.async_waiters_lock = threading.Lock()
|
|
229
|
+
|
|
222
230
|
self.start()
|
|
223
231
|
|
|
224
232
|
@staticmethod
|
|
@@ -250,6 +258,35 @@ class SlurmProcessWatcher(threading.Thread):
|
|
|
250
258
|
with self.updating_jobs:
|
|
251
259
|
return self.jobs.get(jobid)
|
|
252
260
|
|
|
261
|
+
def register_async_waiter(
    self, jobid: str, loop: asyncio.AbstractEventLoop
) -> asyncio.Future:
    """Register an async waiter for a job.

    A new future is created on ``loop`` and stored, together with that loop,
    in ``self.async_waiters`` under ``jobid``.

    :param jobid: identifier of the job to wait for
    :param loop: event loop on which the future is created
    :return: the future created for this waiter (a future, not an
        ``asyncio.Event``); ``_notify_async_waiters`` resolves it with the
        job state once that state reports ``finished()``
    """
    future = loop.create_future()
    with self.async_waiters_lock:
        # The loop is kept next to the future so that it can later be
        # resolved through ``loop.call_soon_threadsafe``
        self.async_waiters.setdefault(jobid, []).append((future, loop))
    return future
|
|
274
|
+
|
|
275
|
+
def _notify_async_waiters(self):
|
|
276
|
+
"""Notify async waiters for finished jobs"""
|
|
277
|
+
with self.async_waiters_lock:
|
|
278
|
+
finished_jobs = []
|
|
279
|
+
for jobid, waiters in self.async_waiters.items():
|
|
280
|
+
state = self.jobs.get(jobid)
|
|
281
|
+
if state and state.finished():
|
|
282
|
+
finished_jobs.append(jobid)
|
|
283
|
+
for future, loop in waiters:
|
|
284
|
+
# Set the result from watcher thread to asyncio loop
|
|
285
|
+
loop.call_soon_threadsafe(future.set_result, state)
|
|
286
|
+
|
|
287
|
+
for jobid in finished_jobs:
|
|
288
|
+
del self.async_waiters[jobid]
|
|
289
|
+
|
|
253
290
|
def run(self):
|
|
254
291
|
while self.count > 0:
|
|
255
292
|
builder = self.launcher.connector.processbuilder()
|
|
@@ -280,6 +317,9 @@ class SlurmProcessWatcher(threading.Thread):
|
|
|
280
317
|
logger.error("Could not parse line %s", line)
|
|
281
318
|
process.kill()
|
|
282
319
|
|
|
320
|
+
# Notify async waiters for finished jobs
|
|
321
|
+
self._notify_async_waiters()
|
|
322
|
+
|
|
283
323
|
with self.cv:
|
|
284
324
|
logger.debug("Jobs %s", self.jobs)
|
|
285
325
|
self.fetched_event.set()
|
|
@@ -310,6 +350,28 @@ class BatchSlurmProcess(Process):
|
|
|
310
350
|
self._last_state = state
|
|
311
351
|
return 0 if state.slurm_state == "COMPLETED" else 1
|
|
312
352
|
|
|
353
|
+
async def aio_wait(self) -> int:
    """Wait for the SLURM job to finish without blocking the event loop.

    The final job state is stored in ``self._last_state``.

    :return: 0 when the final SLURM state is ``COMPLETED``, 1 otherwise
    """
    logger.debug("Async waiting for SLURM job %s", self.jobid)
    running_loop = asyncio.get_running_loop()

    with SlurmProcessWatcher.get(self.launcher) as watcher:
        known = watcher.getjob(self.jobid)
        if known and known.finished():
            # Fast path: the watcher already has a finished state for this job
            self._last_state = known
            if known.slurm_state == "COMPLETED":
                return 0
            return 1

        # Otherwise register with the watcher, which resolves the future
        # with the job state once that state is finished
        pending = watcher.register_async_waiter(self.jobid, running_loop)
        self._last_state = await pending

    if self._last_state.slurm_state == "COMPLETED":
        exit_code = 0
    else:
        exit_code = 1
    logger.debug(
        "Finished async wait for SLURM job %s: code %s", self.jobid, exit_code
    )
    return exit_code
|
|
374
|
+
|
|
313
375
|
def get_job_state(self, code: int) -> "JobState":
|
|
314
376
|
"""Convert SLURM exit code to JobState, detecting timeouts"""
|
|
315
377
|
from experimaestro.scheduler.jobs import (
|