experimaestro-2.0.0b4-py3-none-any.whl → experimaestro-2.0.0b17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. (The link to further details was not preserved in this extract.)

Files changed (154)
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
@@ -59,9 +59,9 @@ class LauncherRegistry:
59
59
  ).expanduser()
60
60
 
61
61
  if LauncherRegistry.CURRENT_CONFIG_DIR not in LauncherRegistry.INSTANCES:
62
- LauncherRegistry.INSTANCES[
63
- LauncherRegistry.CURRENT_CONFIG_DIR
64
- ] = LauncherRegistry(LauncherRegistry.CURRENT_CONFIG_DIR)
62
+ LauncherRegistry.INSTANCES[LauncherRegistry.CURRENT_CONFIG_DIR] = (
63
+ LauncherRegistry(LauncherRegistry.CURRENT_CONFIG_DIR)
64
+ )
65
65
 
66
66
  return LauncherRegistry.INSTANCES[LauncherRegistry.CURRENT_CONFIG_DIR]
67
67
 
@@ -83,7 +83,6 @@ class LauncherRegistry:
83
83
 
84
84
  # Register the find launcher function if it exists
85
85
  launchers_py = basepath / "launchers.py"
86
- print(f"basepath {launchers_py}")
87
86
  if launchers_py.is_file():
88
87
  logger.info("Loading %s", launchers_py)
89
88
 
@@ -163,9 +162,9 @@ class LauncherRegistry:
163
162
  if self.find_launcher_fn is not None:
164
163
  for spec in specs.requirements:
165
164
  if launcher := self.find_launcher_fn(spec, tags):
166
- assert isinstance(
167
- launcher, Launcher
168
- ), "f{self.find_launcher_fn} did not return a Launcher but {type(launcher)}"
165
+ assert isinstance(launcher, Launcher), (
166
+ "f{self.find_launcher_fn} did not return a Launcher but {type(launcher)}"
167
+ )
169
168
  return launcher
170
169
 
171
170
  return None
@@ -27,10 +27,7 @@ class CudaSpecification:
27
27
  return (self.memory >= spec.memory) and (self.min_memory <= spec.memory)
28
28
 
29
29
  def __repr__(self):
30
- return (
31
- f"CUDA({self.model} "
32
- f"max={format_size(self.memory, binary=True)}/min={format_size(self.min_memory, binary=True)})"
33
- )
30
+ return f"CUDA({self.model} max={format_size(self.memory, binary=True)}/min={format_size(self.min_memory, binary=True)})"
34
31
 
35
32
 
36
33
  @dataclass
@@ -48,11 +45,7 @@ class CPUSpecification:
48
45
  """Number of CPU per GPU (0 if not defined)"""
49
46
 
50
47
  def __repr__(self):
51
- return (
52
- f"CPU("
53
- f"mem={format_size(self.memory, binary=True)}, cores={self.cores}"
54
- ")"
55
- )
48
+ return f"CPU(mem={format_size(self.memory, binary=True)}, cores={self.cores})"
56
49
 
57
50
  def match(self, other: "CPUSpecification"):
58
51
  return (self.memory >= other.memory) and (self.cores >= other.cores)
@@ -1,2 +1,2 @@
1
- # flake8: noqa: F401
2
- from .base import *
1
+ # ruff: noqa: F401
2
+ from .base import * # noqa: F403
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import logging
2
3
  import threading
3
4
  from pathlib import Path
@@ -219,6 +220,13 @@ class SlurmProcessWatcher(threading.Thread):
219
220
  self.cv = ThreadingCondition()
220
221
  self.fetched_event = threading.Event()
221
222
  self.updating_jobs = threading.Lock()
223
+
224
+ # Async waiters: jobid -> list of (asyncio.Event, event_loop)
225
+ self.async_waiters: Dict[
226
+ str, List[Tuple[asyncio.Event, asyncio.AbstractEventLoop]]
227
+ ] = {}
228
+ self.async_waiters_lock = threading.Lock()
229
+
222
230
  self.start()
223
231
 
224
232
  @staticmethod
@@ -250,6 +258,35 @@ class SlurmProcessWatcher(threading.Thread):
250
258
  with self.updating_jobs:
251
259
  return self.jobs.get(jobid)
252
260
 
261
+ def register_async_waiter(
262
+ self, jobid: str, loop: asyncio.AbstractEventLoop
263
+ ) -> asyncio.Event:
264
+ """Register an async waiter for a job.
265
+
266
+ Returns an asyncio.Event that will be set when the job finishes.
267
+ """
268
+ event = loop.create_future()
269
+ with self.async_waiters_lock:
270
+ if jobid not in self.async_waiters:
271
+ self.async_waiters[jobid] = []
272
+ self.async_waiters[jobid].append((event, loop))
273
+ return event
274
+
275
+ def _notify_async_waiters(self):
276
+ """Notify async waiters for finished jobs"""
277
+ with self.async_waiters_lock:
278
+ finished_jobs = []
279
+ for jobid, waiters in self.async_waiters.items():
280
+ state = self.jobs.get(jobid)
281
+ if state and state.finished():
282
+ finished_jobs.append(jobid)
283
+ for future, loop in waiters:
284
+ # Set the result from watcher thread to asyncio loop
285
+ loop.call_soon_threadsafe(future.set_result, state)
286
+
287
+ for jobid in finished_jobs:
288
+ del self.async_waiters[jobid]
289
+
253
290
  def run(self):
254
291
  while self.count > 0:
255
292
  builder = self.launcher.connector.processbuilder()
@@ -280,6 +317,9 @@ class SlurmProcessWatcher(threading.Thread):
280
317
  logger.error("Could not parse line %s", line)
281
318
  process.kill()
282
319
 
320
+ # Notify async waiters for finished jobs
321
+ self._notify_async_waiters()
322
+
283
323
  with self.cv:
284
324
  logger.debug("Jobs %s", self.jobs)
285
325
  self.fetched_event.set()
@@ -310,6 +350,28 @@ class BatchSlurmProcess(Process):
310
350
  self._last_state = state
311
351
  return 0 if state.slurm_state == "COMPLETED" else 1
312
352
 
353
+ async def aio_wait(self) -> int:
354
+ """Asynchronously wait for SLURM job to finish (event-driven)"""
355
+ logger.debug("Async waiting for SLURM job %s", self.jobid)
356
+ loop = asyncio.get_running_loop()
357
+
358
+ with SlurmProcessWatcher.get(self.launcher) as watcher:
359
+ # Check if already finished
360
+ state = watcher.getjob(self.jobid)
361
+ if state and state.finished():
362
+ self._last_state = state
363
+ return 0 if state.slurm_state == "COMPLETED" else 1
364
+
365
+ # Register and wait for the job to finish
366
+ future = watcher.register_async_waiter(self.jobid, loop)
367
+ self._last_state = await future
368
+
369
+ code = 0 if self._last_state.slurm_state == "COMPLETED" else 1
370
+ logger.debug(
371
+ "Finished async wait for SLURM job %s: code %s", self.jobid, code
372
+ )
373
+ return code
374
+
313
375
  def get_job_state(self, code: int) -> "JobState":
314
376
  """Convert SLURM exit code to JobState, detecting timeouts"""
315
377
  from experimaestro.scheduler.jobs import (