experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (152)
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +239 -126
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +217 -50
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +629 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +732 -167
  36. experimaestro/scheduler/interfaces.py +316 -101
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  39. experimaestro/scheduler/remote/client.py +171 -117
  40. experimaestro/scheduler/remote/protocol.py +8 -193
  41. experimaestro/scheduler/remote/server.py +95 -71
  42. experimaestro/scheduler/services.py +53 -28
  43. experimaestro/scheduler/state_provider.py +663 -2430
  44. experimaestro/scheduler/state_status.py +1247 -0
  45. experimaestro/scheduler/transient.py +31 -0
  46. experimaestro/scheduler/workspace.py +1 -1
  47. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  48. experimaestro/scriptbuilder.py +4 -4
  49. experimaestro/settings.py +36 -0
  50. experimaestro/tests/conftest.py +33 -5
  51. experimaestro/tests/connectors/bin/executable.py +1 -1
  52. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  53. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  54. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  55. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  56. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  58. experimaestro/tests/launchers/bin/test.py +1 -0
  59. experimaestro/tests/launchers/test_slurm.py +9 -9
  60. experimaestro/tests/partial_reschedule.py +46 -0
  61. experimaestro/tests/restart.py +3 -3
  62. experimaestro/tests/restart_main.py +1 -0
  63. experimaestro/tests/scripts/notifyandwait.py +1 -0
  64. experimaestro/tests/task_partial.py +38 -0
  65. experimaestro/tests/task_tokens.py +2 -2
  66. experimaestro/tests/tasks/test_dynamic.py +6 -6
  67. experimaestro/tests/test_dependencies.py +3 -3
  68. experimaestro/tests/test_deprecated.py +15 -15
  69. experimaestro/tests/test_dynamic_locking.py +317 -0
  70. experimaestro/tests/test_environment.py +24 -14
  71. experimaestro/tests/test_experiment.py +171 -36
  72. experimaestro/tests/test_identifier.py +25 -25
  73. experimaestro/tests/test_identifier_stability.py +3 -5
  74. experimaestro/tests/test_multitoken.py +2 -4
  75. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  76. experimaestro/tests/test_partial_paths.py +81 -138
  77. experimaestro/tests/test_pre_experiment.py +219 -0
  78. experimaestro/tests/test_progress.py +2 -8
  79. experimaestro/tests/test_remote_state.py +560 -99
  80. experimaestro/tests/test_stray_jobs.py +261 -0
  81. experimaestro/tests/test_tasks.py +1 -2
  82. experimaestro/tests/test_token_locking.py +52 -67
  83. experimaestro/tests/test_tokens.py +5 -6
  84. experimaestro/tests/test_transient.py +225 -0
  85. experimaestro/tests/test_workspace_state_provider.py +768 -0
  86. experimaestro/tests/token_reschedule.py +1 -3
  87. experimaestro/tests/utils.py +2 -7
  88. experimaestro/tokens.py +227 -372
  89. experimaestro/tools/diff.py +1 -0
  90. experimaestro/tools/documentation.py +4 -5
  91. experimaestro/tools/jobs.py +1 -2
  92. experimaestro/tui/app.py +438 -1966
  93. experimaestro/tui/app.tcss +162 -0
  94. experimaestro/tui/dialogs.py +172 -0
  95. experimaestro/tui/log_viewer.py +253 -3
  96. experimaestro/tui/messages.py +137 -0
  97. experimaestro/tui/utils.py +54 -0
  98. experimaestro/tui/widgets/__init__.py +23 -0
  99. experimaestro/tui/widgets/experiments.py +468 -0
  100. experimaestro/tui/widgets/global_services.py +238 -0
  101. experimaestro/tui/widgets/jobs.py +972 -0
  102. experimaestro/tui/widgets/log.py +156 -0
  103. experimaestro/tui/widgets/orphans.py +363 -0
  104. experimaestro/tui/widgets/runs.py +185 -0
  105. experimaestro/tui/widgets/services.py +314 -0
  106. experimaestro/tui/widgets/stray_jobs.py +528 -0
  107. experimaestro/utils/__init__.py +1 -1
  108. experimaestro/utils/environment.py +105 -22
  109. experimaestro/utils/fswatcher.py +124 -0
  110. experimaestro/utils/jobs.py +1 -2
  111. experimaestro/utils/jupyter.py +1 -2
  112. experimaestro/utils/logging.py +72 -0
  113. experimaestro/version.py +2 -2
  114. experimaestro/webui/__init__.py +9 -0
  115. experimaestro/webui/app.py +117 -0
  116. experimaestro/{server → webui}/data/index.css +66 -11
  117. experimaestro/webui/data/index.css.map +1 -0
  118. experimaestro/{server → webui}/data/index.js +82763 -87217
  119. experimaestro/webui/data/index.js.map +1 -0
  120. experimaestro/webui/routes/__init__.py +5 -0
  121. experimaestro/webui/routes/auth.py +53 -0
  122. experimaestro/webui/routes/proxy.py +117 -0
  123. experimaestro/webui/server.py +200 -0
  124. experimaestro/webui/state_bridge.py +152 -0
  125. experimaestro/webui/websocket.py +413 -0
  126. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
  127. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  128. experimaestro/cli/progress.py +0 -269
  129. experimaestro/scheduler/state.py +0 -75
  130. experimaestro/scheduler/state_db.py +0 -437
  131. experimaestro/scheduler/state_sync.py +0 -891
  132. experimaestro/server/__init__.py +0 -467
  133. experimaestro/server/data/index.css.map +0 -1
  134. experimaestro/server/data/index.js.map +0 -1
  135. experimaestro/tests/test_cli_jobs.py +0 -615
  136. experimaestro/tests/test_file_progress.py +0 -425
  137. experimaestro/tests/test_file_progress_integration.py +0 -477
  138. experimaestro/tests/test_state_db.py +0 -434
  139. experimaestro-2.0.0b8.dist-info/RECORD +0 -187
  140. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  141. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  142. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  143. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  145. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  147. /experimaestro/{server → webui}/data/index.html +0 -0
  148. /experimaestro/{server → webui}/data/login.html +0 -0
  149. /experimaestro/{server → webui}/data/manifest.json +0 -0
  150. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  151. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  152. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
@@ -27,10 +27,7 @@ class CudaSpecification:
27
27
  return (self.memory >= spec.memory) and (self.min_memory <= spec.memory)
28
28
 
29
29
  def __repr__(self):
30
- return (
31
- f"CUDA({self.model} "
32
- f"max={format_size(self.memory, binary=True)}/min={format_size(self.min_memory, binary=True)})"
33
- )
30
+ return f"CUDA({self.model} max={format_size(self.memory, binary=True)}/min={format_size(self.min_memory, binary=True)})"
34
31
 
35
32
 
36
33
  @dataclass
@@ -48,11 +45,7 @@ class CPUSpecification:
48
45
  """Number of CPU per GPU (0 if not defined)"""
49
46
 
50
47
  def __repr__(self):
51
- return (
52
- f"CPU("
53
- f"mem={format_size(self.memory, binary=True)}, cores={self.cores}"
54
- ")"
55
- )
48
+ return f"CPU(mem={format_size(self.memory, binary=True)}, cores={self.cores})"
56
49
 
57
50
  def match(self, other: "CPUSpecification"):
58
51
  return (self.memory >= other.memory) and (self.cores >= other.cores)
@@ -1,2 +1,2 @@
1
- # flake8: noqa: F401
2
- from .base import *
1
+ # ruff: noqa: F401
2
+ from .base import * # noqa: F403
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import logging
2
3
  import threading
3
4
  from pathlib import Path
@@ -219,6 +220,13 @@ class SlurmProcessWatcher(threading.Thread):
219
220
  self.cv = ThreadingCondition()
220
221
  self.fetched_event = threading.Event()
221
222
  self.updating_jobs = threading.Lock()
223
+
224
+ # Async waiters: jobid -> list of (asyncio.Event, event_loop)
225
+ self.async_waiters: Dict[
226
+ str, List[Tuple[asyncio.Event, asyncio.AbstractEventLoop]]
227
+ ] = {}
228
+ self.async_waiters_lock = threading.Lock()
229
+
222
230
  self.start()
223
231
 
224
232
  @staticmethod
@@ -250,6 +258,35 @@ class SlurmProcessWatcher(threading.Thread):
250
258
  with self.updating_jobs:
251
259
  return self.jobs.get(jobid)
252
260
 
261
+ def register_async_waiter(
262
+ self, jobid: str, loop: asyncio.AbstractEventLoop
263
+ ) -> asyncio.Event:
264
+ """Register an async waiter for a job.
265
+
266
+ Returns an asyncio.Event that will be set when the job finishes.
267
+ """
268
+ event = loop.create_future()
269
+ with self.async_waiters_lock:
270
+ if jobid not in self.async_waiters:
271
+ self.async_waiters[jobid] = []
272
+ self.async_waiters[jobid].append((event, loop))
273
+ return event
274
+
275
+ def _notify_async_waiters(self):
276
+ """Notify async waiters for finished jobs"""
277
+ with self.async_waiters_lock:
278
+ finished_jobs = []
279
+ for jobid, waiters in self.async_waiters.items():
280
+ state = self.jobs.get(jobid)
281
+ if state and state.finished():
282
+ finished_jobs.append(jobid)
283
+ for future, loop in waiters:
284
+ # Set the result from watcher thread to asyncio loop
285
+ loop.call_soon_threadsafe(future.set_result, state)
286
+
287
+ for jobid in finished_jobs:
288
+ del self.async_waiters[jobid]
289
+
253
290
  def run(self):
254
291
  while self.count > 0:
255
292
  builder = self.launcher.connector.processbuilder()
@@ -280,6 +317,9 @@ class SlurmProcessWatcher(threading.Thread):
280
317
  logger.error("Could not parse line %s", line)
281
318
  process.kill()
282
319
 
320
+ # Notify async waiters for finished jobs
321
+ self._notify_async_waiters()
322
+
283
323
  with self.cv:
284
324
  logger.debug("Jobs %s", self.jobs)
285
325
  self.fetched_event.set()
@@ -310,6 +350,28 @@ class BatchSlurmProcess(Process):
310
350
  self._last_state = state
311
351
  return 0 if state.slurm_state == "COMPLETED" else 1
312
352
 
353
+ async def aio_wait(self) -> int:
354
+ """Asynchronously wait for SLURM job to finish (event-driven)"""
355
+ logger.debug("Async waiting for SLURM job %s", self.jobid)
356
+ loop = asyncio.get_running_loop()
357
+
358
+ with SlurmProcessWatcher.get(self.launcher) as watcher:
359
+ # Check if already finished
360
+ state = watcher.getjob(self.jobid)
361
+ if state and state.finished():
362
+ self._last_state = state
363
+ return 0 if state.slurm_state == "COMPLETED" else 1
364
+
365
+ # Register and wait for the job to finish
366
+ future = watcher.register_async_waiter(self.jobid, loop)
367
+ self._last_state = await future
368
+
369
+ code = 0 if self._last_state.slurm_state == "COMPLETED" else 1
370
+ logger.debug(
371
+ "Finished async wait for SLURM job %s: code %s", self.jobid, code
372
+ )
373
+ return code
374
+
313
375
  def get_job_state(self, code: int) -> "JobState":
314
376
  """Convert SLURM exit code to JobState, detecting timeouts"""
315
377
  from experimaestro.scheduler.jobs import (