experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (154) hide show
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,290 @@
1
+ """Dynamic resources and dependencies that can be waited on asynchronously."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import threading
8
+ import time
9
+ import os
10
+ from abc import ABC, abstractmethod
11
+ from typing import TYPE_CHECKING, Optional
12
+ from weakref import WeakSet
13
+
14
+ if TYPE_CHECKING:
15
+ from experimaestro.scheduler.jobs import Job
16
+ from experimaestro.locking import Lock
17
+
18
+ logger = logging.getLogger("xpm.dynamic")
19
+
20
# Polling configuration
POLL_INTERVAL_INITIAL = 0.1  # seconds


def _parse_poll_interval_max(raw: Optional[str], default: float = 30.0) -> float:
    """Parse the XPM_POLL_INTERVAL_MAX override into a usable interval.

    Args:
        raw: Raw value of the environment variable, or None when it is unset.
        default: Interval (in seconds) used when the override is missing or
            unusable.

    Returns:
        The parsed interval in seconds, or ``default`` when ``raw`` is None,
        is not a number, is NaN, or is not strictly positive.
    """
    if raw is None:
        return default

    try:
        value = float(raw)
    except ValueError:
        # A malformed override must not prevent the module from being imported
        logging.getLogger("xpm.dynamic").warning(
            "Invalid XPM_POLL_INTERVAL_MAX value %r, using %s seconds", raw, default
        )
        return default

    # "not value > 0" (rather than "value <= 0") also rejects NaN
    if not value > 0:
        logging.getLogger("xpm.dynamic").warning(
            "XPM_POLL_INTERVAL_MAX must be positive (got %r), using %s seconds",
            raw,
            default,
        )
        return default

    return value


# Upper bound (seconds) of the polling back-off, overridable from the environment
POLL_INTERVAL_MAX = _parse_poll_interval_max(os.environ.get("XPM_POLL_INTERVAL_MAX"))
POLL_INTERVAL_MULTIPLIER = 1.5
24
+
25
+
26
class DynamicResource(ABC):
    """Interface of a resource whose state changes can be awaited.

    Concrete resources provide two operations: async_wait(), which suspends
    the caller until the state may have changed, and refresh_state(), which
    re-reads the state from the underlying storage.
    """

    @abstractmethod
    async def async_wait(self, timeout: float = 0) -> bool:
        """Suspend until the state of the resource may have changed.

        Args:
            timeout: Upper bound on the wait, in seconds; 0 means that there
                is no limit

        Returns:
            True when a change was signalled, False when the timeout expired
        """

    @abstractmethod
    def refresh_state(self) -> None:
        """Re-read the state of the resource from its underlying storage.

        Invoked by ResourcePoller. Implementations should update their
        internal state and notify waiters as appropriate.
        """
53
+
54
+
55
class ResourcePoller(threading.Thread):
    """Single thread that polls all DynamicResource instances.

    This consolidates polling into one thread instead of creating threads
    per wait operation. Resources register themselves and get polled at
    intervals determined by exponential backoff.

    Deadlines are expressed on the monotonic clock so that adjustments of the
    system clock neither shorten nor extend a timeout.
    """

    _instance: Optional["ResourcePoller"] = None
    _instance_lock = threading.Lock()

    @classmethod
    def instance(cls) -> "ResourcePoller":
        """Get or create the singleton ResourcePoller.

        The polling thread is started when the singleton is created.
        """
        poller = cls._instance
        if poller is None:
            with cls._instance_lock:
                # Re-check under the lock: another thread may have created it
                poller = cls._instance
                if poller is None:
                    poller = cls()
                    poller.start()
                    cls._instance = poller
        return poller

    @classmethod
    def reset(cls) -> None:
        """Reset the singleton instance. Used for testing.

        The previous poller (if any) forgets its waiters and resources and
        its thread is asked to terminate, so that repeated resets do not
        accumulate polling threads.
        """
        with cls._instance_lock:
            poller = cls._instance
            cls._instance = None

        if poller is not None:
            poller._request_shutdown()

    def __init__(self):
        super().__init__(daemon=True, name="ResourcePoller")
        self._lock = threading.Lock()
        self._cv = threading.Condition(self._lock)

        # Resources waiting to be polled (weak references)
        self._resources: WeakSet[DynamicResource] = WeakSet()

        # Async waiters: resource_id -> list of (asyncio.Event, loop, deadline)
        # where deadline is a time.monotonic() value (None = no timeout)
        self._waiters: dict[
            int, list[tuple[asyncio.Event, asyncio.AbstractEventLoop, Optional[float]]]
        ] = {}

        # Set by register(), cleared when run() takes its snapshot: tells the
        # polling loop that a registration happened while it was not waiting
        # on the condition (the corresponding notify() would otherwise be lost)
        self._registration_pending = False

        # Set by reset() to make run() return
        self._shutdown_requested = False

    def _request_shutdown(self) -> None:
        """Drop all waiters/resources and ask the polling loop to terminate."""
        with self._lock:
            self._shutdown_requested = True
            self._waiters.clear()
            self._resources.clear()
            self._cv.notify_all()

    def register(
        self,
        resource: DynamicResource,
        loop: asyncio.AbstractEventLoop,
        timeout: float = 0,
    ) -> asyncio.Event:
        """Register a resource for polling and return an event to wait on.

        Args:
            resource: The resource to poll
            loop: The asyncio event loop to notify
            timeout: Timeout in seconds (0 = no timeout)

        Returns:
            asyncio.Event that will be set when resource changes or timeout
        """
        event = asyncio.Event()
        deadline = time.monotonic() + timeout if timeout > 0 else None

        with self._lock:
            self._resources.add(resource)
            self._waiters.setdefault(id(resource), []).append((event, loop, deadline))
            self._registration_pending = True
            self._cv.notify()

        return event

    def _notify_waiters(self, resource: DynamicResource) -> None:
        """Notify all waiters for a resource."""
        with self._lock:
            waiters = self._waiters.pop(id(resource), [])

        # Events are set outside of the lock, from their own event loop
        for event, loop, _ in waiters:
            try:
                loop.call_soon_threadsafe(event.set)
            except RuntimeError:
                # Loop might be closed
                pass

    def notify(self, resource: DynamicResource) -> None:
        """Notify that a resource's state has changed.

        Called by resources when they detect a state change (e.g., via watchdog).
        This wakes up any waiters for this resource immediately.
        """
        self._notify_waiters(resource)

    def _check_timeouts(self) -> Optional[float]:
        """Check for timed out waiters and return time until next timeout.

        Returns:
            Number of seconds until the earliest remaining deadline, or None
            when no remaining waiter has a deadline
        """
        now = time.monotonic()
        next_timeout: Optional[float] = None

        with self._lock:
            for resource_id, waiters in list(self._waiters.items()):
                remaining = []
                for event, loop, deadline in waiters:
                    if deadline is not None and now >= deadline:
                        # Timed out - notify with event set
                        try:
                            loop.call_soon_threadsafe(event.set)
                        except RuntimeError:
                            # Loop might be closed
                            pass
                        continue

                    remaining.append((event, loop, deadline))
                    if deadline is not None:
                        delay = deadline - now
                        if next_timeout is None or delay < next_timeout:
                            next_timeout = delay

                if remaining:
                    self._waiters[resource_id] = remaining
                else:
                    self._waiters.pop(resource_id, None)

        return next_timeout

    def run(self):
        """Main polling loop.

        Runs until a shutdown is requested through reset().
        """
        poll_interval = POLL_INTERVAL_INITIAL

        while True:
            with self._lock:
                if self._shutdown_requested:
                    return

                if not self._waiters:
                    # No active waiters, wait for registration. The check and
                    # the wait happen under the same lock acquisition, so a
                    # registration cannot slip in between them unnoticed.
                    self._registration_pending = False
                    self._cv.wait(timeout=1.0)
                    poll_interval = POLL_INTERVAL_INITIAL
                    continue

                # Snapshot of the resources to poll
                resources = list(self._resources)
                self._registration_pending = False

            # Poll each resource
            for resource in resources:
                try:
                    resource.refresh_state()
                    self._notify_waiters(resource)
                except Exception:
                    logger.exception("Error polling resource %s", resource)

            # Check timeouts
            next_timeout = self._check_timeouts()

            # Calculate sleep time
            sleep_time = poll_interval
            if next_timeout is not None:
                sleep_time = min(sleep_time, next_timeout)

            # Sleep with ability to wake up on new registration; skip the
            # sleep altogether if a registration (or a shutdown request)
            # arrived while the resources were being polled
            with self._lock:
                if not (self._registration_pending or self._shutdown_requested):
                    self._cv.wait(timeout=max(0.01, sleep_time))

            # Increase poll interval (exponential backoff)
            poll_interval = min(
                poll_interval * POLL_INTERVAL_MULTIPLIER, POLL_INTERVAL_MAX
            )
218
+
219
+
220
class DynamicDependency(ABC):
    """Base class for dynamic dependencies.

    Dynamic dependencies (like tokens) can change state at any time - availability
    can go from OK to WAIT and back. These require special handling during lock
    acquisition with retry logic.

    The origin must be a DynamicResource that supports async_wait().

    Subclasses must implement:
    - _create_lock(): Create the appropriate lock object for this dependency
    """

    origin: DynamicResource
    target: Optional["Job"]

    def __init__(self, origin: DynamicResource):
        self.origin = origin
        # No target is attached at construction time
        self.target = None

    def is_dynamic(self) -> bool:
        """Returns True - this is a dynamic dependency."""
        return True

    @abstractmethod
    def _create_lock(self) -> Lock:
        """Create a lock object for this dependency.

        Returns:
            Lock object (subclass of DynamicDependencyLock)
        """
        ...

    async def aio_lock(self, timeout: float = 0) -> Lock:
        """Acquire lock on the resource with async waiting.

        Uses the resource's async_wait() for efficient waiting without threads.
        A new lock object is created and acquired on every attempt; each
        failed attempt is followed by a wait on the origin resource.

        Args:
            timeout: Timeout in seconds (0 = wait indefinitely)

        Returns:
            Lock object

        Raises:
            LockError: If lock cannot be acquired within timeout
        """
        # Imported here rather than at module level (the module only imports
        # experimaestro.locking names under TYPE_CHECKING)
        from experimaestro.locking import LockError

        # Monotonic deadline: the timeout is not affected by adjustments of
        # the system clock
        deadline = time.monotonic() + timeout if timeout > 0 else None

        while True:
            try:
                lock = self._create_lock()
                lock.acquire()
                return lock
            except LockError:
                if deadline is None:
                    remaining = 0  # Wait indefinitely
                else:
                    # Calculate remaining timeout
                    remaining = deadline - time.monotonic()
                    if remaining <= 0:
                        raise LockError(f"Timeout waiting for resource: {self.origin}")

            # Wait for resource state to change before trying again
            await self.origin.async_wait(timeout=remaining)

    def __repr__(self) -> str:
        return f"DynamicDep[{self.origin}]"
@@ -1,2 +1,6 @@
1
- from .cli import experiments_cli, ExperimentHelper, ExperimentCallable # noqa: F401
2
- from .configuration import configuration, ConfigurationBase # noqa: F401
1
+ from .configuration import ( # noqa: F401
2
+ configuration,
3
+ ConfigurationBase,
4
+ DirtyGitAction,
5
+ )
6
+ from .cli import ExperimentHelper # noqa: F401