experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +239 -126
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +217 -50
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +629 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +732 -167
- experimaestro/scheduler/interfaces.py +316 -101
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +171 -117
- experimaestro/scheduler/remote/protocol.py +8 -193
- experimaestro/scheduler/remote/server.py +95 -71
- experimaestro/scheduler/services.py +53 -28
- experimaestro/scheduler/state_provider.py +663 -2430
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +560 -99
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +438 -1966
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -437
- experimaestro/scheduler/state_sync.py +0 -891
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b8.dist-info/RECORD +0 -187
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/dynamic.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Dynamic resources and dependencies that can be waited on asynchronously."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
import os
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from typing import TYPE_CHECKING, Optional
|
|
12
|
+
from weakref import WeakSet
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from experimaestro.scheduler.jobs import Job
|
|
16
|
+
from experimaestro.locking import Lock
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("xpm.dynamic")

# Back-off schedule used by the polling loop.
# The delay starts at POLL_INTERVAL_INITIAL, is multiplied by
# POLL_INTERVAL_MULTIPLIER after each polling round and is capped at
# POLL_INTERVAL_MAX (overridable through the XPM_POLL_INTERVAL_MAX
# environment variable).
POLL_INTERVAL_INITIAL = 0.1  # seconds
POLL_INTERVAL_MAX = float(os.getenv("XPM_POLL_INTERVAL_MAX", "30.0"))
POLL_INTERVAL_MULTIPLIER = 1.5
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DynamicResource(ABC):
    """Interface of a resource whose state changes can be awaited.

    Concrete resources provide two operations: ``async_wait()``, which
    suspends the caller until the state may have changed, and
    ``refresh_state()``, which re-reads the state from wherever it is stored.
    """

    @abstractmethod
    async def async_wait(self, timeout: float = 0) -> bool:
        """Suspend until the resource state may have changed.

        Args:
            timeout: Upper bound on the wait, in seconds; ``0`` means no
                upper bound.

        Returns:
            ``True`` when a change notification was received, ``False``
            when the timeout elapsed first.
        """

    @abstractmethod
    def refresh_state(self) -> None:
        """Reload the resource state from its underlying storage.

        Invoked by ``ResourcePoller``. Implementations are expected to
        update their internal state and notify waiters as appropriate.
        """
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ResourcePoller(threading.Thread):
    """Single daemon thread that polls all registered DynamicResource objects.

    Polling is consolidated into one thread instead of one thread per wait
    operation. Waiters register a resource together with their asyncio loop
    and receive an ``asyncio.Event``; the event is set (through
    ``loop.call_soon_threadsafe``) after the resource has been polled, when
    ``notify()`` is called for it, or when the waiter's deadline has passed.

    The delay between polling rounds grows from ``POLL_INTERVAL_INITIAL`` by
    ``POLL_INTERVAL_MULTIPLIER`` up to ``POLL_INTERVAL_MAX`` and is reset
    whenever the poller finds no waiter.

    Deadlines are expressed on the ``time.monotonic()`` clock so that
    adjustments of the system clock cannot fire or postpone timeouts.
    """

    _instance: Optional["ResourcePoller"] = None
    _instance_lock = threading.Lock()

    @classmethod
    def instance(cls) -> "ResourcePoller":
        """Return the singleton poller, creating and starting it if needed."""
        if cls._instance is None:
            with cls._instance_lock:
                # Re-check under the lock: another thread may have created
                # the instance while we were waiting for it.
                if cls._instance is None:
                    cls._instance = cls()
                    cls._instance.start()
        return cls._instance

    @classmethod
    def reset(cls) -> None:
        """Drop the singleton instance. Used for testing.

        The waiters and resources of the current instance are cleared and
        the class forgets it; the next ``instance()`` call creates a new one.
        """
        # NOTE(review): the previous thread is not stopped and dropped
        # waiters are never set - confirm this is acceptable outside tests.
        with cls._instance_lock:
            if cls._instance is not None:
                # Clear waiters to stop processing
                with cls._instance._lock:
                    cls._instance._waiters.clear()
                    cls._instance._resources.clear()
                cls._instance = None

    def __init__(self):
        super().__init__(daemon=True, name="ResourcePoller")
        self._lock = threading.Lock()
        # Condition sharing self._lock: used to wake the polling loop
        self._cv = threading.Condition(self._lock)

        # Resources waiting to be polled (weak references)
        self._resources: WeakSet["DynamicResource"] = WeakSet()

        # Async waiters: id(resource) -> list of (event, loop, deadline),
        # where deadline is a time.monotonic() value or None (no timeout)
        self._waiters: dict[
            int, list[tuple[asyncio.Event, asyncio.AbstractEventLoop, Optional[float]]]
        ] = {}

    def register(
        self,
        resource: "DynamicResource",
        loop: asyncio.AbstractEventLoop,
        timeout: float = 0,
    ) -> asyncio.Event:
        """Register a resource for polling and return an event to wait on.

        Args:
            resource: The resource to poll
            loop: The asyncio event loop on which the event will be set
            timeout: Timeout in seconds (0 = no timeout)

        Returns:
            asyncio.Event that will be set when the resource is polled or
            notified, or when the timeout expires
        """
        event = asyncio.Event()
        # Monotonic clock: immune to wall-clock jumps (NTP, manual changes)
        deadline = time.monotonic() + timeout if timeout > 0 else None

        with self._lock:
            self._resources.add(resource)
            self._waiters.setdefault(id(resource), []).append((event, loop, deadline))
            # Wake the polling thread if it is idle or sleeping
            self._cv.notify()

        return event

    def _notify_waiters(self, resource: "DynamicResource") -> None:
        """Set the event of every waiter of a resource and forget them."""
        with self._lock:
            waiters = self._waiters.pop(id(resource), [])

        # Events are set outside the lock, on each waiter's own loop
        for event, loop, _ in waiters:
            try:
                loop.call_soon_threadsafe(event.set)
            except RuntimeError:
                # Loop might be closed
                pass

    def notify(self, resource: "DynamicResource") -> None:
        """Notify that a resource's state has changed.

        Called by resources when they detect a state change (e.g., via
        watchdog). This wakes up any waiters for this resource immediately.
        """
        self._notify_waiters(resource)

    def _check_timeouts(self) -> Optional[float]:
        """Expire timed-out waiters.

        Waiters whose deadline has passed get their event set and are
        removed; the others are kept.

        Returns:
            Seconds until the closest remaining deadline, or ``None`` when
            no remaining waiter has a deadline.
        """
        now = time.monotonic()
        next_timeout: Optional[float] = None

        with self._lock:
            for resource_id, waiters in list(self._waiters.items()):
                remaining = []
                for event, loop, deadline in waiters:
                    if deadline is not None and now >= deadline:
                        # Timed out - wake the waiter by setting its event
                        try:
                            loop.call_soon_threadsafe(event.set)
                        except RuntimeError:
                            # Loop might be closed
                            pass
                        continue

                    remaining.append((event, loop, deadline))
                    if deadline is not None:
                        delay = deadline - now
                        if next_timeout is None or delay < next_timeout:
                            next_timeout = delay

                if remaining:
                    self._waiters[resource_id] = remaining
                else:
                    self._waiters.pop(resource_id, None)

        return next_timeout

    def run(self):
        """Main polling loop (runs until the process exits)."""
        poll_interval = POLL_INTERVAL_INITIAL

        while True:
            with self._lock:
                if not self._waiters:
                    # No active waiters: wait for a registration. The check
                    # and the wait happen under the same lock hold so that a
                    # register() call cannot slip in between and have its
                    # notification lost.
                    self._cv.wait(timeout=1.0)
                    poll_interval = POLL_INTERVAL_INITIAL
                    continue

                # Snapshot (strong references) of the resources to poll
                resources = list(self._resources)

            # Poll each resource, then wake its waiters so that they can
            # re-check the state
            for resource in resources:
                try:
                    resource.refresh_state()
                    self._notify_waiters(resource)
                except Exception:
                    # One failing resource must not stop the polling thread
                    logger.exception("Error polling resource %s", resource)

            # Check timeouts
            next_timeout = self._check_timeouts()

            # Sleep until the next poll or the next deadline, whichever
            # comes first
            sleep_time = poll_interval
            if next_timeout is not None:
                sleep_time = min(sleep_time, next_timeout)

            # Sleep with ability to wake up on new registration
            # NOTE(review): every register() cuts this sleep short, so
            # waiters that re-register right after being woken trigger an
            # immediate new polling round - confirm this is intended.
            with self._lock:
                self._cv.wait(timeout=max(0.01, sleep_time))

            # Increase poll interval (exponential backoff)
            poll_interval = min(
                poll_interval * POLL_INTERVAL_MULTIPLIER, POLL_INTERVAL_MAX
            )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class DynamicDependency(ABC):
    """Base class for dynamic dependencies.

    Dynamic dependencies (like tokens) can change state at any time -
    availability can go from OK to WAIT and back. These require special
    handling during lock acquisition with retry logic.

    The origin must be a DynamicResource that supports async_wait().

    Subclasses must implement:
    - _create_lock(): Create the appropriate lock object for this dependency
    """

    # Resource this dependency waits on
    origin: "DynamicResource"
    # Job depending on the resource; None until set by the caller
    target: Optional["Job"]

    def __init__(self, origin: "DynamicResource"):
        self.origin = origin
        self.target = None

    def is_dynamic(self) -> bool:
        """Returns True - this is a dynamic dependency."""
        return True

    @abstractmethod
    def _create_lock(self) -> "Lock":
        """Create a lock object for this dependency.

        Returns:
            Lock object (subclass of DynamicDependencyLock)
        """
        ...

    async def aio_lock(self, timeout: float = 0) -> "Lock":
        """Acquire lock on the resource with async waiting.

        A fresh lock is created and acquired; when acquisition raises
        ``LockError`` the coroutine awaits ``origin.async_wait()`` and
        retries, until it succeeds or the timeout is exhausted.

        Args:
            timeout: Timeout in seconds (0 = wait indefinitely)

        Returns:
            The acquired lock object

        Raises:
            LockError: If lock cannot be acquired within timeout
        """
        # Local import, as in the original code (presumably to avoid an
        # import cycle with experimaestro.locking - TODO confirm)
        from experimaestro.locking import LockError

        # Monotonic clock: the elapsed time must not be affected by
        # adjustments of the system clock
        start_time = time.monotonic()

        while True:
            try:
                lock = self._create_lock()
                # NOTE(review): acquire() is called synchronously from the
                # coroutine - presumably it fails fast instead of blocking
                lock.acquire()
                return lock
            except LockError:
                # Calculate remaining timeout
                if timeout > 0:
                    remaining = timeout - (time.monotonic() - start_time)
                    if remaining <= 0:
                        raise LockError(f"Timeout waiting for resource: {self.origin}")
                else:
                    remaining = 0  # Wait indefinitely

                # Wait for resource state to change
                await self.origin.async_wait(timeout=remaining)

    def __repr__(self) -> str:
        return f"DynamicDep[{self.origin}]"
|
|
@@ -1,2 +1,6 @@
|
|
|
1
|
-
from .
|
|
2
|
-
|
|
1
|
+
from .configuration import ( # noqa: F401
|
|
2
|
+
configuration,
|
|
3
|
+
ConfigurationBase,
|
|
4
|
+
DirtyGitAction,
|
|
5
|
+
)
|
|
6
|
+
from .cli import ExperimentHelper # noqa: F401
|