experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +393 -134
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +223 -52
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +650 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +764 -169
- experimaestro/scheduler/interfaces.py +338 -96
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +928 -0
- experimaestro/scheduler/remote/protocol.py +282 -0
- experimaestro/scheduler/remote/server.py +447 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +186 -35
- experimaestro/scheduler/state_provider.py +811 -2157
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +1132 -0
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +459 -1895
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -388
- experimaestro/scheduler/state_sync.py +0 -834
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b4.dist-info/RECORD +0 -181
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
"""Partial directory locking for exclusive access.
|
|
2
|
+
|
|
3
|
+
This module provides locking for partial directories to ensure only one job
|
|
4
|
+
can write to a partial at a time.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, Type
|
|
12
|
+
import fasteners
|
|
13
|
+
import logging
|
|
14
|
+
|
|
15
|
+
from experimaestro.locking import (
|
|
16
|
+
DynamicDependencyLock,
|
|
17
|
+
DynamicLockFile,
|
|
18
|
+
JobDependencyLock,
|
|
19
|
+
LockError,
|
|
20
|
+
TrackedDynamicResource,
|
|
21
|
+
)
|
|
22
|
+
from experimaestro.dynamic import DynamicDependency
|
|
23
|
+
from experimaestro.scheduler.dependencies import Resource
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("xpm.partial")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PartialLockFile(DynamicLockFile):
    """Lock file describing who holds a partial directory.

    Stores:
    - job_uri: Reference to the job holding the lock
    - information: {"partial_name": name_of_partial}

    The lock file is created in the partial directory when a job acquires
    exclusive access, enabling recovery if the scheduler or job crashes.
    """

    # Name of the partial; set by from_information()
    partial_name: str

    def from_information(self, info) -> None:
        """Populate ``partial_name`` from the ``information`` payload.

        Args:
            info: ``None`` when a new lock file is being created, otherwise
                a dict that may carry a ``"partial_name"`` entry.

        Raises:
            ValueError: if ``info`` is neither ``None`` nor a dict.
        """
        if info is not None and not isinstance(info, dict):
            raise ValueError(f"Invalid information format: {info}")

        # A missing payload (new lock file) and a missing key both yield ""
        self.partial_name = "" if info is None else info.get("partial_name", "")

    def to_information(self) -> dict:
        """Return the ``information`` payload to be serialized as JSON."""
        return {"partial_name": self.partial_name}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class PartialJobResource(Resource, TrackedDynamicResource):
    """Resource guarding write access to a partial directory.

    Keeps a count of the write locks currently held on the partial directory
    and compares it with ``max_write_locks`` to decide availability.
    Uses file-based tracking for recovery, similar to CounterToken.

    File structure in partial directory:
    - {partial_path}/.experimaestro/locks/ipc.lock: IPC lock
    - {partial_path}/.experimaestro/locks/informations.json: {"max_write_locks": 1}
    - {partial_path}/.experimaestro/locks/jobs/{task_id}/{identifier}.json: lock file
    """

    #: Class used for the lock files of this resource
    lock_file_class: Type[DynamicLockFile] = PartialLockFile

    #: Registry of resources keyed by ``str(partial_path)`` (one per path)
    RESOURCES: Dict[str, "PartialJobResource"] = {}

    def __init__(self, partial_path: Path, max_write_locks: int = 1):
        """Set up the resource and its on-disk lock folder.

        Args:
            partial_path: Path to the partial directory
            max_write_locks: Maximum number of concurrent write locks
        """
        self._partial_path = partial_path
        self.max_write_locks = max_write_locks
        self.write_locks = 0  # no write lock is held yet

        # Ensure the lock folder exists, and write the informations file
        # only when there is none yet (an existing file is left untouched).
        # NOTE(review): informations_path is not defined in this class;
        # presumably provided by TrackedDynamicResource - confirm.
        self.lock_folder.mkdir(parents=True, exist_ok=True)
        if not self.informations_path.is_file():
            self._write_informations(max_write_locks)

        # Both base classes are initialized explicitly
        Resource.__init__(self)
        TrackedDynamicResource.__init__(self, str(partial_path))

    def __str__(self):
        return f"partial[{self._partial_path}]"

    @staticmethod
    def forkhandler():
        """Reset the registry after a fork so state is not shared."""
        PartialJobResource.RESOURCES = {}

    @staticmethod
    def create(partial_path: Path, max_write_locks: int = 1) -> "PartialJobResource":
        """Return the resource registered for a path, creating it if needed.

        At most one resource instance is kept per ``str(partial_path)``.
        When a resource is already registered for the path it is returned
        as is, and ``max_write_locks`` is not used.

        Args:
            partial_path: Path to the partial directory
            max_write_locks: Maximum number of concurrent write locks (default: 1)

        Returns:
            PartialJobResource for the path
        """
        key = str(partial_path)
        existing = PartialJobResource.RESOURCES.get(key)
        if existing is not None:
            return existing

        created = PartialJobResource(partial_path, max_write_locks)
        PartialJobResource.RESOURCES[key] = created
        return created

    @property
    def partial_path(self) -> Path:
        """Path to the partial directory."""
        return self._partial_path

    @property
    def lock_folder(self) -> Path:
        """Path to the lock folder within the partial directory."""
        return self._partial_path / ".experimaestro" / "locks"

    def _write_informations(self, max_write_locks: int) -> None:
        """Store ``max_write_locks`` in the informations file."""
        self.lock_folder.mkdir(parents=True, exist_ok=True)
        payload = {"max_write_locks": max_write_locks}
        with self.informations_path.open("w") as f:
            json.dump(payload, f)

    def _read_informations(self) -> int:
        """Return ``max_write_locks`` from the informations file.

        Falls back to 1 when the file does not exist or has no
        ``"max_write_locks"`` entry.
        """
        try:
            with self.informations_path.open("r") as f:
                data = json.load(f)
        except FileNotFoundError:
            return 1
        return data.get("max_write_locks", 1)

    # --- TrackedDynamicResource abstract method implementations ---

    def _reset_state(self) -> None:
        """Reload the limit and zero the counter before lock files are re-read."""
        self.max_write_locks = self._read_informations()
        self.write_locks = 0

    def _account_lock_file(self, lf: DynamicLockFile) -> None:
        """Count one more write lock for the given lock file."""
        self.write_locks += 1

    def _unaccount_lock_file(self, lf: DynamicLockFile) -> None:
        """Count one less write lock for the given lock file."""
        self.write_locks -= 1

    def is_available(self, dependency: "PartialDependency") -> bool:
        """Tell whether another write lock can still be granted."""
        return self.max_write_locks > self.write_locks

    def _do_acquire(self, dependency: "PartialDependency") -> None:
        """Increase the write lock count by one."""
        self.write_locks += 1
        logger.debug(
            "Partial state [acquired]: write_locks %d, max %d, path %s",
            self.write_locks,
            self.max_write_locks,
            self._partial_path,
        )

    def _do_release(self, dependency: "PartialDependency") -> None:
        """Decrease the write lock count by one."""
        self.write_locks -= 1
        logger.debug(
            "Partial state [released]: write_locks %d, max %d, path %s",
            self.write_locks,
            self.max_write_locks,
            self._partial_path,
        )

    def _get_lock_file_information(self, dependency: "PartialDependency"):
        """Return the ``information`` payload for the dependency's lock file."""
        return {"partial_name": dependency.partial_name}

    def _handle_information_change(self) -> None:
        """Reload ``max_write_locks`` after informations.json changed."""
        previous_max = self.max_write_locks
        self.max_write_locks = self._read_informations()
        logger.debug(
            "Partial information modified: write_locks %d, max %d, path %s",
            self.write_locks,
            self.max_write_locks,
            self._partial_path,
        )

        # Only a raised limit can unblock waiting tasks
        if self.max_write_locks > previous_max:
            self.available_condition.notify_all()

    # --- Partial API ---

    def dependency(self, partial_name: str) -> "PartialDependency":
        """Build a dependency on this partial resource.

        Args:
            partial_name: Name of the partial (for symlink creation)

        Returns:
            PartialDependency for this resource
        """
        return PartialDependency(self, partial_name)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# In a forked child, PartialJobResource.forkhandler resets the resource
# registry; the hook is not registered on Windows
if sys.platform != "win32":
    os.register_at_fork(after_in_child=PartialJobResource.forkhandler)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class PartialJobLock(JobDependencyLock):
    """Job-side lock for a partial directory.

    Inherits from JobDependencyLock to participate in the dynamic lock lifecycle.
    Uses IPC locking (fasteners.InterProcessLock) for exclusive access.

    File structure:
    - {partial_path}/.experimaestro/locks/ipc.lock: IPC lock
    - {partial_path}/.experimaestro/locks/jobs/{task_id}/{identifier}.json: lock file

    Lifecycle:
    1. Scheduler acquires IPC lock and releases it in aio_job_started
    2. Job acquires IPC lock (blocks until scheduler releases)
    3. Job creates/updates lock file to track who holds the lock
    4. On release: IPC lock released, lock file deleted (via base class)
    """

    def __init__(self, data: dict):
        """Rebuild the lock from its serialized form.

        Args:
            data: dict with the keys ``partial_path``, ``partial_name``,
                ``job_uri`` and ``lock_file_path``.

        Raises:
            KeyError: if one of these keys is missing.
        """
        self.partial_path = Path(data["partial_path"])
        self.partial_name = data["partial_name"]
        self.job_uri = data["job_uri"]
        self.lock_folder = self.partial_path / ".experimaestro" / "locks"
        self.ipc_lock_path = self.lock_folder / "ipc.lock"
        self.lock_file_path = Path(data["lock_file_path"])
        # IPC lock object; only set while the lock is actually held
        self._lock = None

    def acquire(self) -> None:
        """Acquire exclusive lock on the partial directory.

        Verifies the lock file exists (created by scheduler) and acquires
        the IPC lock for exclusive access.

        Raises:
            LockError: if the IPC lock could not be acquired.
        """
        # Verify lock file exists (base class)
        super().acquire()

        logger.info("Acquiring partial lock: %s", self.partial_path)

        # Acquire the IPC lock (blocking - waits for other jobs to finish).
        # The lock object is stored only once it is held, so a failed
        # acquisition never leaves a half-initialized lock behind.
        ipc_lock = fasteners.InterProcessLock(str(self.ipc_lock_path))
        if not ipc_lock.acquire(blocking=True):
            raise LockError(f"Could not lock partial: {self.partial_path}")
        self._lock = ipc_lock

        logger.info("Acquired partial lock: %s", self.partial_path)

    def release(self) -> None:
        """Release the partial lock and delete the lock file.

        The base-class release (which deletes the lock file) always runs,
        even when releasing the IPC lock raises, so that a failure there
        does not leave a stale lock file behind.
        """
        try:
            if self._lock is not None and self._lock.acquired:
                logger.info("Releasing partial lock: %s", self.partial_path)
                self._lock.release()
        finally:
            self._lock = None
            # Delete the lock file (handled by base class)
            super().release()
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
class PartialLock(DynamicDependencyLock):
    """Scheduler-side lock for a partial directory.

    Inherits from DynamicDependencyLock to participate in the dynamic lock lifecycle.
    Ensures exclusive access to a partial directory while a job is running.

    Manages lock acquisition/release through the PartialJobResource, similar to
    how CounterTokenLock manages through CounterToken.

    File structure:
    - {partial_path}/.experimaestro/locks/ipc.lock: IPC lock
    - {partial_path}/.experimaestro/locks/jobs/{task_id}/{identifier}.json: lock file
    """

    dependency: "PartialDependency"

    def __init__(self, dependency: "PartialDependency"):
        """Create the lock for the given partial dependency."""
        super().__init__(dependency)

    @property
    def lock_folder(self) -> Path:
        """Path to the lock folder within the partial directory."""
        return self.dependency.resource.lock_folder

    def _acquire(self):
        """Acquire exclusive lock via the resource."""
        self.dependency.resource.acquire(self.dependency)

    def _release(self):
        """Release the lock via the resource."""
        self.dependency.resource.release(self.dependency)

    def __str__(self):
        return f"PartialLock({self.dependency.partial_name})"

    async def aio_job_before_start(self, job) -> None:
        """Create the partial symlink in the job directory before the job starts.

        The link ``{job.experimaestro_path}/partials/{partial_name}`` points
        to the partial data directory (not the lock folder). An existing
        entry is kept, except for a dangling symlink, which is replaced.

        Args:
            job: the job about to start; must be a scheduler ``Job``.
        """
        from experimaestro.scheduler.jobs import Job

        assert isinstance(job, Job)

        # Create symlink in job's .experimaestro/partials directory
        partials_dir = job.experimaestro_path / "partials"
        partials_dir.mkdir(parents=True, exist_ok=True)

        symlink_path = partials_dir / self.dependency.partial_name
        target = self.dependency.partial_path

        # Path.exists() follows symlinks: a dangling link reports False even
        # though the name is taken, and symlink_to() would then raise
        # FileExistsError. Remove such a link so that it can be recreated.
        if symlink_path.is_symlink() and not symlink_path.exists():
            logger.debug("Removing dangling partial symlink: %s", symlink_path)
            symlink_path.unlink()

        if not symlink_path.exists():
            symlink_path.symlink_to(target)
            logger.debug(
                "Created partial symlink: %s -> %s",
                symlink_path,
                target,
            )

    def to_json(self) -> dict:
        """Serialize lock for job process.

        Extends the base-class dict with ``partial_path``, ``partial_name``,
        ``lock_file_path`` and ``job_uri``, the keys read back by
        ``PartialJobLock``.
        """
        data = super().to_json()
        data.update(
            {
                "partial_path": str(self.dependency.partial_path),
                "partial_name": self.dependency.partial_name,
                "lock_file_path": str(self.lock_file_path),
                "job_uri": str(self.dependency.target.basepath),
            }
        )
        return data

    @classmethod
    def from_json(cls, data: dict) -> PartialJobLock:
        """Create job-side lock from serialized data."""
        return PartialJobLock(data)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
class PartialDependency(DynamicDependency):
    """Dynamic dependency of a job on a partial directory.

    Availability can change over time. The dependency ensures that only one
    job can write to a partial directory at a time, and relies on
    PartialJobResource for state tracking and recovery.
    """

    def __init__(self, resource: PartialJobResource, partial_name: str):
        """Bind the dependency to a resource and a partial name.

        Args:
            resource: The PartialJobResource managing this partial
            partial_name: Name of the partial (used for symlink creation)
        """
        super().__init__(resource)
        self._partial_name = partial_name
        self._resource = resource

    def __repr__(self) -> str:
        return "PartialDep[{}]".format(self.partial_name)

    def _create_lock(self) -> PartialLock:
        """Return a new PartialLock bound to this dependency."""
        return PartialLock(self)

    @property
    def partial_name(self) -> str:
        """Name of the partial."""
        return self._partial_name

    @property
    def partial_path(self) -> Path:
        """Path to the partial directory, as reported by the resource."""
        return self._resource.partial_path

    @property
    def resource(self) -> PartialJobResource:
        """The resource managing this partial."""
        return self._resource
|
experimaestro/core/types.py
CHANGED
|
@@ -30,7 +30,7 @@ if typing.TYPE_CHECKING:
|
|
|
30
30
|
from experimaestro.scheduler.base import Job
|
|
31
31
|
from experimaestro.launchers import Launcher
|
|
32
32
|
from experimaestro.core.objects import Config
|
|
33
|
-
from experimaestro.core.
|
|
33
|
+
from experimaestro.core.partial import Partial
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
@dataclass
|
|
@@ -310,8 +310,8 @@ class ObjectType(Type):
|
|
|
310
310
|
# --- Value class (for external value types, e.g., nn.Module subclasses)
|
|
311
311
|
self._original_type: type = tp # Keep reference to original config class
|
|
312
312
|
|
|
313
|
-
# ---
|
|
314
|
-
self.
|
|
313
|
+
# --- Partial for partial identifier computation
|
|
314
|
+
self._partials: Dict[str, "Partial"] = {}
|
|
315
315
|
|
|
316
316
|
def set_value_type(self, value_class: type) -> None:
|
|
317
317
|
"""Register an explicit value class for this configuration.
|
|
@@ -357,15 +357,13 @@ class ObjectType(Type):
|
|
|
357
357
|
else:
|
|
358
358
|
self._file = Path(inspect.getfile(self.originaltype)).absolute()
|
|
359
359
|
|
|
360
|
-
assert (
|
|
361
|
-
|
|
362
|
-
)
|
|
360
|
+
assert (self._module and self._package) or self._file, (
|
|
361
|
+
f"Could not detect module/file for {self.originaltype}"
|
|
362
|
+
)
|
|
363
363
|
|
|
364
364
|
# The class of the object
|
|
365
365
|
|
|
366
|
-
self._arguments = ChainMap(
|
|
367
|
-
{}, *(tp.arguments for tp in self.parents())
|
|
368
|
-
) # type: ChainMap[Argument, Any]
|
|
366
|
+
self._arguments = ChainMap({}, *(tp.arguments for tp in self.parents())) # type: ChainMap[Argument, Any]
|
|
369
367
|
|
|
370
368
|
# Add arguments from annotations
|
|
371
369
|
for annotation in self.annotations:
|
|
@@ -409,15 +407,15 @@ class ObjectType(Type):
|
|
|
409
407
|
)
|
|
410
408
|
raise
|
|
411
409
|
|
|
412
|
-
# Collect
|
|
413
|
-
from .
|
|
410
|
+
# Collect partial from class attributes
|
|
411
|
+
from .partial import Partial as PartialClass
|
|
414
412
|
|
|
415
413
|
for name, value in self._original_type.__dict__.items():
|
|
416
|
-
if isinstance(value,
|
|
414
|
+
if isinstance(value, PartialClass):
|
|
417
415
|
# Auto-set name from attribute name if not already set
|
|
418
416
|
if value.name is None:
|
|
419
417
|
value.name = name
|
|
420
|
-
self.
|
|
418
|
+
self._partials[name] = value
|
|
421
419
|
|
|
422
420
|
def name(self):
|
|
423
421
|
return f"{self.value_type.__module__}.{self.value_type.__qualname__}"
|
|
@@ -488,8 +486,7 @@ class ObjectType(Type):
|
|
|
488
486
|
# Legacy mechanism: parent class is the target
|
|
489
487
|
if len(self.value_type.__bases__) != 1:
|
|
490
488
|
raise RuntimeError(
|
|
491
|
-
"Deprecated configurations must have "
|
|
492
|
-
"only one parent (the new configuration)"
|
|
489
|
+
"Deprecated configurations must have only one parent (the new configuration)"
|
|
493
490
|
)
|
|
494
491
|
parent = self.value_type.__bases__[0].__getxpmtype__()
|
|
495
492
|
self.identifier = parent.identifier
|