experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +130 -5
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +20 -1
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +182 -46
- experimaestro/core/identifier.py +107 -6
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +542 -25
- experimaestro/core/objects/config_walk.py +20 -0
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +175 -38
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +107 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +63 -13
- experimaestro/progress.py +0 -2
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/base.py +489 -125
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +225 -30
- experimaestro/scheduler/interfaces.py +474 -0
- experimaestro/scheduler/jobs.py +216 -206
- experimaestro/scheduler/services.py +186 -12
- experimaestro/scheduler/state_db.py +388 -0
- experimaestro/scheduler/state_provider.py +2345 -0
- experimaestro/scheduler/state_sync.py +834 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +147 -57
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +44 -5
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_file_progress_integration.py +1 -1
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_identifier.py +372 -41
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +3 -3
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +312 -5
- experimaestro/tests/test_outputs.py +2 -2
- experimaestro/tests/test_param.py +8 -12
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +0 -48
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -1
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +136 -0
- experimaestro/tests/test_tasks.py +107 -121
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +17 -13
- experimaestro/tests/test_types.py +123 -1
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +4 -2
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +1 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2303 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +68 -38
- experimaestro-2.0.0b4.dist-info/RECORD +181 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -221
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-2.0.0a8.dist-info/RECORD +0 -166
- experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/licenses/LICENSE +0 -0
experimaestro/tests/test_tags.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
from pathlib import Path
|
|
3
|
+
import logging
|
|
3
4
|
from experimaestro import (
|
|
4
5
|
tag,
|
|
5
6
|
LightweightTask,
|
|
@@ -118,3 +119,138 @@ def test_objects_tags():
|
|
|
118
119
|
a = A.C(x=tag(1))
|
|
119
120
|
a.__xpm__.seal(context)
|
|
120
121
|
assert a.__xpm__.tags() == {"x": 1}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_conflicting_tags_warning(caplog):
|
|
125
|
+
"""Test that conflicting tag values produce a warning"""
|
|
126
|
+
|
|
127
|
+
class Inner(Config):
|
|
128
|
+
value: Param[int]
|
|
129
|
+
|
|
130
|
+
class Outer(Config):
|
|
131
|
+
inner: Param[Inner]
|
|
132
|
+
x: Param[int]
|
|
133
|
+
|
|
134
|
+
# Create inner config with tag "mytag" = 1
|
|
135
|
+
inner = Inner.C(value=10).tag("mytag", 1)
|
|
136
|
+
|
|
137
|
+
# Create outer config with same tag "mytag" = 2 (conflicting)
|
|
138
|
+
outer = Outer.C(inner=inner, x=5).tag("mytag", 2)
|
|
139
|
+
|
|
140
|
+
# Getting tags should warn about conflict
|
|
141
|
+
with caplog.at_level(logging.WARNING):
|
|
142
|
+
tags = outer.tags()
|
|
143
|
+
|
|
144
|
+
# The warning should mention the conflicting tag
|
|
145
|
+
assert any("mytag" in record.message for record in caplog.records)
|
|
146
|
+
assert any("conflicting" in record.message.lower() for record in caplog.records)
|
|
147
|
+
|
|
148
|
+
# The last value should win
|
|
149
|
+
assert tags["mytag"] == 2
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_same_tag_same_value_no_warning(caplog):
|
|
153
|
+
"""Test that same tag with same value does not produce a warning"""
|
|
154
|
+
|
|
155
|
+
class Inner(Config):
|
|
156
|
+
value: Param[int]
|
|
157
|
+
|
|
158
|
+
class Outer(Config):
|
|
159
|
+
inner: Param[Inner]
|
|
160
|
+
|
|
161
|
+
# Create inner config with tag "mytag" = 1
|
|
162
|
+
inner = Inner.C(value=10).tag("mytag", 1)
|
|
163
|
+
|
|
164
|
+
# Create outer config with same tag "mytag" = 1 (same value)
|
|
165
|
+
outer = Outer.C(inner=inner).tag("mytag", 1)
|
|
166
|
+
|
|
167
|
+
# Getting tags should NOT warn (same value)
|
|
168
|
+
with caplog.at_level(logging.WARNING):
|
|
169
|
+
tags = outer.tags()
|
|
170
|
+
|
|
171
|
+
# No warning for same values
|
|
172
|
+
assert not any("mytag" in record.message for record in caplog.records)
|
|
173
|
+
assert tags["mytag"] == 1
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def test_tag_source_tracking():
|
|
177
|
+
"""Test that tag source locations are tracked"""
|
|
178
|
+
|
|
179
|
+
class MyConfig(Config):
|
|
180
|
+
x: Param[int]
|
|
181
|
+
|
|
182
|
+
config = MyConfig.C(x=tag(5))
|
|
183
|
+
|
|
184
|
+
# Check that tags have source info stored internally
|
|
185
|
+
assert "x" in config.__xpm__._tags
|
|
186
|
+
value, source = config.__xpm__._tags["x"]
|
|
187
|
+
assert value == 5
|
|
188
|
+
# Source should contain file path and line number
|
|
189
|
+
assert ":" in source
|
|
190
|
+
assert "test_tags.py" in source
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def test_tag_method_source_tracking():
|
|
194
|
+
"""Test that tag() method also tracks source location"""
|
|
195
|
+
|
|
196
|
+
class MyConfig(Config):
|
|
197
|
+
x: Param[int]
|
|
198
|
+
|
|
199
|
+
config = MyConfig.C(x=5)
|
|
200
|
+
config.tag("mytag", "myvalue")
|
|
201
|
+
|
|
202
|
+
# Check that tag has source info
|
|
203
|
+
assert "mytag" in config.__xpm__._tags
|
|
204
|
+
value, source = config.__xpm__._tags["mytag"]
|
|
205
|
+
assert value == "myvalue"
|
|
206
|
+
assert ":" in source
|
|
207
|
+
assert "test_tags.py" in source
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def test_tag_via_setattr():
|
|
211
|
+
"""Test that config.key = tag(value) works and tracks source"""
|
|
212
|
+
|
|
213
|
+
class MyConfig(Config):
|
|
214
|
+
x: Param[int]
|
|
215
|
+
|
|
216
|
+
config = MyConfig.C(x=5)
|
|
217
|
+
config.x = tag(10)
|
|
218
|
+
|
|
219
|
+
# Check that tag was set correctly
|
|
220
|
+
assert config.tags() == {"x": 10}
|
|
221
|
+
assert config.x == 10
|
|
222
|
+
|
|
223
|
+
# Check that source is tracked
|
|
224
|
+
value, source = config.__xpm__._tags["x"]
|
|
225
|
+
assert value == 10
|
|
226
|
+
assert "test_tags.py" in source
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def test_tag_setattr_conflict_warning(caplog):
|
|
230
|
+
"""Test that setting conflicting tag via setattr produces warning"""
|
|
231
|
+
|
|
232
|
+
class Inner(Config):
|
|
233
|
+
value: Param[int]
|
|
234
|
+
|
|
235
|
+
class Outer(Config):
|
|
236
|
+
inner: Param[Inner]
|
|
237
|
+
x: Param[int]
|
|
238
|
+
|
|
239
|
+
# Create with tag via constructor
|
|
240
|
+
inner = Inner.C(value=tag(1))
|
|
241
|
+
|
|
242
|
+
# Create outer with same tag name
|
|
243
|
+
outer = Outer.C(inner=inner, x=5)
|
|
244
|
+
outer.x = tag(2) # Set tag on x
|
|
245
|
+
|
|
246
|
+
# Add a conflicting value tag
|
|
247
|
+
outer.tag("value", 99)
|
|
248
|
+
|
|
249
|
+
# Getting tags should warn about conflict
|
|
250
|
+
with caplog.at_level(logging.WARNING):
|
|
251
|
+
tags = outer.tags()
|
|
252
|
+
|
|
253
|
+
# The warning should mention the conflicting tag
|
|
254
|
+
assert any("value" in record.message for record in caplog.records)
|
|
255
|
+
assert tags["value"] == 99 # Last value wins
|
|
256
|
+
assert tags["x"] == 2
|
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
# --- Task and types definitions
|
|
2
2
|
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
import pytest
|
|
5
7
|
import logging
|
|
6
|
-
from experimaestro import
|
|
8
|
+
from experimaestro import (
|
|
9
|
+
Config,
|
|
10
|
+
Task,
|
|
11
|
+
Param,
|
|
12
|
+
ResumableTask,
|
|
13
|
+
Meta,
|
|
14
|
+
field,
|
|
15
|
+
PathGenerator,
|
|
16
|
+
)
|
|
7
17
|
from experimaestro.scheduler.workspace import RunMode
|
|
8
|
-
from experimaestro.tools.jobs import fix_deprecated
|
|
9
18
|
from experimaestro.scheduler import FailedExperiment, JobState
|
|
10
19
|
from experimaestro import SubmitHook, Job, Launcher, LightweightTask
|
|
11
20
|
|
|
@@ -144,125 +153,6 @@ def test_configcache():
|
|
|
144
153
|
assert task.__xpm__.job.wait() == JobState.DONE
|
|
145
154
|
|
|
146
155
|
|
|
147
|
-
# ---- Deprecation
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
class NewConfig(Config):
|
|
151
|
-
__xpmid__ = "new"
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
@deprecate
|
|
155
|
-
class DeprecatedConfig(NewConfig):
|
|
156
|
-
__xpmid__ = "deprecated"
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
class OldConfig(NewConfig):
|
|
160
|
-
__xpmid__ = "deprecated"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
class TaskWithDeprecated(Task):
|
|
164
|
-
p: Param[NewConfig]
|
|
165
|
-
|
|
166
|
-
def execute(self):
|
|
167
|
-
pass
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def checknewpaths(task_new, task_old_path):
|
|
171
|
-
task_new_path = task_new.__xpm__.job.path # type: Path
|
|
172
|
-
|
|
173
|
-
assert task_new_path.exists(), f"New path {task_new_path} should exist"
|
|
174
|
-
assert task_new_path.is_symlink(), f"New path {task_new_path} should be a symlink"
|
|
175
|
-
|
|
176
|
-
assert task_new_path.resolve() == task_old_path
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
def test_tasks_deprecated_inner():
|
|
180
|
-
"""Test that when submitting the task, the computed identifier is the one of
|
|
181
|
-
the new class"""
|
|
182
|
-
with TemporaryExperiment("deprecated", maxwait=0) as xp:
|
|
183
|
-
# --- Check that paths are really different first
|
|
184
|
-
task_new = TaskWithDeprecated.C(p=NewConfig.C()).submit(
|
|
185
|
-
run_mode=RunMode.DRY_RUN
|
|
186
|
-
)
|
|
187
|
-
task_old = TaskWithDeprecated.C(p=OldConfig.C()).submit(
|
|
188
|
-
run_mode=RunMode.DRY_RUN
|
|
189
|
-
)
|
|
190
|
-
task_deprecated = TaskWithDeprecated.C(p=DeprecatedConfig.C()).submit(
|
|
191
|
-
run_mode=RunMode.DRY_RUN
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
logging.debug("New task ID: %s", task_new.__xpm__.identifier.all.hex())
|
|
195
|
-
logging.debug("Old task ID: %s", task_old.__xpm__.identifier.all.hex())
|
|
196
|
-
logging.debug(
|
|
197
|
-
"Old task (with deprecated flag): %s",
|
|
198
|
-
task_deprecated.__xpm__.identifier.all.hex(),
|
|
199
|
-
)
|
|
200
|
-
assert (
|
|
201
|
-
task_new.stdout() != task_old.stdout()
|
|
202
|
-
), "Old and new path should be different"
|
|
203
|
-
|
|
204
|
-
assert (
|
|
205
|
-
task_new.stdout() == task_deprecated.stdout()
|
|
206
|
-
), "Deprecated path should be the same as non deprecated"
|
|
207
|
-
|
|
208
|
-
# --- Now check that automatic linking is performed
|
|
209
|
-
|
|
210
|
-
# Run old task with deprecated configuration
|
|
211
|
-
task_old = TaskWithDeprecated.C(p=OldConfig.C()).submit()
|
|
212
|
-
task_old.wait()
|
|
213
|
-
task_old_path = task_old.stdout().parent
|
|
214
|
-
|
|
215
|
-
# Fix deprecated
|
|
216
|
-
OldConfig.__xpmtype__.deprecate()
|
|
217
|
-
fix_deprecated(xp.workspace.path, True, False)
|
|
218
|
-
|
|
219
|
-
checknewpaths(task_new, task_old_path)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
class NewTask(Task):
|
|
223
|
-
x: Param[int]
|
|
224
|
-
|
|
225
|
-
def execute(self):
|
|
226
|
-
pass
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
class OldTask(NewTask):
|
|
230
|
-
__xpmid__ = "deprecated"
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
@deprecate
|
|
234
|
-
class DeprecatedTask(NewTask):
|
|
235
|
-
__xpmid__ = "deprecated"
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def test_tasks_deprecated():
|
|
239
|
-
"""Test that when submitting the task, the computed identifier is the one of
|
|
240
|
-
the new class"""
|
|
241
|
-
with TemporaryExperiment("deprecated", maxwait=20) as xp:
|
|
242
|
-
# Check that paths are really different first
|
|
243
|
-
task_new = NewTask.C(x=1).submit(run_mode=RunMode.DRY_RUN)
|
|
244
|
-
task_old = OldTask.C(x=1).submit(run_mode=RunMode.DRY_RUN)
|
|
245
|
-
task_deprecated = DeprecatedTask.C(x=1).submit(run_mode=RunMode.DRY_RUN)
|
|
246
|
-
|
|
247
|
-
assert (
|
|
248
|
-
task_new.stdout() != task_old.stdout()
|
|
249
|
-
), "Old and new path should be different"
|
|
250
|
-
assert (
|
|
251
|
-
task_new.stdout() == task_deprecated.stdout()
|
|
252
|
-
), "Deprecated path should be the same as non deprecated"
|
|
253
|
-
|
|
254
|
-
# OK, now check that automatic linking is performed
|
|
255
|
-
task_old = OldTask.C(x=1).submit()
|
|
256
|
-
task_old.wait()
|
|
257
|
-
task_old_path = task_old.stdout().parent
|
|
258
|
-
|
|
259
|
-
# Fix deprecated
|
|
260
|
-
OldTask.__xpmtype__.deprecate()
|
|
261
|
-
fix_deprecated(xp.workspace.path, True, False)
|
|
262
|
-
|
|
263
|
-
checknewpaths(task_new, task_old_path)
|
|
264
|
-
|
|
265
|
-
|
|
266
156
|
class needs_java(SubmitHook):
|
|
267
157
|
def __init__(self, version: int):
|
|
268
158
|
self.version = version
|
|
@@ -318,3 +208,99 @@ def test_task_lightweight_init():
|
|
|
318
208
|
MyLightweightTask.C(x=x).submit(init_tasks=[lwtask]).__xpm__.job.wait()
|
|
319
209
|
== JobState.DONE
|
|
320
210
|
), "Init tasks should be executed"
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# --- Test for resumable task resubmission
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class ControllableResumableTask(ResumableTask):
|
|
217
|
+
"""A resumable task that can be controlled via files"""
|
|
218
|
+
|
|
219
|
+
control_file: Meta[Path] = field(default_factory=PathGenerator("control"))
|
|
220
|
+
|
|
221
|
+
def execute(self):
|
|
222
|
+
# Wait for control file
|
|
223
|
+
while not self.control_file.is_file():
|
|
224
|
+
time.sleep(0.1)
|
|
225
|
+
|
|
226
|
+
# Read control: "fail" to exit with error, "complete" to succeed
|
|
227
|
+
action = self.control_file.read_text().strip()
|
|
228
|
+
self.control_file.unlink()
|
|
229
|
+
|
|
230
|
+
if action == "fail":
|
|
231
|
+
sys.exit(1)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def test_resumable_task_resubmit():
|
|
235
|
+
"""Test resubmitting a failed ResumableTask within the same experiment"""
|
|
236
|
+
with TemporaryExperiment("resumable_resubmit", maxwait=30):
|
|
237
|
+
task1 = ControllableResumableTask.C()
|
|
238
|
+
task1.submit()
|
|
239
|
+
|
|
240
|
+
# Tell task to fail
|
|
241
|
+
task1.control_file.parent.mkdir(parents=True, exist_ok=True)
|
|
242
|
+
task1.control_file.write_text("fail")
|
|
243
|
+
|
|
244
|
+
# Wait for the job to fail
|
|
245
|
+
job = task1.__xpm__.job
|
|
246
|
+
assert job.wait() == JobState.ERROR, "Job should have failed"
|
|
247
|
+
|
|
248
|
+
# Resubmit by creating a new instance with same parameters
|
|
249
|
+
task2 = ControllableResumableTask.C()
|
|
250
|
+
task2.submit()
|
|
251
|
+
|
|
252
|
+
# Tell task to complete
|
|
253
|
+
task2.control_file.write_text("complete")
|
|
254
|
+
|
|
255
|
+
# Wait for the resubmitted job to complete
|
|
256
|
+
assert task2.__xpm__.job.wait() == JobState.DONE
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def test_resumable_task_resubmit_across_experiments():
|
|
260
|
+
"""Test resubmitting a failed ResumableTask across two experiment instances"""
|
|
261
|
+
with TemporaryDirectory(prefix="xpm", suffix="resubmit_across") as workdir:
|
|
262
|
+
# First experiment: task fails
|
|
263
|
+
try:
|
|
264
|
+
with TemporaryExperiment("resubmit_across", maxwait=10, workdir=workdir):
|
|
265
|
+
task1 = ControllableResumableTask.C()
|
|
266
|
+
task1.submit()
|
|
267
|
+
|
|
268
|
+
# Tell task to fail
|
|
269
|
+
task1.control_file.parent.mkdir(parents=True, exist_ok=True)
|
|
270
|
+
task1.control_file.write_text("fail")
|
|
271
|
+
except Exception as e:
|
|
272
|
+
logging.info("First experiment ended (expected): %s", e)
|
|
273
|
+
|
|
274
|
+
# Second experiment: task completes
|
|
275
|
+
with TemporaryExperiment("resubmit_across", maxwait=30, workdir=workdir):
|
|
276
|
+
task2 = ControllableResumableTask.C()
|
|
277
|
+
task2.submit()
|
|
278
|
+
|
|
279
|
+
# Tell task to complete
|
|
280
|
+
task2.control_file.write_text("complete")
|
|
281
|
+
|
|
282
|
+
# Wait for the resubmitted job to complete
|
|
283
|
+
assert task2.__xpm__.job.wait() == JobState.DONE
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def test_task_resubmit_across_experiments():
|
|
287
|
+
"""Test resubmitting a completed task across two experiment instances"""
|
|
288
|
+
with TemporaryDirectory(prefix="xpm", suffix="resubmit_across") as workdir:
|
|
289
|
+
# First experiment: task completes
|
|
290
|
+
with TemporaryExperiment("resubmit_across", maxwait=30, workdir=workdir):
|
|
291
|
+
task1 = ControllableResumableTask.C()
|
|
292
|
+
task1.submit()
|
|
293
|
+
|
|
294
|
+
# Tell task to complete
|
|
295
|
+
task1.control_file.parent.mkdir(parents=True, exist_ok=True)
|
|
296
|
+
task1.control_file.write_text("complete")
|
|
297
|
+
|
|
298
|
+
assert task1.__xpm__.job.wait() == JobState.DONE
|
|
299
|
+
|
|
300
|
+
# Second experiment: resubmit completed task (uses same workdir)
|
|
301
|
+
with TemporaryExperiment("resubmit_across", maxwait=30, workdir=workdir):
|
|
302
|
+
task2 = ControllableResumableTask.C()
|
|
303
|
+
task2.submit()
|
|
304
|
+
|
|
305
|
+
# Task should recognize it's already done
|
|
306
|
+
assert task2.__xpm__.job.wait() == JobState.DONE
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Unit tests for token locking mechanism
|
|
2
|
+
|
|
3
|
+
Tests the CounterToken condition variable-based synchronization
|
|
4
|
+
without requiring full scheduler integration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import pytest
|
|
9
|
+
import tempfile
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
import time
|
|
12
|
+
|
|
13
|
+
from experimaestro.tokens import CounterToken
|
|
14
|
+
from experimaestro.locking import LockError
|
|
15
|
+
|
|
16
|
+
pytestmark = pytest.mark.anyio
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def test_token_acquire_release():
|
|
20
|
+
"""Test basic token acquire and release"""
|
|
21
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
22
|
+
token = CounterToken("test-basic", Path(tmpdir) / "token", count=1)
|
|
23
|
+
|
|
24
|
+
# Create a mock job target
|
|
25
|
+
class MockJob:
|
|
26
|
+
@property
|
|
27
|
+
def identifier(self):
|
|
28
|
+
return "mock-job-1"
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
def basepath(self):
|
|
32
|
+
return Path(tmpdir) / "job1"
|
|
33
|
+
|
|
34
|
+
job = MockJob()
|
|
35
|
+
|
|
36
|
+
# Create dependency
|
|
37
|
+
dep = token.dependency(1)
|
|
38
|
+
dep.target = job
|
|
39
|
+
|
|
40
|
+
# Should be able to acquire
|
|
41
|
+
lock = await dep.aio_lock(timeout=1.0)
|
|
42
|
+
assert lock is not None
|
|
43
|
+
assert token.available == 0
|
|
44
|
+
|
|
45
|
+
# Release
|
|
46
|
+
lock.release()
|
|
47
|
+
assert token.available == 1
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def test_token_blocking():
|
|
51
|
+
"""Test that acquiring blocks when no tokens available"""
|
|
52
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
53
|
+
token = CounterToken("test-blocking", Path(tmpdir) / "token", count=1)
|
|
54
|
+
|
|
55
|
+
class MockJob:
|
|
56
|
+
def __init__(self, name):
|
|
57
|
+
self.name = name
|
|
58
|
+
|
|
59
|
+
@property
|
|
60
|
+
def identifier(self):
|
|
61
|
+
return f"mock-job-{self.name}"
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def basepath(self):
|
|
65
|
+
return Path(tmpdir) / self.name
|
|
66
|
+
|
|
67
|
+
job1 = MockJob("1")
|
|
68
|
+
job2 = MockJob("2")
|
|
69
|
+
|
|
70
|
+
dep1 = token.dependency(1)
|
|
71
|
+
dep1.target = job1
|
|
72
|
+
|
|
73
|
+
dep2 = token.dependency(1)
|
|
74
|
+
dep2.target = job2
|
|
75
|
+
|
|
76
|
+
# Acquire with first dependency
|
|
77
|
+
lock1 = await dep1.aio_lock(timeout=0.5)
|
|
78
|
+
assert token.available == 0
|
|
79
|
+
|
|
80
|
+
# Second acquire should timeout
|
|
81
|
+
start = time.time()
|
|
82
|
+
with pytest.raises(LockError, match="Timeout"):
|
|
83
|
+
await dep2.aio_lock(timeout=0.5)
|
|
84
|
+
elapsed = time.time() - start
|
|
85
|
+
assert 0.4 < elapsed < 0.7 # Should timeout around 0.5s
|
|
86
|
+
|
|
87
|
+
# Release first lock
|
|
88
|
+
lock1.release()
|
|
89
|
+
assert token.available == 1
|
|
90
|
+
|
|
91
|
+
# Now second should succeed
|
|
92
|
+
lock2 = await dep2.aio_lock(timeout=0.5)
|
|
93
|
+
assert lock2 is not None
|
|
94
|
+
lock2.release()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
async def test_token_notification():
|
|
98
|
+
"""Test that condition notification wakes up waiting tasks"""
|
|
99
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
100
|
+
token = CounterToken("test-notify", Path(tmpdir) / "token", count=1)
|
|
101
|
+
|
|
102
|
+
class MockJob:
|
|
103
|
+
def __init__(self, name):
|
|
104
|
+
self.name = name
|
|
105
|
+
|
|
106
|
+
@property
|
|
107
|
+
def identifier(self):
|
|
108
|
+
return f"mock-job-{self.name}"
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
def basepath(self):
|
|
112
|
+
return Path(tmpdir) / self.name
|
|
113
|
+
|
|
114
|
+
job1 = MockJob("1")
|
|
115
|
+
job2 = MockJob("2")
|
|
116
|
+
|
|
117
|
+
dep1 = token.dependency(1)
|
|
118
|
+
dep1.target = job1
|
|
119
|
+
|
|
120
|
+
dep2 = token.dependency(1)
|
|
121
|
+
dep2.target = job2
|
|
122
|
+
|
|
123
|
+
# Acquire with first dependency
|
|
124
|
+
lock1 = await dep1.aio_lock(timeout=0.5)
|
|
125
|
+
|
|
126
|
+
# Start second acquisition in background
|
|
127
|
+
async def acquire_second():
|
|
128
|
+
lock = await dep2.aio_lock(timeout=5.0) # Long timeout
|
|
129
|
+
return lock
|
|
130
|
+
|
|
131
|
+
task = asyncio.create_task(acquire_second())
|
|
132
|
+
|
|
133
|
+
# Give it time to start waiting
|
|
134
|
+
await asyncio.sleep(0.1)
|
|
135
|
+
|
|
136
|
+
# Release first lock - should notify waiting task
|
|
137
|
+
start = time.time()
|
|
138
|
+
lock1.release()
|
|
139
|
+
|
|
140
|
+
# Second task should complete quickly (not timeout)
|
|
141
|
+
lock2 = await task
|
|
142
|
+
elapsed = time.time() - start
|
|
143
|
+
|
|
144
|
+
assert lock2 is not None
|
|
145
|
+
assert elapsed < 1.0 # Should wake up immediately, not wait 5s
|
|
146
|
+
lock2.release()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
async def test_token_multiple_waiting():
|
|
150
|
+
"""Test multiple tasks waiting for tokens"""
|
|
151
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
152
|
+
token = CounterToken("test-multiple", Path(tmpdir) / "token", count=1)
|
|
153
|
+
|
|
154
|
+
class MockJob:
|
|
155
|
+
def __init__(self, name):
|
|
156
|
+
self.name = name
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def identifier(self):
|
|
160
|
+
return f"mock-job-{self.name}"
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def basepath(self):
|
|
164
|
+
return Path(tmpdir) / self.name
|
|
165
|
+
|
|
166
|
+
# Acquire the token
|
|
167
|
+
job1 = MockJob("1")
|
|
168
|
+
dep1 = token.dependency(1)
|
|
169
|
+
dep1.target = job1
|
|
170
|
+
lock1 = await dep1.aio_lock(timeout=0.5)
|
|
171
|
+
|
|
172
|
+
# Start multiple waiting tasks
|
|
173
|
+
acquired_order = []
|
|
174
|
+
|
|
175
|
+
async def acquire_task(name):
|
|
176
|
+
job = MockJob(name)
|
|
177
|
+
dep = token.dependency(1)
|
|
178
|
+
dep.target = job
|
|
179
|
+
lock = await dep.aio_lock(timeout=10.0)
|
|
180
|
+
acquired_order.append(name)
|
|
181
|
+
await asyncio.sleep(0.05) # Hold briefly
|
|
182
|
+
lock.release()
|
|
183
|
+
|
|
184
|
+
tasks = [
|
|
185
|
+
asyncio.create_task(acquire_task("2")),
|
|
186
|
+
asyncio.create_task(acquire_task("3")),
|
|
187
|
+
asyncio.create_task(acquire_task("4")),
|
|
188
|
+
]
|
|
189
|
+
|
|
190
|
+
# Give tasks time to start waiting
|
|
191
|
+
await asyncio.sleep(0.1)
|
|
192
|
+
|
|
193
|
+
# Release first lock
|
|
194
|
+
lock1.release()
|
|
195
|
+
|
|
196
|
+
# Wait for all tasks to complete
|
|
197
|
+
await asyncio.gather(*tasks)
|
|
198
|
+
|
|
199
|
+
# All tasks should have acquired the lock
|
|
200
|
+
assert len(acquired_order) == 3
|
|
201
|
+
assert set(acquired_order) == {"2", "3", "4"}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
async def test_token_timeout_zero():
|
|
205
|
+
"""Test that timeout=0 waits indefinitely"""
|
|
206
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
207
|
+
token = CounterToken("test-timeout-zero", Path(tmpdir) / "token", count=1)
|
|
208
|
+
|
|
209
|
+
class MockJob:
|
|
210
|
+
def __init__(self, name):
|
|
211
|
+
self.name = name
|
|
212
|
+
|
|
213
|
+
@property
|
|
214
|
+
def identifier(self):
|
|
215
|
+
return f"mock-job-{self.name}"
|
|
216
|
+
|
|
217
|
+
@property
|
|
218
|
+
def basepath(self):
|
|
219
|
+
return Path(tmpdir) / self.name
|
|
220
|
+
|
|
221
|
+
job1 = MockJob("1")
|
|
222
|
+
job2 = MockJob("2")
|
|
223
|
+
|
|
224
|
+
dep1 = token.dependency(1)
|
|
225
|
+
dep1.target = job1
|
|
226
|
+
|
|
227
|
+
dep2 = token.dependency(1)
|
|
228
|
+
dep2.target = job2
|
|
229
|
+
|
|
230
|
+
# Acquire with first
|
|
231
|
+
lock1 = await dep1.aio_lock(timeout=0.5)
|
|
232
|
+
|
|
233
|
+
# Start waiting with timeout=0 (infinite)
|
|
234
|
+
async def acquire_infinite():
|
|
235
|
+
return await dep2.aio_lock(timeout=0) # Should wait forever
|
|
236
|
+
|
|
237
|
+
task = asyncio.create_task(acquire_infinite())
|
|
238
|
+
|
|
239
|
+
# Give it time to start waiting
|
|
240
|
+
await asyncio.sleep(0.1)
|
|
241
|
+
|
|
242
|
+
# Wait a bit more - task should still be waiting
|
|
243
|
+
await asyncio.sleep(0.5)
|
|
244
|
+
assert not task.done()
|
|
245
|
+
|
|
246
|
+
# Release first lock
|
|
247
|
+
lock1.release()
|
|
248
|
+
|
|
249
|
+
# Now task should complete
|
|
250
|
+
lock2 = await asyncio.wait_for(task, timeout=2.0)
|
|
251
|
+
assert lock2 is not None
|
|
252
|
+
lock2.release()
|
|
@@ -59,9 +59,12 @@ def token_experiment(xp, token, ntasks=3):
|
|
|
59
59
|
assert (times[i - 1] > times[i]) or (times[i] > times[i - 1])
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
@pytest.mark.xfail(
|
|
62
|
+
@pytest.mark.xfail(
|
|
63
|
+
strict=False,
|
|
64
|
+
reason="Timing-dependent: tasks may run sequentially even without token",
|
|
65
|
+
)
|
|
63
66
|
def test_token_fail():
|
|
64
|
-
"""Simple token test: should fail without token"""
|
|
67
|
+
"""Simple token test: should fail without token (but may pass due to timing)"""
|
|
65
68
|
with TemporaryExperiment("tokens", maxwait=20) as xp:
|
|
66
69
|
token_experiment(xp, None)
|
|
67
70
|
|
|
@@ -71,6 +74,7 @@ def test_token_ok():
|
|
|
71
74
|
with TemporaryExperiment("tokens", maxwait=20) as xp:
|
|
72
75
|
token = CounterToken("token-ok", xp.workdir / "token", 1)
|
|
73
76
|
token_experiment(xp, token)
|
|
77
|
+
|
|
74
78
|
logging.info("Finished token_ok test")
|
|
75
79
|
|
|
76
80
|
|
|
@@ -129,7 +133,7 @@ def test_token_cleanup():
|
|
|
129
133
|
|
|
130
134
|
|
|
131
135
|
def test_token_monitor():
|
|
132
|
-
"""Two different
|
|
136
|
+
"""Two different experiments (within the same process and workspace)
|
|
133
137
|
|
|
134
138
|
Test the ability of the token to monitor the filesystem
|
|
135
139
|
"""
|
|
@@ -141,18 +145,18 @@ def test_token_monitor():
|
|
|
141
145
|
)
|
|
142
146
|
return task
|
|
143
147
|
|
|
144
|
-
with TemporaryExperiment("tokens1", maxwait=20, port=0) as xp1
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
with TemporaryExperiment("tokens1", maxwait=20, port=0) as xp1:
|
|
149
|
+
# Use the same workspace for both experiments
|
|
150
|
+
with TemporaryExperiment(
|
|
151
|
+
"tokens2", workdir=xp1.workspace.path, maxwait=20
|
|
152
|
+
) as xp2:
|
|
153
|
+
path = xp1.workspace.path / "test_token.file"
|
|
154
|
+
task1 = run(xp1, 1, path)
|
|
155
|
+
task2 = run(xp2, 2, path)
|
|
150
156
|
|
|
151
|
-
|
|
152
|
-
|
|
157
|
+
time.sleep(0.5)
|
|
158
|
+
path.write_text("Hello world")
|
|
153
159
|
|
|
154
|
-
xp1.wait()
|
|
155
|
-
xp2.wait()
|
|
156
160
|
time1 = get_times(task1)
|
|
157
161
|
time2 = get_times(task2)
|
|
158
162
|
|