experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of experimaestro might be problematic.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +393 -134
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +223 -52
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +650 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +764 -169
- experimaestro/scheduler/interfaces.py +338 -96
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +928 -0
- experimaestro/scheduler/remote/protocol.py +282 -0
- experimaestro/scheduler/remote/server.py +447 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +186 -35
- experimaestro/scheduler/state_provider.py +811 -2157
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +1132 -0
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +459 -1895
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -388
- experimaestro/scheduler/state_sync.py +0 -834
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b4.dist-info/RECORD +0 -181
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/scriptbuilder.py
CHANGED
@@ -67,9 +67,9 @@ class PythonScriptBuilder:
         Returns:
             str -- The script path on disk
         """
-        assert isinstance(
-            job
-        )
+        assert isinstance(job, CommandLineJob), (
+            "Cannot handle a job which is not a command line job"
+        )
         assert self.command is not None
         assert job.workspace, "No workspace defined for the job"
         assert job.launcher is not None, "No launcher defined for the job"
@@ -101,7 +101,7 @@ class PythonScriptBuilder:
             """format='%(levelname)s:%(process)d:%(asctime)s [%(name)s] %(message)s', datefmt='%y-%m-%d %H:%M:%S')\n\n"""
         )

-        out.write("\nif __name__ == '__main__':\n\n"
+        out.write("\nif __name__ == '__main__':\n\n")

         # --- Checks locks right away

experimaestro/settings.py
CHANGED
@@ -23,6 +23,36 @@ class ServerSettings:
     """Token for the server"""


+@dataclass
+class HistorySettings:
+    """Settings for experiment history cleanup.
+
+    When an experiment ends, old runs are cleaned up according to these rules
+    (applied in order):
+
+    1. If current run succeeded, all past failed runs are removed
+    2. Failed runs that occurred before the newest successful run are removed
+       (since the success supersedes the earlier failures)
+    3. Keep at most `max_done` successful runs (oldest removed first)
+    4. Keep at most `max_failed` failed runs (oldest removed first)
+
+    Example: With max_done=1, max_failed=1 and runs:
+    - 10:00 completed
+    - 11:00 failed
+    - 12:00 completed
+    - 13:00 failed
+
+    Result: 11:00 failed is removed (before 12:00 success), 10:00 completed
+    is removed (max_done=1), leaving: 12:00 completed, 13:00 failed.
+    """
+
+    max_done: int = 5
+    """Maximum number of successful runs to keep per experiment"""
+
+    max_failed: int = 1
+    """Maximum number of failed runs to keep per experiment"""
+
+
 @dataclass
 class WorkspaceSettings:
     """Defines the workspace"""
@@ -45,6 +75,9 @@ class WorkspaceSettings:
     triggers: List[str] = field(default_factory=list)
     """Glob patterns to automatically select this workspace based on experiment ID"""

+    history: HistorySettings = field(default_factory=HistorySettings)
+    """Settings for experiment history cleanup"""
+
     def __post_init__(self):
         self.path = self.path.expanduser().resolve()

@@ -57,6 +90,9 @@ class Settings:
     env: Dict[str, str] = field(default_factory=dict)
     """Default environment variables"""

+    history: HistorySettings = field(default_factory=HistorySettings)
+    """Default history settings (can be overridden per workspace)"""
+

 @lru_cache()
 def get_settings(path: Optional[Path] = None) -> Settings:
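The cleanup rules documented in the `HistorySettings` docstring can be illustrated with a small standalone sketch. The `prune` helper below is hypothetical (experimaestro's actual cleanup logic is not shown in this diff); it mirrors rules 2-4 only, since rule 1 does not apply to the docstring example, where the most recent run is a failure.

from dataclasses import dataclass


@dataclass
class HistorySettings:  # mirrors the dataclass added to settings.py above
    max_done: int = 5
    max_failed: int = 1


def prune(runs, settings: HistorySettings):
    """runs: (timestamp, state) tuples, oldest first, with state "done" or "failed"."""
    # Rule 2: drop failed runs that are older than the newest successful run
    done_times = [t for t, s in runs if s == "done"]
    newest_done = max(done_times) if done_times else None
    runs = [
        (t, s)
        for t, s in runs
        if not (s == "failed" and newest_done is not None and t < newest_done)
    ]
    # Rules 3 and 4: keep only the most recent max_done / max_failed runs
    # (assumes positive limits)
    kept = []
    for state, limit in (("done", settings.max_done), ("failed", settings.max_failed)):
        kept += [r for r in runs if r[1] == state][-limit:]
    return sorted(kept)


# The docstring example: only the 12:00 success and the 13:00 failure survive
print(prune(
    [("10:00", "done"), ("11:00", "failed"), ("12:00", "done"), ("13:00", "failed")],
    HistorySettings(max_done=1, max_failed=1),
))  # [('12:00', 'done'), ('13:00', 'failed')]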
experimaestro/tests/conftest.py
CHANGED
@@ -3,6 +3,9 @@ import pytest
 import os
 import shutil

+# Set shorter poll interval for tests (before any imports that read it)
+os.environ.setdefault("XPM_POLL_INTERVAL_MAX", "5.0")
+

 @pytest.fixture(scope="session")
 def xpmdirectory(tmp_path_factory):
@@ -23,7 +26,17 @@ def xpmdirectory(tmp_path_factory):
 def reset_scheduler():
     """Reset scheduler state between tests to avoid state leakage with singleton pattern"""
     from experimaestro.scheduler.base import Scheduler
-    from experimaestro.
+    from experimaestro.webui import WebUIServer
+    from experimaestro.tokens import CounterToken
+    from experimaestro.core.partial_lock import PartialJobResource
+    from experimaestro.dynamic import ResourcePoller
+
+    # Clear token and resource caches
+    CounterToken.TOKENS.clear()
+    PartialJobResource.RESOURCES.clear()
+
+    # Reset ResourcePoller singleton
+    ResourcePoller.reset()

     # Get the singleton instance if it exists
     if Scheduler._instance is not None:
@@ -60,9 +73,9 @@ def reset_scheduler():
         scheduler.addlistener(scheduler.state_provider)

     # Reset server instance too
-    if
+    if WebUIServer._instance is not None:
         logging.debug("FIXTURE: Clearing server instance")
-
+        WebUIServer.clear_instance()

     yield

@@ -95,8 +108,15 @@ def reset_scheduler():
         scheduler.addlistener(scheduler.state_provider)

     # Reset server after test
-    if
-
+    if WebUIServer._instance is not None:
+        WebUIServer.clear_instance()
+
+    # Clear token and resource caches after test
+    CounterToken.TOKENS.clear()
+    PartialJobResource.RESOURCES.clear()
+
+    # Reset ResourcePoller singleton after test
+    ResourcePoller.reset()


 # Sets a flag
@@ -105,6 +125,14 @@ def pytest_configure(config):

     sys._called_from_test = True

+    # Disable peewee logging by default (too verbose)
+    logging.getLogger("peewee").setLevel(logging.WARNING)
+
+    # Enable IPCom testing mode with polling for reliable file watching in tests
+    from experimaestro.ipc import IPCom
+
+    IPCom.set_testing_mode(enabled=True, polling_interval=0.01)
+

 def pytest_unconfigure(config):
     import sys  # This was missing from the manual
experimaestro/tests/connectors/bin/executable.py
CHANGED
@@ -1 +1 @@
-print("hello world")
+print("hello world")  # noqa: T201
experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py
ADDED
@@ -0,0 +1,16 @@
+"""Experiment that checks pre_experiment set an environment variable."""
+
+import os
+from experimaestro.experiments import ExperimentHelper, configuration, ConfigurationBase
+
+
+@configuration()
+class Configuration(ConfigurationBase):
+    pass
+
+
+def run(helper: ExperimentHelper, cfg: Configuration):
+    assert os.environ.get("XPM_TEST_PRE_EXPERIMENT") == "executed", (
+        "Pre-experiment script was not executed"
+    )
+    print("PRE_EXPERIMENT_TEST_PASSED")  # noqa: T201
experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py
ADDED
@@ -0,0 +1,14 @@
+"""Experiment that checks pre_experiment mocked a module."""
+
+import xpm_fake_module  # noqa: F401 - this import will fail without pre_experiment
+from experimaestro.experiments import ExperimentHelper, configuration, ConfigurationBase
+
+
+@configuration()
+class Configuration(ConfigurationBase):
+    pass
+
+
+def run(helper: ExperimentHelper, cfg: Configuration):
+    assert xpm_fake_module.value == 42, "Mock module should have value 42"
+    print("MOCK_MODULE_TEST_PASSED")  # noqa: T201
experimaestro/tests/fixtures/pre_experiment/experiment_simple.py
ADDED
@@ -0,0 +1,12 @@
+"""Simple experiment that does nothing."""
+
+from experimaestro.experiments import ExperimentHelper, configuration, ConfigurationBase
+
+
+@configuration()
+class Configuration(ConfigurationBase):
+    pass
+
+
+def run(helper: ExperimentHelper, cfg: Configuration):
+    pass
experimaestro/tests/launchers/test_slurm.py
CHANGED
@@ -104,14 +104,14 @@ class SlurmResumableTask(ResumableTask):
         if self.checkpoint.exists():
             attempt = int(self.checkpoint.read_text()) + 1

-        print(f"SlurmResumableTask attempt #{attempt}")
+        print(f"SlurmResumableTask attempt #{attempt}")  # noqa: T201

         # Write updated attempt count
         self.checkpoint.write_text(str(attempt))

         # Simulate timeout for first timeout_count attempts
         if attempt <= self.timeout_count:
-            print(f"Simulating SLURM TIMEOUT on attempt {attempt}")
+            print(f"Simulating SLURM TIMEOUT on attempt {attempt}")  # noqa: T201
             # Create timeout marker file for mock SLURM
             # The marker needs to be named <jobid>.timeout in the SLURM jobs directory
             # Use SLURM_JOB_ID environment variable (set by mock sbatch, like real SLURM)
@@ -123,7 +123,7 @@ class SlurmResumableTask(ResumableTask):
             raise RuntimeError(f"Simulated timeout on attempt {attempt}")

         # Success - task completed
-        print(f"Task completed successfully on attempt {attempt}")
+        print(f"Task completed successfully on attempt {attempt}")  # noqa: T201
         if self.output_file:
             self.output_file.write_text(f"Completed after {attempt} attempts")

@@ -151,15 +151,15 @@ def test_slurm_resumable_task(tmp_path: Path, slurmlauncher: SlurmLauncher):

     # Verify task completed successfully after retries
     assert state == JobState.DONE, f"Task did not complete successfully: {state}"
-    assert (
-        task.__xpm__.job.retry_count
-    )
+    assert task.__xpm__.job.retry_count == 2, (
+        f"Expected 2 retries, got {task.__xpm__.job.retry_count}"
+    )

     # Verify checkpoint shows 3 attempts (2 timeouts + 1 success)
     assert checkpoint.exists(), "Checkpoint file was not created"
-    assert (
-
-    )
+    assert int(checkpoint.read_text()) == 3, (
+        f"Expected 3 attempts, got {checkpoint.read_text()}"
+    )

     # Verify output file was created on success
     assert output_file.exists(), "Output file was not created"
experimaestro/tests/partial_reschedule.py
ADDED
@@ -0,0 +1,46 @@
+"""Script for running partial task in subprocess for concurrent testing"""
+
+if __name__ == "__main__":
+    import sys
+    import logging
+    from pathlib import Path
+    import time
+
+    from experimaestro.scheduler import JobState
+    from experimaestro.tests.utils import TemporaryExperiment
+    from experimaestro.tests.task_partial import PartialTask
+
+    root = logging.getLogger()
+    root.setLevel(logging.INFO)
+    logging.getLogger("xpm").setLevel(logging.DEBUG)
+
+    workdir, x, lockingpath, readypath, timepath = sys.argv[1:]
+
+    handler = logging.StreamHandler()
+    bf = logging.Formatter(
+        f"[XP{x}] [%(levelname)s] %(asctime)s.%(msecs)03d %(name)s "
+        f"[%(process)d/%(threadName)s]: %(message)s",
+        datefmt="%H:%M:%S",
+    )
+    handler.setFormatter(bf)
+    root.handlers.clear()
+    root.addHandler(handler)
+
+    with TemporaryExperiment("partial_reschedule%s" % x, workdir=workdir) as xp:
+        logging.info("Partial reschedule [%s]: starting task in %s", x, workdir)
+        task = PartialTask.C(path=lockingpath, x=int(x)).submit()
+
+        logging.info("Waiting for task (partial with %s) to be scheduled", lockingpath)
+        while task.job.state == JobState.UNSCHEDULED:
+            time.sleep(0.01)
+
+        # Write so that the test knows we are ready
+        Path(readypath).write_text("hello")
+        logging.info("Partial reschedule [%s]: ready", x)
+
+        # Wait until the experiment finishes
+        task.__xpm__.task.job.wait()
+        logging.info("Partial reschedule [%s]: finished", x)
+
+        # Write the timestamp from the task so the test can retrieve them easily
+        Path(timepath).write_text(Path(task.stdout()).read_text())
experimaestro/tests/restart.py
CHANGED
@@ -114,9 +114,9 @@ def restart(terminate: Callable, experiment):
         time.sleep(0.1)

     currentState = scheduler.getJobState(job).result()
-    assert (
-
-    )
+    assert currentState == JobState.RUNNING, (
+        f"Job is not running (state is {currentState})"
+    )

     # Notify the task
     with task.wait.open("w") as fp:
experimaestro/tests/task_partial.py
ADDED
@@ -0,0 +1,38 @@
+"""Task definitions for partial locking tests"""
+
+from pathlib import Path
+import time
+from experimaestro import Task, Param, Meta, field, PathGenerator, partial, param_group
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+# Define parameter groups
+iter_group = param_group("iter")
+
+
+class PartialTask(Task):
+    """Task that uses partial and waits for a file before completing"""
+
+    # Define a partial set
+    checkpoints = partial(exclude_groups=[iter_group])
+
+    # Parameter in iter_group - excluded from partial identifier
+    x: Param[int] = field(groups=[iter_group])
+
+    # The path to watch before completing
+    path: Param[Path]
+
+    # Path generated using the partial identifier
+    checkpoint_path: Meta[Path] = field(
+        default_factory=PathGenerator("checkpoint", partial=checkpoints)
+    )
+
+    def execute(self):
+        print(time.time())  # noqa: T201
+        # Create checkpoint directory
+        self.checkpoint_path.mkdir(parents=True, exist_ok=True)
+        # Wait for signal file
+        while not self.path.is_file():
+            time.sleep(0.1)
+        print(time.time())  # noqa: T201
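The `PartialTask` fixture above exercises the partial-identifier API added in this release (`param_group`, `partial`, and `PathGenerator(..., partial=...)`). Below is a hedged usage sketch that reuses names appearing elsewhere in this diff (`TemporaryExperiment`, `RunMode.DRY_RUN`, `stdout()`); the import locations and the shared-checkpoint behaviour are assumptions based on the fixture's comments, not verified against the implementation.

from pathlib import Path

from experimaestro import RunMode
from experimaestro.tests.task_partial import PartialTask
from experimaestro.tests.utils import TemporaryExperiment

with TemporaryExperiment("partial_demo"):
    # `path` is a required parameter; under DRY_RUN the task never executes,
    # so the file does not need to exist.
    signal = Path("/tmp/xpm-partial-signal")

    task_a = PartialTask.C(path=signal, x=1).submit(run_mode=RunMode.DRY_RUN)
    task_b = PartialTask.C(path=signal, x=2).submit(run_mode=RunMode.DRY_RUN)

    # `x` is a regular parameter, so the two jobs get distinct directories...
    assert task_a.stdout() != task_b.stdout()
    # ...while `checkpoint_path`, generated from the partial identifier that
    # excludes `iter_group`, should be shared between the two submissions.
    assert task_a.checkpoint_path == task_b.checkpoint_path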
experimaestro/tests/task_tokens.py
CHANGED
@@ -16,7 +16,7 @@ class TokenTask(Task):
     """A dummy parameter to create several distinct token tasks"""

     def execute(self):
-        print(time.time())
+        print(time.time())  # noqa: T201
         while not self.path.is_file():
             time.sleep(0.1)
-        print(time.time())
+        print(time.time())  # noqa: T201
experimaestro/tests/tasks/test_dynamic.py
CHANGED
@@ -201,9 +201,9 @@ def test_task_dynamic_replay():
         logging.info("First run ended (expected): %s", e)

     # First run should have produced at least one evaluation (for step 15)
-    assert (
-        len(evaluations_run1)
-    )
+    assert len(evaluations_run1) == 1, (
+        f"Run 1: Expected 1 evaluation, got {len(evaluations_run1)}"
+    )

     # Second run: restart and continue
     evaluations_run2 = []
@@ -226,6 +226,6 @@ def test_task_dynamic_replay():
     # - Replayed the step 15 checkpoint (from first run)
     # - Produced step 30 and 45 checkpoints
     # Total: 3 evaluations (but step 15 was replayed, not re-produced)
-    assert (
-        len(evaluations_run2)
-    )
+    assert len(evaluations_run2) == 3, (
+        f"Run 2: Expected 3 evaluations, got {len(evaluations_run2)}"
+    )
experimaestro/tests/test_dependencies.py
CHANGED
@@ -20,9 +20,9 @@ def check_dependencies(task: Task, *tasks: Task):

     assert len(deps) == len(tasks)
     for task in tasks:
-        assert (
-
-        )
+        assert id(task) in deps, (
+            f"Task {task.__xpmtype__} with ID {task.__identifier__()}"
+        )
             " is not not in the dependencies"


experimaestro/tests/test_deprecated.py
CHANGED
@@ -422,14 +422,14 @@ def test_task_deprecated_config_identifier():
     )

     # Old (non-deprecated) and new should have different paths
-    assert (
-
-    )
+    assert task_new.stdout() != task_old.stdout(), (
+        "Old and new path should be different"
+    )

     # Deprecated should have same path as new (identifier matches)
-    assert (
-
-    )
+    assert task_new.stdout() == task_deprecated.stdout(), (
+        "Deprecated path should be the same as non deprecated"
+    )


 def test_task_deprecated_config_fix_deprecated():
@@ -485,14 +485,14 @@ def test_task_deprecated_identifier():
     task_deprecated = DeprecatedTask.C(x=1).submit(run_mode=RunMode.DRY_RUN)

     # Old and new should have different paths
-    assert (
-
-    )
+    assert task_new.stdout() != task_old.stdout(), (
+        "Old and new path should be different"
+    )

     # Deprecated should have same path as new
-    assert (
-
-    )
+    assert task_new.stdout() == task_deprecated.stdout(), (
+        "Deprecated path should be the same as non deprecated"
+    )


 def test_task_deprecated_fix_deprecated():
@@ -550,9 +550,9 @@ def test_task_deprecated_with_convert_identifier():
     task_new = NewTaskWithConvert.C(values=[42]).submit(run_mode=RunMode.DRY_RUN)

     # Identifiers should match (computed via __convert__)
-    assert (
-
-    )
+    assert task_old.stdout() == task_new.stdout(), (
+        "Deprecated task should have same path as equivalent new task"
+    )


 @deprecate(NewTaskWithConvert, replace=True)
|