experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +278 -7
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +20 -1
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +182 -46
- experimaestro/core/identifier.py +107 -6
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +542 -25
- experimaestro/core/objects/config_walk.py +20 -0
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +175 -38
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +111 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +63 -13
- experimaestro/progress.py +0 -2
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/base.py +510 -125
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +256 -31
- experimaestro/scheduler/interfaces.py +501 -0
- experimaestro/scheduler/jobs.py +216 -206
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/client.py +874 -0
- experimaestro/scheduler/remote/protocol.py +467 -0
- experimaestro/scheduler/remote/server.py +423 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +323 -23
- experimaestro/scheduler/state_db.py +437 -0
- experimaestro/scheduler/state_provider.py +2766 -0
- experimaestro/scheduler/state_sync.py +891 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +147 -57
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +44 -5
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_file_progress_integration.py +1 -1
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_identifier.py +372 -41
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +3 -3
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +312 -5
- experimaestro/tests/test_outputs.py +2 -2
- experimaestro/tests/test_param.py +8 -12
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +0 -48
- experimaestro/tests/test_remote_state.py +671 -0
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -1
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +136 -0
- experimaestro/tests/test_tasks.py +107 -121
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +17 -13
- experimaestro/tests/test_types.py +123 -1
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +4 -2
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +1 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2395 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
- experimaestro-2.0.0b8.dist-info/RECORD +187 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -221
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-2.0.0a8.dist-info/RECORD +0 -166
- experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
"""Functional tests for CLI jobs commands
|
|
2
|
+
|
|
3
|
+
Tests the jobs list, kill, and clean commands using the WorkspaceStateProvider.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
import time
|
|
8
|
+
from click.testing import CliRunner
|
|
9
|
+
|
|
10
|
+
from experimaestro.cli import cli
|
|
11
|
+
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
12
|
+
from experimaestro.scheduler.state_db import (
|
|
13
|
+
initialize_workspace_database,
|
|
14
|
+
close_workspace_database,
|
|
15
|
+
ExperimentModel,
|
|
16
|
+
ExperimentRunModel,
|
|
17
|
+
JobModel,
|
|
18
|
+
ALL_MODELS,
|
|
19
|
+
)
|
|
20
|
+
from experimaestro.scheduler.workspace import WORKSPACE_VERSION
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@pytest.fixture
def workspace_path(tmp_path):
    """Yield a fresh workspace directory whose state database is open.

    The layout built under ``tmp_path / "workspace"`` is:

    - ``.__experimaestro__``: version marker holding ``WORKSPACE_VERSION``
    - ``.experimaestro/workspace.db``: database opened with ``read_only=False``
    - ``jobs/``: empty directory

    The database handle is closed after the dependent test has finished.
    """
    root = tmp_path / "workspace"
    root.mkdir()

    # Version marker, written with the current workspace version
    version_file = root / ".__experimaestro__"
    version_file.write_text(str(WORKSPACE_VERSION))

    # The database lives in the hidden .experimaestro folder
    state_dir = root / ".experimaestro"
    state_dir.mkdir()
    handle, _ = initialize_workspace_database(
        state_dir / "workspace.db", read_only=False
    )

    # Jobs directory (left empty here)
    (root / "jobs").mkdir()

    yield root

    close_workspace_database(handle)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.fixture
def workspace_with_jobs(workspace_path):
    """Create a workspace with some test jobs in the database.

    One experiment (``test_exp``) with one run (``run_001``) is recorded,
    followed by four jobs: two ``done``, one ``error`` and one ``running``.
    Each job also gets a ``jobs/<task_id>/<job_id>`` directory containing a
    ``<script>.done`` marker (done jobs) or a ``<script>.failed`` file holding
    ``{}`` (error jobs); the running job gets no marker file.

    Yields the workspace path once the database handle has been closed.
    """
    db_path = workspace_path / ".experimaestro" / "workspace.db"
    db, _ = initialize_workspace_database(db_path, read_only=False)

    # (job_id, task_id, state)
    jobs_data = [
        ("job_done_1", "mymodule.DoneTask", "done"),
        ("job_done_2", "mymodule.DoneTask", "done"),
        ("job_error_1", "mymodule.ErrorTask", "error"),
        ("job_running_1", "mymodule.RunningTask", "running"),
    ]
    jobs_dir = workspace_path / "jobs"

    try:
        with db.bind_ctx(ALL_MODELS):
            # Create experiment and run
            ExperimentModel.create(experiment_id="test_exp", current_run_id="run_001")
            ExperimentRunModel.create(
                experiment_id="test_exp", run_id="run_001", status="active"
            )

            for job_id, task_id, state in jobs_data:
                JobModel.create(
                    job_id=job_id,
                    experiment_id="test_exp",
                    run_id="run_001",
                    task_id=task_id,
                    locator=job_id,
                    state=state,
                    submitted_time=time.time(),
                )

                # Create the job directory
                job_dir = jobs_dir / task_id / job_id
                job_dir.mkdir(parents=True, exist_ok=True)

                # Marker files are named after the last component of the task id
                script_name = task_id.rsplit(".", 1)[-1]
                if state == "done":
                    (job_dir / f"{script_name}.done").touch()
                elif state == "error":
                    (job_dir / f"{script_name}.failed").write_text("{}")
    finally:
        # Release the handle even when populating the workspace fails,
        # so a setup error does not leave the database open
        close_workspace_database(db)

    yield workspace_path
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_jobs_list_empty_workspace(workspace_path):
    """``jobs list`` on a workspace without jobs succeeds and reports none."""
    argv = ["jobs", "--workdir", str(workspace_path), "list"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "No jobs found" in outcome.output
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_jobs_list_with_jobs(workspace_with_jobs):
    """``jobs list`` prints every job id plus the DONE and FAIL labels."""
    argv = ["jobs", "--workdir", str(workspace_with_jobs), "list"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0

    listing = outcome.output
    expected_fragments = (
        "job_done_1",
        "job_done_2",
        "job_error_1",
        "job_running_1",
        "DONE",
        "FAIL",
    )
    for fragment in expected_fragments:
        assert fragment in listing
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_jobs_list_with_filter(workspace_with_jobs):
    """``jobs list --filter`` keeps only the jobs matching the expression."""
    workdir = str(workspace_with_jobs)
    argv = ["jobs", "--workdir", workdir, "list", "--filter", '@state = "done"']
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0

    listing = outcome.output
    # Jobs in the "done" state are shown...
    for kept in ("job_done_1", "job_done_2"):
        assert kept in listing
    # ...while the error and running jobs are filtered out
    for dropped in ("job_error_1", "job_running_1"):
        assert dropped not in listing
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_jobs_list_fullpath(workspace_with_jobs):
    """``jobs list --fullpath`` prints paths located under the jobs directory."""
    argv = ["jobs", "--workdir", str(workspace_with_jobs), "list", "--fullpath"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0

    # Full paths are expected instead of the task_id/job_id format
    jobs_root = workspace_with_jobs / "jobs"
    assert str(jobs_root) in outcome.output
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@pytest.fixture
def workspace_with_timed_jobs(workspace_path):
    """Create a workspace with jobs that have different submission times.

    One experiment (``test_exp``) with one run (``run_001``) is recorded,
    followed by three ``done`` jobs submitted one hour ago, thirty minutes
    ago and now (relative to a single ``time.time()`` reading). Each job gets
    a ``jobs/<task_id>/<job_id>`` directory containing a ``<script>.done``
    marker.

    Yields the workspace path once the database handle has been closed.
    """
    db_path = workspace_path / ".experimaestro" / "workspace.db"
    db, _ = initialize_workspace_database(db_path, read_only=False)

    # Single time reference so the three offsets are exact
    base_time = time.time()
    # (job_id, task_id, state, submitted_time), oldest to newest
    jobs_data = [
        ("job_oldest", "mymodule.Task", "done", base_time - 3600),  # 1 hour ago
        ("job_middle", "mymodule.Task", "done", base_time - 1800),  # 30 min ago
        ("job_newest", "mymodule.Task", "done", base_time),  # now
    ]
    jobs_dir = workspace_path / "jobs"

    try:
        with db.bind_ctx(ALL_MODELS):
            ExperimentModel.create(experiment_id="test_exp", current_run_id="run_001")
            ExperimentRunModel.create(
                experiment_id="test_exp", run_id="run_001", status="active"
            )

            for job_id, task_id, state, submit_time in jobs_data:
                JobModel.create(
                    job_id=job_id,
                    experiment_id="test_exp",
                    run_id="run_001",
                    task_id=task_id,
                    locator=job_id,
                    state=state,
                    submitted_time=submit_time,
                )
                job_dir = jobs_dir / task_id / job_id
                job_dir.mkdir(parents=True, exist_ok=True)

                # Marker file is named after the last component of the task id
                script_name = task_id.rsplit(".", 1)[-1]
                (job_dir / f"{script_name}.done").touch()
    finally:
        # Release the handle even when populating the workspace fails,
        # so a setup error does not leave the database open
        close_workspace_database(db)

    yield workspace_path
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def test_jobs_list_sorted_by_date(workspace_with_timed_jobs):
    """Test that jobs are sorted by submission date (most recent first).

    The position of each job id in the ``jobs list`` output is compared:
    the newest job must appear before the middle one, and the middle one
    before the oldest.
    """
    runner = CliRunner()
    result = runner.invoke(
        cli, ["jobs", "--workdir", str(workspace_with_timed_jobs), "list"]
    )

    assert result.exit_code == 0
    output = result.output

    newest_pos = output.find("job_newest")
    middle_pos = output.find("job_middle")
    oldest_pos = output.find("job_oldest")

    # str.find returns -1 for a missing substring; without this guard a
    # missing "job_newest" (-1) would satisfy the ordering check below
    positions = {
        "job_newest": newest_pos,
        "job_middle": middle_pos,
        "job_oldest": oldest_pos,
    }
    for job_id, position in positions.items():
        assert position >= 0, f"{job_id} is missing from the output"

    # Verify order: newest should appear before middle, middle before oldest
    assert newest_pos < middle_pos < oldest_pos, (
        f"Jobs should be sorted by date (newest first). "
        f"Positions: newest={newest_pos}, middle={middle_pos}, oldest={oldest_pos}"
    )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def test_jobs_list_with_count(workspace_with_timed_jobs):
    """``jobs list -c 2`` shows the two newest jobs and hides the oldest."""
    workdir = str(workspace_with_timed_jobs)
    outcome = CliRunner().invoke(cli, ["jobs", "--workdir", workdir, "list", "-c", "2"])

    assert outcome.exit_code == 0

    listing = outcome.output
    # Only the 2 most recent jobs are expected
    for shown in ("job_newest", "job_middle"):
        assert shown in listing
    assert "job_oldest" not in listing
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def test_jobs_list_count_zero_shows_all(workspace_with_timed_jobs):
    """``jobs list --count 0`` lists all three timed jobs."""
    workdir = str(workspace_with_timed_jobs)
    argv = ["jobs", "--workdir", workdir, "list", "--count", "0"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0

    listing = outcome.output
    # Every job must be present
    for job_id in ("job_newest", "job_middle", "job_oldest"):
        assert job_id in listing
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def test_jobs_list_with_experiment_filter(workspace_with_jobs):
    """``jobs list --experiment`` restricts the listing to one experiment."""
    cli_runner = CliRunner()
    workdir = str(workspace_with_jobs)

    def list_for(experiment_id):
        # Run ``jobs list`` restricted to the given experiment id
        return cli_runner.invoke(
            cli,
            ["jobs", "--workdir", workdir, "list", "--experiment", experiment_id],
        )

    # Existing experiment: its jobs are listed
    known = list_for("test_exp")
    assert known.exit_code == 0
    assert "job_done_1" in known.output

    # Non-existent experiment: still a success, but nothing is listed
    unknown = list_for("nonexistent")
    assert unknown.exit_code == 0
    assert "No jobs found" in unknown.output
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def test_jobs_clean_dry_run(workspace_with_jobs):
    """``jobs clean`` without ``--perform`` reports a dry run and deletes nothing."""
    workdir = str(workspace_with_jobs)
    argv = ["jobs", "--workdir", workdir, "clean", "--filter", '@state = "done"']
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "dry run" in outcome.output.lower()

    # The directories of the matching jobs must still be on disk
    done_task_dir = workspace_with_jobs / "jobs" / "mymodule.DoneTask"
    for job_id in ("job_done_1", "job_done_2"):
        assert (done_task_dir / job_id).exists()
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def test_jobs_clean_with_perform(workspace_with_jobs):
    """``jobs clean --perform`` removes done jobs from disk and from the database."""
    argv = [
        "jobs",
        "--workdir",
        str(workspace_with_jobs),
        "clean",
        "--filter",
        '@state = "done"',
        "--perform",
    ]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "Cleaned" in outcome.output

    # The job directories must be gone
    done_task_dir = workspace_with_jobs / "jobs" / "mymodule.DoneTask"
    for job_id in ("job_done_1", "job_done_2"):
        assert not (done_task_dir / job_id).exists()

    # The database entries must be gone as well
    provider = WorkspaceStateProvider.get_instance(workspace_with_jobs, read_only=True)
    try:
        remaining = [job.identifier for job in provider.get_all_jobs()]
        for removed in ("job_done_1", "job_done_2"):
            assert removed not in remaining
        # Error and running jobs were not matched by the filter and stay
        for kept in ("job_error_1", "job_running_1"):
            assert kept in remaining
    finally:
        provider.close()
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def test_jobs_clean_does_not_clean_running(workspace_with_jobs):
    """An unfiltered ``jobs clean --perform`` leaves the running job in the database."""
    argv = ["jobs", "--workdir", str(workspace_with_jobs), "clean", "--perform"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0

    # The running job must still be known to the state provider
    provider = WorkspaceStateProvider.get_instance(workspace_with_jobs, read_only=True)
    try:
        remaining = [job.identifier for job in provider.get_all_jobs()]
        assert "job_running_1" in remaining
    finally:
        provider.close()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def test_jobs_kill_dry_run(workspace_with_jobs):
    """``jobs kill`` without ``--perform`` succeeds and reports a dry run."""
    workdir = str(workspace_with_jobs)
    argv = ["jobs", "--workdir", workdir, "kill", "--filter", '@state = "running"']
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "dry run" in outcome.output.lower()
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def test_jobs_kill_with_perform(workspace_with_jobs):
    """``jobs kill --perform`` kills the running job and records it as an error.

    ``Process.fromDefinition`` is patched to return a mock, so no real
    process is signalled; the test checks that ``kill`` was called once on
    that mock and that the job state read back afterwards is ``ERROR``.
    """
    import json
    from unittest.mock import MagicMock, patch

    # Give the running job a PID file so the kill command can find a process
    running_dir = workspace_with_jobs / "jobs" / "mymodule.RunningTask" / "job_running_1"
    (running_dir / "RunningTask.pid").write_text(
        json.dumps({"type": "local", "pid": 12345})
    )

    # Stand-in for the process object
    fake_process = MagicMock()
    fake_process.kill = MagicMock()

    argv = [
        "jobs",
        "--workdir",
        str(workspace_with_jobs),
        "kill",
        "--filter",
        '@state = "running"',
        "--perform",
    ]
    with patch(
        "experimaestro.connectors.Process.fromDefinition", return_value=fake_process
    ):
        cli_runner = CliRunner()
        outcome = cli_runner.invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "KILLED" in outcome.output

    # The mock process must have been killed exactly once
    fake_process.kill.assert_called_once()

    # The database must now report the job as being in error
    provider = WorkspaceStateProvider.get_instance(workspace_with_jobs, read_only=True)
    try:
        killed_job = None
        for job in provider.get_all_jobs():
            if job.identifier == "job_running_1":
                killed_job = job
                break
        assert killed_job is not None

        # State should be updated to error after kill
        from experimaestro.scheduler import JobState

        assert killed_job.state == JobState.ERROR
    finally:
        provider.close()
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def test_jobs_path(workspace_with_jobs):
    """``jobs path <task>/<job>`` prints the directory of an existing job."""
    workdir = str(workspace_with_jobs)
    argv = ["jobs", "--workdir", workdir, "path", "mymodule.DoneTask/job_done_1"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0

    job_dir = workspace_with_jobs / "jobs" / "mymodule.DoneTask" / "job_done_1"
    assert str(job_dir) in outcome.output
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def test_jobs_path_nonexistent(workspace_with_jobs):
    """``jobs path`` on an unknown job exits with 0 and prints "not found"."""
    workdir = str(workspace_with_jobs)
    argv = ["jobs", "--workdir", workdir, "path", "mymodule.Task/nonexistent"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "not found" in outcome.output.lower()
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def test_jobs_log_nonexistent(workspace_with_jobs):
    """``jobs log`` on a job with no log exits with 0 and prints "not found"."""
    workdir = str(workspace_with_jobs)
    argv = ["jobs", "--workdir", workdir, "log", "mymodule.DoneTask/job_done_1"]
    outcome = CliRunner().invoke(cli, argv)

    assert outcome.exit_code == 0
    assert "not found" in outcome.output.lower()
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def test_sync_detects_dead_process_and_updates_disk(tmp_path):
    """A dead "running" job is detected and its on-disk state is rewritten.

    Scenario: the job directory only holds a ``.pid`` file, and the process
    object (mocked) reports a finished state with exit code 137. Calling
    ``check_process_alive`` with ``update_disk=True`` is expected to:

    - return ``False``
    - create a ``.failed`` file with ``exit_code`` 137 and
      ``failure_status`` ``"UNKNOWN"``
    - remove the ``.pid`` file
    """
    import json
    from unittest.mock import MagicMock, patch

    from experimaestro.scheduler.state_sync import check_process_alive

    # Job directory whose PID file points to a non-existent process
    scriptname = "CrashedTask"
    job_path = tmp_path / "jobs" / "mymodule.CrashedTask" / "job_crashed_1"
    job_path.mkdir(parents=True)

    pid_file = job_path / f"{scriptname}.pid"
    pid_file.write_text(json.dumps({"type": "local", "pid": 999999}))

    # Finished state reported by the mock process (137: killed by signal)
    finished_state = MagicMock(finished=True, exitcode=137)

    async def mock_aio_state(timeout):
        # Always report the finished state
        return finished_state

    fake_process = MagicMock()
    fake_process.aio_state = mock_aio_state

    with patch(
        "experimaestro.connectors.Process.fromDefinition", return_value=fake_process
    ):
        # The process must be detected as dead
        is_alive = check_process_alive(job_path, scriptname, update_disk=True)

    assert is_alive is False

    # A .failed file must have been created...
    failed_file = job_path / f"{scriptname}.failed"
    assert failed_file.exists(), ".failed file should be created for dead process"

    # ...with the expected content
    failure = json.loads(failed_file.read_text())
    assert failure["exit_code"] == 137
    assert failure["failure_status"] == "UNKNOWN"

    # The .pid file must have been removed
    assert not pid_file.exists(), ".pid file should be removed for dead process"
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def test_sync_job_state_detects_dead_process(tmp_path):
    """``scan_job_state_from_disk`` reports a dead "running" job as an error.

    The job directory only holds a ``.pid`` file and the mocked process
    reports a finished state with exit code 1; with ``check_running=True``
    the scan is expected to return state ``"error"`` with failure reason
    ``"UNKNOWN"``.
    """
    import json
    from unittest.mock import MagicMock, patch

    from experimaestro.scheduler.state_sync import scan_job_state_from_disk

    # Job directory with only a PID file (no .done or .failed marker)
    scriptname = "CrashedTask"
    job_path = tmp_path / "jobs" / "mymodule.CrashedTask" / "job_crashed_2"
    job_path.mkdir(parents=True)
    (job_path / f"{scriptname}.pid").write_text(
        json.dumps({"type": "local", "pid": 999998})
    )

    # The mock process reports itself as finished
    finished_state = MagicMock(finished=True, exitcode=1)

    async def mock_aio_state(timeout):
        return finished_state

    fake_process = MagicMock()
    fake_process.aio_state = mock_aio_state

    with patch(
        "experimaestro.connectors.Process.fromDefinition", return_value=fake_process
    ):
        # Scanning must notice that the process is dead
        job_state = scan_job_state_from_disk(job_path, scriptname, check_running=True)

    assert job_state is not None
    assert job_state["state"] == "error"
    assert job_state["failure_reason"] == "UNKNOWN"
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def test_check_process_alive_with_lock_held(tmp_path):
    """``check_process_alive`` returns True when the job lock cannot be acquired.

    ``fasteners.InterProcessLock`` (as referenced from ``state_sync``) is
    patched with a mock whose ``acquire`` returns ``False``. The job must
    then be reported as alive and its files left untouched: the ``.pid``
    file stays and no ``.failed`` file appears.
    """
    import json
    from unittest.mock import patch, MagicMock

    from experimaestro.scheduler.state_sync import check_process_alive

    # Job directory with a PID file
    scriptname = "RunningTask"
    job_path = tmp_path / "jobs" / "mymodule.RunningTask" / "job_locked"
    job_path.mkdir(parents=True)

    pid_file = job_path / f"{scriptname}.pid"
    pid_file.write_text(json.dumps({"type": "local", "pid": 12345}))

    # Lock stand-in: acquire() fails, simulating a lock already held elsewhere
    busy_lock = MagicMock()
    busy_lock.acquire.return_value = False

    with patch(
        "experimaestro.scheduler.state_sync.fasteners.InterProcessLock",
        return_value=busy_lock,
    ):
        # check_process_alive should return True because it can't acquire the lock
        is_alive = check_process_alive(job_path, scriptname, update_disk=True)

    assert is_alive is True, "Should assume job is running when lock is held"

    # The .pid file must still be there
    assert pid_file.exists(), ".pid file should not be removed when lock is held"

    # And no .failed file may have been written
    failed_file = job_path / f"{scriptname}.failed"
    assert (
        not failed_file.exists()
    ), ".failed file should not be created when lock is held"
|