experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of experimaestro might be problematic.

Files changed (122)
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +278 -7
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +111 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +510 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +256 -31
  37. experimaestro/scheduler/interfaces.py +501 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/remote/__init__.py +31 -0
  40. experimaestro/scheduler/remote/client.py +874 -0
  41. experimaestro/scheduler/remote/protocol.py +467 -0
  42. experimaestro/scheduler/remote/server.py +423 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +323 -23
  45. experimaestro/scheduler/state_db.py +437 -0
  46. experimaestro/scheduler/state_provider.py +2766 -0
  47. experimaestro/scheduler/state_sync.py +891 -0
  48. experimaestro/scheduler/workspace.py +52 -10
  49. experimaestro/scriptbuilder.py +7 -0
  50. experimaestro/server/__init__.py +147 -57
  51. experimaestro/server/data/index.css +0 -125
  52. experimaestro/server/data/index.css.map +1 -1
  53. experimaestro/server/data/index.js +194 -58
  54. experimaestro/server/data/index.js.map +1 -1
  55. experimaestro/settings.py +44 -5
  56. experimaestro/sphinx/__init__.py +3 -3
  57. experimaestro/taskglobals.py +20 -0
  58. experimaestro/tests/conftest.py +80 -0
  59. experimaestro/tests/core/test_generics.py +2 -2
  60. experimaestro/tests/identifier_stability.json +45 -0
  61. experimaestro/tests/launchers/bin/sacct +6 -2
  62. experimaestro/tests/launchers/bin/sbatch +4 -2
  63. experimaestro/tests/launchers/test_slurm.py +80 -0
  64. experimaestro/tests/tasks/test_dynamic.py +231 -0
  65. experimaestro/tests/test_cli_jobs.py +615 -0
  66. experimaestro/tests/test_deprecated.py +630 -0
  67. experimaestro/tests/test_environment.py +200 -0
  68. experimaestro/tests/test_file_progress_integration.py +1 -1
  69. experimaestro/tests/test_forward.py +3 -3
  70. experimaestro/tests/test_identifier.py +372 -41
  71. experimaestro/tests/test_identifier_stability.py +458 -0
  72. experimaestro/tests/test_instance.py +3 -3
  73. experimaestro/tests/test_multitoken.py +442 -0
  74. experimaestro/tests/test_mypy.py +433 -0
  75. experimaestro/tests/test_objects.py +312 -5
  76. experimaestro/tests/test_outputs.py +2 -2
  77. experimaestro/tests/test_param.py +8 -12
  78. experimaestro/tests/test_partial_paths.py +231 -0
  79. experimaestro/tests/test_progress.py +0 -48
  80. experimaestro/tests/test_remote_state.py +671 -0
  81. experimaestro/tests/test_resumable_task.py +480 -0
  82. experimaestro/tests/test_serializers.py +141 -1
  83. experimaestro/tests/test_state_db.py +434 -0
  84. experimaestro/tests/test_subparameters.py +160 -0
  85. experimaestro/tests/test_tags.py +136 -0
  86. experimaestro/tests/test_tasks.py +107 -121
  87. experimaestro/tests/test_token_locking.py +252 -0
  88. experimaestro/tests/test_tokens.py +17 -13
  89. experimaestro/tests/test_types.py +123 -1
  90. experimaestro/tests/test_workspace_triggers.py +158 -0
  91. experimaestro/tests/token_reschedule.py +4 -2
  92. experimaestro/tests/utils.py +2 -2
  93. experimaestro/tokens.py +154 -57
  94. experimaestro/tools/diff.py +1 -1
  95. experimaestro/tui/__init__.py +8 -0
  96. experimaestro/tui/app.py +2395 -0
  97. experimaestro/tui/app.tcss +353 -0
  98. experimaestro/tui/log_viewer.py +228 -0
  99. experimaestro/utils/__init__.py +23 -0
  100. experimaestro/utils/environment.py +148 -0
  101. experimaestro/utils/git.py +129 -0
  102. experimaestro/utils/resources.py +1 -1
  103. experimaestro/version.py +34 -0
  104. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
  105. experimaestro-2.0.0b8.dist-info/RECORD +187 -0
  106. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
  107. experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
  108. experimaestro/compat.py +0 -6
  109. experimaestro/core/objects.pyi +0 -221
  110. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  111. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  112. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  113. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  114. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  115. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  116. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  117. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  118. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  119. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  120. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  121. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  122. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,615 @@
+"""Functional tests for CLI jobs commands
+
+Tests the jobs list, kill, and clean commands using the WorkspaceStateProvider.
+"""
+
+import pytest
+import time
+from click.testing import CliRunner
+
+from experimaestro.cli import cli
+from experimaestro.scheduler.state_provider import WorkspaceStateProvider
+from experimaestro.scheduler.state_db import (
+    initialize_workspace_database,
+    close_workspace_database,
+    ExperimentModel,
+    ExperimentRunModel,
+    JobModel,
+    ALL_MODELS,
+)
+from experimaestro.scheduler.workspace import WORKSPACE_VERSION
+
+
+@pytest.fixture
+def workspace_path(tmp_path):
+    """Create a workspace directory with database initialized"""
+    ws_path = tmp_path / "workspace"
+    ws_path.mkdir()
+
+    # Create version file with current workspace version
+    (ws_path / ".__experimaestro__").write_text(str(WORKSPACE_VERSION))
+
+    # Initialize database
+    xpm_dir = ws_path / ".experimaestro"
+    xpm_dir.mkdir()
+    db_path = xpm_dir / "workspace.db"
+    db, _ = initialize_workspace_database(db_path, read_only=False)
+
+    # Create jobs directory
+    jobs_dir = ws_path / "jobs"
+    jobs_dir.mkdir()
+
+    yield ws_path
+
+    close_workspace_database(db)
+
+
+@pytest.fixture
+def workspace_with_jobs(workspace_path):
+    """Create a workspace with some test jobs in the database"""
+    db_path = workspace_path / ".experimaestro" / "workspace.db"
+    db, _ = initialize_workspace_database(db_path, read_only=False)
+
+    with db.bind_ctx(ALL_MODELS):
+        # Create experiment and run
+        ExperimentModel.create(experiment_id="test_exp", current_run_id="run_001")
+        ExperimentRunModel.create(
+            experiment_id="test_exp", run_id="run_001", status="active"
+        )
+
+        # Create jobs with different states
+        jobs_data = [
+            ("job_done_1", "mymodule.DoneTask", "done"),
+            ("job_done_2", "mymodule.DoneTask", "done"),
+            ("job_error_1", "mymodule.ErrorTask", "error"),
+            ("job_running_1", "mymodule.RunningTask", "running"),
+        ]
+
+        jobs_dir = workspace_path / "jobs"
+        for job_id, task_id, state in jobs_data:
+            JobModel.create(
+                job_id=job_id,
+                experiment_id="test_exp",
+                run_id="run_001",
+                task_id=task_id,
+                locator=job_id,
+                state=state,
+                submitted_time=time.time(),
+            )
+            # Create job directories
+            job_dir = jobs_dir / task_id / job_id
+            job_dir.mkdir(parents=True, exist_ok=True)
+            # Create marker files based on state
+            script_name = task_id.rsplit(".", 1)[-1]
+            if state == "done":
+                (job_dir / f"{script_name}.done").touch()
+            elif state == "error":
+                (job_dir / f"{script_name}.failed").write_text("{}")
+
+    close_workspace_database(db)
+
+    yield workspace_path
+
+
+def test_jobs_list_empty_workspace(workspace_path):
+    """Test jobs list on empty workspace"""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["jobs", "--workdir", str(workspace_path), "list"])
+
+    assert result.exit_code == 0
+    assert "No jobs found" in result.output
+
+
+def test_jobs_list_with_jobs(workspace_with_jobs):
+    """Test jobs list shows all jobs"""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["jobs", "--workdir", str(workspace_with_jobs), "list"])
+
+    assert result.exit_code == 0
+    assert "job_done_1" in result.output
+    assert "job_done_2" in result.output
+    assert "job_error_1" in result.output
+    assert "job_running_1" in result.output
+    assert "DONE" in result.output
+    assert "FAIL" in result.output
+
+
+def test_jobs_list_with_filter(workspace_with_jobs):
+    """Test jobs list with filter expression"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "list",
+            "--filter",
+            '@state = "done"',
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "job_done_1" in result.output
+    assert "job_done_2" in result.output
+    assert "job_error_1" not in result.output
+    assert "job_running_1" not in result.output
+
+
+def test_jobs_list_fullpath(workspace_with_jobs):
+    """Test jobs list with fullpath option"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli, ["jobs", "--workdir", str(workspace_with_jobs), "list", "--fullpath"]
+    )
+
+    assert result.exit_code == 0
+    # Should show full paths instead of task_id/job_id format
+    assert str(workspace_with_jobs / "jobs") in result.output
+
+
+@pytest.fixture
+def workspace_with_timed_jobs(workspace_path):
+    """Create a workspace with jobs that have different submission times"""
+    db_path = workspace_path / ".experimaestro" / "workspace.db"
+    db, _ = initialize_workspace_database(db_path, read_only=False)
+
+    with db.bind_ctx(ALL_MODELS):
+        ExperimentModel.create(experiment_id="test_exp", current_run_id="run_001")
+        ExperimentRunModel.create(
+            experiment_id="test_exp", run_id="run_001", status="active"
+        )
+
+        # Create jobs with different submission times (oldest to newest)
+        base_time = time.time()
+        jobs_data = [
+            ("job_oldest", "mymodule.Task", "done", base_time - 3600),  # 1 hour ago
+            ("job_middle", "mymodule.Task", "done", base_time - 1800),  # 30 min ago
+            ("job_newest", "mymodule.Task", "done", base_time),  # now
+        ]
+
+        jobs_dir = workspace_path / "jobs"
+        for job_id, task_id, state, submit_time in jobs_data:
+            JobModel.create(
+                job_id=job_id,
+                experiment_id="test_exp",
+                run_id="run_001",
+                task_id=task_id,
+                locator=job_id,
+                state=state,
+                submitted_time=submit_time,
+            )
+            job_dir = jobs_dir / task_id / job_id
+            job_dir.mkdir(parents=True, exist_ok=True)
+            script_name = task_id.rsplit(".", 1)[-1]
+            (job_dir / f"{script_name}.done").touch()
+
+    close_workspace_database(db)
+    yield workspace_path
+
+
+def test_jobs_list_sorted_by_date(workspace_with_timed_jobs):
+    """Test that jobs are sorted by submission date (most recent first)"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli, ["jobs", "--workdir", str(workspace_with_timed_jobs), "list"]
+    )
+
+    assert result.exit_code == 0
+    output = result.output
+
+    # Verify order: newest should appear before middle, middle before oldest
+    newest_pos = output.find("job_newest")
+    middle_pos = output.find("job_middle")
+    oldest_pos = output.find("job_oldest")
+
+    assert newest_pos < middle_pos < oldest_pos, (
+        f"Jobs should be sorted by date (newest first). "
+        f"Positions: newest={newest_pos}, middle={middle_pos}, oldest={oldest_pos}"
+    )
+
+
+def test_jobs_list_with_count(workspace_with_timed_jobs):
+    """Test jobs list with --count option"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli, ["jobs", "--workdir", str(workspace_with_timed_jobs), "list", "-c", "2"]
+    )
+
+    assert result.exit_code == 0
+    output = result.output
+
+    # Should only show 2 most recent jobs
+    assert "job_newest" in output
+    assert "job_middle" in output
+    assert "job_oldest" not in output
+
+
+def test_jobs_list_count_zero_shows_all(workspace_with_timed_jobs):
+    """Test that --count 0 shows all jobs (default behavior)"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        ["jobs", "--workdir", str(workspace_with_timed_jobs), "list", "--count", "0"],
+    )
+
+    assert result.exit_code == 0
+    output = result.output
+
+    # All jobs should be present
+    assert "job_newest" in output
+    assert "job_middle" in output
+    assert "job_oldest" in output
+
+
+def test_jobs_list_with_experiment_filter(workspace_with_jobs):
+    """Test jobs list filtered by experiment"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "list",
+            "--experiment",
+            "test_exp",
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "job_done_1" in result.output
+
+    # Test with non-existent experiment
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "list",
+            "--experiment",
+            "nonexistent",
+        ],
+    )
+    assert result.exit_code == 0
+    assert "No jobs found" in result.output
+
+
+def test_jobs_clean_dry_run(workspace_with_jobs):
+    """Test jobs clean without --perform (dry run)"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "clean",
+            "--filter",
+            '@state = "done"',
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "dry run" in result.output.lower()
+
+    # Verify directories still exist
+    jobs_dir = workspace_with_jobs / "jobs"
+    assert (jobs_dir / "mymodule.DoneTask" / "job_done_1").exists()
+    assert (jobs_dir / "mymodule.DoneTask" / "job_done_2").exists()
+
+
+def test_jobs_clean_with_perform(workspace_with_jobs):
+    """Test jobs clean with --perform actually deletes jobs"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "clean",
+            "--filter",
+            '@state = "done"',
+            "--perform",
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "Cleaned" in result.output
+
+    # Verify directories are deleted
+    jobs_dir = workspace_with_jobs / "jobs"
+    assert not (jobs_dir / "mymodule.DoneTask" / "job_done_1").exists()
+    assert not (jobs_dir / "mymodule.DoneTask" / "job_done_2").exists()
+
+    # Verify database entries are deleted
+    provider = WorkspaceStateProvider.get_instance(workspace_with_jobs, read_only=True)
+    try:
+        jobs = provider.get_all_jobs()
+        job_ids = [j.identifier for j in jobs]
+        assert "job_done_1" not in job_ids
+        assert "job_done_2" not in job_ids
+        # Error and running jobs should still exist
+        assert "job_error_1" in job_ids
+        assert "job_running_1" in job_ids
+    finally:
+        provider.close()
+
+
+def test_jobs_clean_does_not_clean_running(workspace_with_jobs):
+    """Test that jobs clean does not clean running jobs"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        ["jobs", "--workdir", str(workspace_with_jobs), "clean", "--perform"],
+    )
+
+    assert result.exit_code == 0
+
+    # Verify running job is NOT cleaned
+    provider = WorkspaceStateProvider.get_instance(workspace_with_jobs, read_only=True)
+    try:
+        jobs = provider.get_all_jobs()
+        job_ids = [j.identifier for j in jobs]
+        assert "job_running_1" in job_ids
+    finally:
+        provider.close()
+
+
+def test_jobs_kill_dry_run(workspace_with_jobs):
+    """Test jobs kill without --perform (dry run)"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "kill",
+            "--filter",
+            '@state = "running"',
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "dry run" in result.output.lower()
+
+
+def test_jobs_kill_with_perform(workspace_with_jobs):
+    """Test jobs kill with --perform actually kills jobs and updates DB"""
+    import json
+    from unittest.mock import MagicMock, patch
+
+    # Create a PID file for the running job so kill can find it
+    jobs_dir = workspace_with_jobs / "jobs"
+    running_job_dir = jobs_dir / "mymodule.RunningTask" / "job_running_1"
+    pid_file = running_job_dir / "RunningTask.pid"
+    pid_file.write_text(json.dumps({"type": "local", "pid": 12345}))
+
+    # Create a mock process
+    mock_process = MagicMock()
+    mock_process.kill = MagicMock()
+
+    with patch(
+        "experimaestro.connectors.Process.fromDefinition", return_value=mock_process
+    ):
+        runner = CliRunner()
+        result = runner.invoke(
+            cli,
+            [
+                "jobs",
+                "--workdir",
+                str(workspace_with_jobs),
+                "kill",
+                "--filter",
+                '@state = "running"',
+                "--perform",
+            ],
+        )
+
+    assert result.exit_code == 0
+    assert "KILLED" in result.output
+
+    # Verify kill was called on the mock process
+    mock_process.kill.assert_called_once()
+
+    # Verify database state was updated to error
+    provider = WorkspaceStateProvider.get_instance(workspace_with_jobs, read_only=True)
+    try:
+        jobs = provider.get_all_jobs()
+        running_job = next((j for j in jobs if j.identifier == "job_running_1"), None)
+        assert running_job is not None
+        # State should be updated to error after kill
+        from experimaestro.scheduler import JobState
+
+        assert running_job.state == JobState.ERROR
+    finally:
+        provider.close()
+
+
+def test_jobs_path(workspace_with_jobs):
+    """Test jobs path command"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "path",
+            "mymodule.DoneTask/job_done_1",
+        ],
+    )
+
+    assert result.exit_code == 0
+    expected_path = workspace_with_jobs / "jobs" / "mymodule.DoneTask" / "job_done_1"
+    assert str(expected_path) in result.output
+
+
+def test_jobs_path_nonexistent(workspace_with_jobs):
+    """Test jobs path command for non-existent job"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "path",
+            "mymodule.Task/nonexistent",
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "not found" in result.output.lower()
+
+
+def test_jobs_log_nonexistent(workspace_with_jobs):
+    """Test jobs log command for non-existent log"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "jobs",
+            "--workdir",
+            str(workspace_with_jobs),
+            "log",
+            "mymodule.DoneTask/job_done_1",
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert "not found" in result.output.lower()
+
+
+def test_sync_detects_dead_process_and_updates_disk(tmp_path):
+    """Test that sync detects a dead 'running' job and updates disk state
+
+    This tests the scenario where a job crashed without updating its state:
+    - Job has a .pid file (marked as running)
+    - Process is no longer running (dead)
+    - Sync should detect this and create a .failed file
+    """
+    import json
+    from unittest.mock import MagicMock, patch
+
+    from experimaestro.scheduler.state_sync import check_process_alive
+
+    # Create a job directory with a PID file for a non-existent process
+    job_path = tmp_path / "jobs" / "mymodule.CrashedTask" / "job_crashed_1"
+    job_path.mkdir(parents=True)
+
+    scriptname = "CrashedTask"
+    pid_file = job_path / f"{scriptname}.pid"
+    pid_file.write_text(
+        json.dumps({"type": "local", "pid": 999999})
+    )  # Non-existent PID
+
+    # Create a mock process that reports as finished/dead
+    mock_process = MagicMock()
+    mock_state = MagicMock()
+    mock_state.finished = True
+    mock_state.exitcode = 137  # Killed by signal
+
+    # Mock aio_state to return the finished state
+    async def mock_aio_state(timeout):
+        return mock_state
+
+    mock_process.aio_state = mock_aio_state
+
+    with patch(
+        "experimaestro.connectors.Process.fromDefinition", return_value=mock_process
+    ):
+        # Check that process is detected as dead
+        is_alive = check_process_alive(job_path, scriptname, update_disk=True)
+
+        assert is_alive is False
+
+        # Verify .failed file was created
+        failed_file = job_path / f"{scriptname}.failed"
+        assert failed_file.exists(), ".failed file should be created for dead process"
+
+        # Verify .failed file contains correct data
+        failed_data = json.loads(failed_file.read_text())
+        assert failed_data["exit_code"] == 137
+        assert failed_data["failure_status"] == "UNKNOWN"
+
+        # Verify .pid file was removed
+        assert not pid_file.exists(), ".pid file should be removed for dead process"
+
+
+def test_sync_job_state_detects_dead_process(tmp_path):
+    """Test that scan_job_state_from_disk detects dead 'running' jobs"""
+    import json
+    from unittest.mock import MagicMock, patch
+
+    from experimaestro.scheduler.state_sync import scan_job_state_from_disk
+
+    # Create a job directory with only a PID file (no .done or .failed)
+    job_path = tmp_path / "jobs" / "mymodule.CrashedTask" / "job_crashed_2"
+    job_path.mkdir(parents=True)
+
+    scriptname = "CrashedTask"
+    pid_file = job_path / f"{scriptname}.pid"
+    pid_file.write_text(json.dumps({"type": "local", "pid": 999998}))
+
+    # Mock process as dead
+    mock_process = MagicMock()
+    mock_state = MagicMock()
+    mock_state.finished = True
+    mock_state.exitcode = 1
+
+    async def mock_aio_state(timeout):
+        return mock_state
+
+    mock_process.aio_state = mock_aio_state
+
+    with patch(
+        "experimaestro.connectors.Process.fromDefinition", return_value=mock_process
+    ):
+        # Scan job state - should detect dead process
+        job_state = scan_job_state_from_disk(job_path, scriptname, check_running=True)
+
+        assert job_state is not None
+        assert job_state["state"] == "error"
+        assert job_state["failure_reason"] == "UNKNOWN"
+
+
+def test_check_process_alive_with_lock_held(tmp_path):
+    """Test that check_process_alive returns True when lock is held (job running)"""
+    import json
+    from unittest.mock import patch, MagicMock
+
+    from experimaestro.scheduler.state_sync import check_process_alive
+
+    # Create a job directory with a PID file
+    job_path = tmp_path / "jobs" / "mymodule.RunningTask" / "job_locked"
+    job_path.mkdir(parents=True)
+
+    scriptname = "RunningTask"
+    pid_file = job_path / f"{scriptname}.pid"
+    pid_file.write_text(json.dumps({"type": "local", "pid": 12345}))
+
+    # Mock the lock to simulate it being held by another process
+    mock_lock = MagicMock()
+    mock_lock.acquire.return_value = False  # Simulate lock already held
+
+    with patch(
+        "experimaestro.scheduler.state_sync.fasteners.InterProcessLock",
+        return_value=mock_lock,
+    ):
+        # check_process_alive should return True because it can't acquire the lock
+        is_alive = check_process_alive(job_path, scriptname, update_disk=True)
+
+        assert is_alive is True, "Should assume job is running when lock is held"
+
+        # .pid file should still exist (not removed)
+        assert pid_file.exists(), ".pid file should not be removed when lock is held"
+
+        # No .failed file should be created
+        failed_file = job_path / f"{scriptname}.failed"
+        assert (
+            not failed_file.exists()
+        ), ".failed file should not be created when lock is held"