experimaestro 1.11.1__py3-none-any.whl → 2.0.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/annotations.py +1 -1
- experimaestro/cli/__init__.py +10 -11
- experimaestro/cli/progress.py +269 -0
- experimaestro/connectors/__init__.py +2 -2
- experimaestro/core/arguments.py +20 -1
- experimaestro/core/identifier.py +21 -7
- experimaestro/core/objects/config.py +174 -274
- experimaestro/core/objects/config_walk.py +4 -6
- experimaestro/core/objects.pyi +2 -6
- experimaestro/core/serializers.py +1 -8
- experimaestro/core/types.py +35 -57
- experimaestro/launcherfinder/registry.py +3 -3
- experimaestro/mkdocs/base.py +6 -8
- experimaestro/notifications.py +12 -3
- experimaestro/progress.py +406 -0
- experimaestro/scheduler/__init__.py +18 -1
- experimaestro/scheduler/base.py +87 -906
- experimaestro/scheduler/experiment.py +387 -0
- experimaestro/scheduler/jobs.py +475 -0
- experimaestro/scheduler/signal_handler.py +32 -0
- experimaestro/scheduler/state.py +1 -1
- experimaestro/server/__init__.py +36 -5
- experimaestro/settings.py +4 -2
- experimaestro/tests/launchers/common.py +2 -2
- experimaestro/tests/restart.py +1 -1
- experimaestro/tests/tasks/all.py +7 -0
- experimaestro/tests/test_checkers.py +2 -2
- experimaestro/tests/test_dependencies.py +11 -17
- experimaestro/tests/test_experiment.py +3 -3
- experimaestro/tests/test_file_progress.py +425 -0
- experimaestro/tests/test_file_progress_integration.py +477 -0
- experimaestro/tests/test_generators.py +93 -0
- experimaestro/tests/test_identifier.py +155 -135
- experimaestro/tests/test_instance.py +13 -18
- experimaestro/tests/test_objects.py +9 -32
- experimaestro/tests/test_outputs.py +6 -6
- experimaestro/tests/test_param.py +14 -14
- experimaestro/tests/test_progress.py +4 -4
- experimaestro/tests/test_serializers.py +0 -59
- experimaestro/tests/test_tags.py +15 -15
- experimaestro/tests/test_tasks.py +42 -51
- experimaestro/tests/test_tokens.py +8 -6
- experimaestro/tests/test_types.py +10 -10
- experimaestro/tests/test_validation.py +19 -19
- experimaestro/tests/token_reschedule.py +1 -1
- experimaestro/tools/diff.py +8 -1
- experimaestro/typingutils.py +11 -2
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0a8.dist-info}/METADATA +3 -2
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0a8.dist-info}/RECORD +52 -44
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0a8.dist-info}/WHEEL +1 -1
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0a8.dist-info}/entry_points.txt +0 -0
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0a8.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
"""Integration tests for file-based progress tracking with actual Task execution"""
|
|
2
|
+
|
|
3
|
+
import time
from copy import copy
from pathlib import Path
from queue import Queue
from typing import List, Optional, Tuple

import fasteners

from experimaestro import Task, Annotated, pathgenerator, progress, tqdm
from experimaestro.core.objects import logger
from experimaestro.notifications import LevelInformation
from experimaestro.progress import ProgressFileReader
from experimaestro.scheduler import Job, Listener

from .utils import TemporaryExperiment
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FileProgressListener(Listener):
    """Scheduler listener that records progress updates for inspection.

    Keeps a snapshot of the last observed progress levels per notification
    and pushes every *change* onto a queue so tests can consume updates
    synchronously (with a timeout) instead of polling job state.
    """

    def __init__(self):
        self.current = []
        self.progresses: Queue[List[LevelInformation]] = Queue()
        self.jobs_seen = set()

    def job_state(self, job: Job):
        # Remember every job we were notified about (used by file-based checks)
        self.jobs_seen.add(job)

        # A change is either a different number of levels, or any level differing
        changed = len(self.current) != len(job.progress)
        if not changed:
            changed = any(
                old != new for old, new in zip(self.current, job.progress)
            )

        if changed:
            logger.info("Got some progress: %s", job.progress)
            snapshot = [copy(level) for level in job.progress]
            self.current = snapshot
            self.progresses.put(snapshot)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SimpleProgressTask(Task):
    """Simple task that reports progress at specific intervals.

    The driving test writes instructions of the form
    ``"<level> <progress> [<desc>]"`` into ``path`` (see
    ``write_progress_instruction``); this task consumes each instruction,
    forwards it to ``progress()``, and stops once top-level progress
    reaches ~1.0.
    """

    # Instruction file shared with the test process (generated per job)
    path: Annotated[Path, pathgenerator("progress.txt")]

    def execute(self):
        """Execute task and report progress based on file instructions"""
        _progress = 0.0

        while True:
            # Short poll interval: the test waits on these updates
            time.sleep(1e-4)
            if self.path.is_file():
                # Same lock as the writer side — avoids reading partial writes
                with fasteners.InterProcessLock(self.path.with_suffix(".lock")):
                    content = self.path.read_text().strip()
                    if not content:
                        continue

                    # Parse "<level> <progress> [<desc>]" (desc may contain spaces)
                    parts = content.split(" ", maxsplit=2)
                    _level = int(parts[0])
                    _progress = float(parts[1])
                    _desc = parts[2] if len(parts) > 2 and parts[2] else None

                    # Consume the instruction so the writer can send the next one
                    self.path.unlink()

                if _progress > 0:
                    progress(_progress, level=_level, desc=_desc)
                if _progress >= 0.99 and _level == 0:
                    # Top-level progress is (nearly) complete: finish the task
                    break
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class MultiLevelProgressTask(Task):
    """Task that reports progress at multiple levels.

    Runs three top-level steps, each containing five nested sub-steps,
    reporting at level 0 (main) and level 1 (subtask) respectively.
    """

    path: Annotated[Path, pathgenerator("progress.txt")]

    def execute(self):
        """Execute with nested progress levels."""
        progress(0.0, level=0, desc="Starting main task")

        # Three coarse-grained steps at level 0
        for i in range(3):
            main_fraction = i / 3.0
            progress(main_fraction, level=0, desc=f"Main task step {i + 1}")

            # Five fine-grained sub-steps at level 1
            for j in range(5):
                sub_fraction = j / 5.0
                progress(sub_fraction, level=1, desc=f"Subtask {i + 1}.{j + 1}")
                # Small pause so each update can be flushed to the progress file
                time.sleep(0.01)

            progress(1.0, level=1, desc=f"Subtask {i + 1} complete")

        progress(1.0, level=0, desc="Main task complete")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TqdmProgressTask(Task):
    """Task that uses tqdm for progress reporting.

    Drives two nested tqdm bars; experimaestro's tqdm wrapper mirrors
    each bar update into the job's progress files.
    """

    path: Annotated[Path, pathgenerator("progress.txt")]

    def execute(self):
        """Execute with nested tqdm progress bars."""
        outer = tqdm(range(10), desc="Main task", miniters=1, mininterval=0)
        for i in outer:
            inner = tqdm(
                range(5), desc=f"Subtask {i + 1}", miniters=1, mininterval=0
            )
            for _step in inner:
                # Tiny pause between updates so writes are observable
                time.sleep(0.001)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def write_progress_instruction(
    path: Path, progress_val: float, level: int = 0, desc: Optional[str] = None
):
    """Hand a progress instruction to a running task through a shared file.

    Polls until the previous instruction has been consumed (the task
    deletes the file after reading it), then writes
    ``"<level> <progress> <desc>"`` under an inter-process lock so the
    task never observes a partial write.

    :param path: instruction file watched by the task
    :param progress_val: progress value to report, formatted to 3 decimals
    :param level: progress level (0 = top level)
    :param desc: optional description; written as an empty string when None
    """
    while True:
        # Poll at 50ms: fast enough for the tests, slow enough not to spin
        time.sleep(5e-2)
        with fasteners.InterProcessLock(path.with_suffix(".lock")):
            # Only write once the previous instruction has been consumed
            if not path.is_file():
                desc_str = desc if desc else ""
                path.write_text(f"{level} {progress_val:.3f} {desc_str}")
                break
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def verify_file_progress(job: Job, expected_entries: List[Tuple[int, float, str]]):
    """Verify that file-based progress matches expected entries.

    Reads all progress entries written for *job*, de-duplicates entries
    recorded within 1ms of each other, and asserts that (a) at least
    ``len(expected_entries)`` distinct updates were recorded and (b) every
    expected progress level appears in the current progress state.

    :param job: the finished (or running) job whose progress files to read
    :param expected_entries: list of ``(level, progress, desc)`` tuples
    :returns: ``(filtered_entries, current_progress)`` for further checks
    :raises AssertionError: when counts or levels do not match expectations
    """
    reader = ProgressFileReader(job.path)

    # Wait a bit for file writes to complete
    time.sleep(0.1)

    entries = list(reader.read_all_entries())

    # Filter out entries that are too close in time (duplicates from rapid updates)
    filtered_entries = []
    last_timestamp = 0
    for entry in entries:
        if entry.timestamp - last_timestamp > 0.001:  # 1ms threshold
            filtered_entries.append(entry)
            last_timestamp = entry.timestamp

    logger.info(f"Found {len(filtered_entries)} file progress entries")
    for entry in filtered_entries:
        logger.info(f" Level {entry.level}: {entry.progress:.3f} - {entry.desc}")

    # Verify we have at least the expected number of significant progress updates
    assert len(filtered_entries) >= len(
        expected_entries
    ), f"Expected at least {len(expected_entries)} entries, got {len(filtered_entries)}"

    # Verify current progress state
    current_progress = reader.get_current_progress()

    # Check that we have progress for expected levels
    expected_levels = set(level for level, _, _ in expected_entries)
    actual_levels = set(current_progress.keys())

    assert expected_levels.issubset(
        actual_levels
    ), f"Expected levels {expected_levels}, got {actual_levels}"

    return filtered_entries, current_progress
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_file_progress_basic():
    """Test that file-based progress tracking works with basic task execution.

    Drives a SimpleProgressTask through a sequence of instruction-file
    updates, checking both the in-memory listener notifications and the
    on-disk progress files (entries + progress-latest.jsonl symlink).
    """
    with TemporaryExperiment("file-progress-basic", maxwait=10, port=0) as xp:
        listener = FileProgressListener()
        xp.scheduler.addlistener(listener)

        # Submit task
        out = SimpleProgressTask.C().submit()
        path = out.path
        job = out.__xpm__.job

        # Wait for job to start
        logger.info("Waiting for job to start")
        while job.state.notstarted():
            time.sleep(1e-2)

        # Send progress instructions one at a time (task deletes each after reading)
        progress_values = [0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
        expected_entries = []

        for i, v in enumerate(progress_values):
            desc = f"Step {i + 1}"
            write_progress_instruction(path, v, level=0, desc=desc)
            expected_entries.append((0, v, desc))

            if v < 1.0:
                # Verify in-memory progress (with timeout)
                try:
                    info = listener.progresses.get(timeout=2.0)[0]
                    logger.info("Got in-memory progress: %s", info)
                    assert abs(info.progress - v) < 0.01
                except Exception as e:
                    # Best-effort: the file-based check below is authoritative
                    logger.warning(f"Failed to get progress for {v}: {e}")
                    # Continue anyway, we'll verify file-based progress later

        # Wait for job to complete
        while not job.state.finished():
            time.sleep(1e-2)

        # Verify file-based progress
        entries, current_progress = verify_file_progress(job, expected_entries)

        # Check that final progress is 1.0
        assert current_progress[0].progress == 1.0
        assert current_progress[0].desc == "Step 6"

        # Verify progress files exist
        progress_dir = job.path / ".experimaestro"
        assert progress_dir.exists()

        progress_files = list(progress_dir.glob("progress-*.jsonl"))
        assert len(progress_files) >= 1

        # Verify symlink exists
        latest_link = progress_dir / "progress-latest.jsonl"
        assert latest_link.exists()
        assert latest_link.is_symlink()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def test_file_progress_multilevel():
    """Test file-based progress tracking with multiple levels.

    Runs MultiLevelProgressTask to completion and checks that progress
    entries were written for both level 0 (main) and level 1 (subtasks),
    ending at 1.0 with "complete" descriptions.
    """
    with TemporaryExperiment("file-progress-multilevel", maxwait=15, port=0) as xp:
        listener = FileProgressListener()
        xp.scheduler.addlistener(listener)

        # Submit task
        out = MultiLevelProgressTask.C().submit()
        job = out.__xpm__.job

        # Wait for job to start
        logger.info("Waiting for job to start")
        while job.state.notstarted():
            time.sleep(1e-2)

        # Wait for job to complete
        while not job.state.finished():
            time.sleep(1e-2)

        # Verify file-based progress
        reader = ProgressFileReader(job.path)
        entries = list(reader.read_all_entries())
        current_progress = reader.get_current_progress()

        logger.info(f"Found {len(entries)} total progress entries")

        # Should have progress for both levels 0 and 1
        assert 0 in current_progress
        assert 1 in current_progress

        # Final progress should be 1.0 for both levels
        assert current_progress[0].progress == 1.0
        assert current_progress[1].progress == 1.0

        # Check descriptions
        assert "complete" in current_progress[0].desc.lower()
        assert "complete" in current_progress[1].desc.lower()

        # Verify we have entries for both levels
        level_0_entries = [e for e in entries if e.level == 0]
        level_1_entries = [e for e in entries if e.level == 1]

        assert len(level_0_entries) >= 3  # At least start, middle, end
        assert len(level_1_entries) >= 3  # Multiple subtask updates
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def test_file_progress_tqdm():
    """Test file-based progress tracking with tqdm.

    Runs TqdmProgressTask to completion and checks that the tqdm wrapper
    produced progress entries and on-disk progress files.
    """
    with TemporaryExperiment("file-progress-tqdm", maxwait=15, port=0) as xp:
        listener = FileProgressListener()
        xp.scheduler.addlistener(listener)

        # Submit task
        out = TqdmProgressTask.C().submit()
        job = out.__xpm__.job

        # Wait for job to start
        logger.info("Waiting for job to start")
        while job.state.notstarted():
            time.sleep(1e-2)

        # Wait for job to complete
        while not job.state.finished():
            time.sleep(1e-2)

        # Verify file-based progress
        reader = ProgressFileReader(job.path)
        entries = list(reader.read_all_entries())
        current_progress = reader.get_current_progress()

        logger.info(f"Found {len(entries)} total progress entries from tqdm")

        # Should have progress for multiple levels (tqdm creates nested levels)
        assert len(current_progress) >= 1

        # Should have multiple progress entries
        assert len(entries) >= 10  # At least one per main iteration

        # Verify progress files structure
        progress_dir = job.path / ".experimaestro"
        assert progress_dir.exists()

        progress_files = list(progress_dir.glob("progress-*.jsonl"))
        assert len(progress_files) >= 1
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def test_file_progress_concurrent_experiments():
    """Test that file-based progress works with multiple concurrent experiments.

    Runs two SimpleProgressTask jobs in two experiments sharing the same
    workdir, drives both with interleaved instructions, and checks that
    each job's progress files stay separate (distinguished by description).
    """
    max_wait = 10

    with TemporaryExperiment(
        "file-progress-concurrent-1", maxwait=max_wait, port=0
    ) as xp1:
        listener1 = FileProgressListener()
        xp1.scheduler.addlistener(listener1)

        # Submit first task
        out1 = SimpleProgressTask.C().submit()
        job1 = out1.__xpm__.job
        path1 = out1.path

        # Wait for first job to start
        while job1.state.notstarted():
            time.sleep(1e-2)

        # Second experiment shares the first one's working directory
        with TemporaryExperiment(
            "file-progress-concurrent-2",
            workdir=xp1.workdir,
            maxwait=max_wait,
            port=0,
        ) as xp2:
            listener2 = FileProgressListener()
            xp2.scheduler.addlistener(listener2)

            # Submit second task
            out2 = SimpleProgressTask.C().submit()
            job2 = out2.__xpm__.job
            path2 = out2.path

            # Wait for second job to start
            while job2.state.notstarted():
                time.sleep(1e-2)

            # Send progress to both tasks
            progress_values = [0.2, 0.6, 1.0]

            for v in progress_values:
                write_progress_instruction(path1, v, desc=f"Task1-{v}")
                write_progress_instruction(path2, v, desc=f"Task2-{v}")

                if v < 1.0:
                    # Verify both get progress (with timeout)
                    try:
                        info1 = listener1.progresses.get(timeout=2.0)[0]
                        info2 = listener2.progresses.get(timeout=2.0)[0]

                        assert abs(info1.progress - v) < 0.01
                        assert abs(info2.progress - v) < 0.01
                    except Exception as e:
                        # Best-effort: file-based checks below are authoritative
                        logger.warning(
                            f"Failed to get concurrent progress for {v}: {e}"
                        )
                        # Continue anyway, we'll verify file-based progress later

            # Wait for both jobs to complete
            while not job1.state.finished() or not job2.state.finished():
                time.sleep(1e-2)

            # Verify both have separate file-based progress
            reader1 = ProgressFileReader(job1.path)
            reader2 = ProgressFileReader(job2.path)

            entries1 = list(reader1.read_all_entries())
            entries2 = list(reader2.read_all_entries())

            # Both should have progress entries
            assert len(entries1) >= 3
            assert len(entries2) >= 3

            # Verify they have different descriptions
            task1_entries = [e for e in entries1 if e.desc and "Task1" in e.desc]
            task2_entries = [e for e in entries2 if e.desc and "Task2" in e.desc]

            assert len(task1_entries) >= 1
            assert len(task2_entries) >= 1
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def test_file_progress_persistence():
    """Test that file-based progress persists during experiment execution.

    Runs MultiLevelProgressTask to completion, then checks the progress
    files (and the progress-latest.jsonl symlink) are still present and
    readable inside the job directory.
    """
    with TemporaryExperiment("file-progress-persistence", maxwait=10, port=0):
        out = MultiLevelProgressTask.C().submit()
        job = out.__xpm__.job
        job_path = job.path

        # Wait for job to complete
        while not job.state.finished():
            time.sleep(1e-2)

        # Verify we can read progress files while experiment is still active
        reader = ProgressFileReader(job_path)
        entries = list(reader.read_all_entries())
        current_progress = reader.get_current_progress()

        # Should have progress data
        assert len(entries) > 0
        assert len(current_progress) > 0

        # Verify progress files exist
        progress_dir = job_path / ".experimaestro"
        assert progress_dir.exists()

        progress_files = list(progress_dir.glob("progress-*.jsonl"))
        assert len(progress_files) >= 1

        # Verify symlink works
        latest_link = progress_dir / "progress-latest.jsonl"
        assert latest_link.exists()
        assert latest_link.is_symlink()

        # Verify we can read from the symlink
        latest_entries = list(reader.read_entries(latest_link.resolve()))
        assert len(latest_entries) > 0
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def test_file_progress_error_handling():
    """Test file-based progress tracking handles errors gracefully.

    Sends a couple of valid instructions to a SimpleProgressTask, then
    checks that all progress files remain readable after completion.
    """
    with TemporaryExperiment("file-progress-errors", maxwait=10, port=0) as xp:
        listener = FileProgressListener()
        xp.scheduler.addlistener(listener)

        # Submit task
        out = SimpleProgressTask.C().submit()
        job = out.__xpm__.job
        path = out.path

        # Wait for job to start
        while job.state.notstarted():
            time.sleep(1e-2)

        # Send some valid progress
        write_progress_instruction(path, 0.5, desc="Valid progress")

        # Wait a bit longer for the task to process the instruction
        time.sleep(0.5)

        # Complete the task (progress >= 0.99 at level 0 ends the task loop)
        write_progress_instruction(path, 1.0, desc="Complete")

        # Wait for completion
        while not job.state.finished():
            time.sleep(1e-2)

        # Verify progress was recorded after completion
        reader = ProgressFileReader(job.path)
        entries = list(reader.read_all_entries())
        current_progress = reader.get_current_progress()

        # Should have at least some progress entries
        assert len(entries) >= 1

        # Verify final state
        assert current_progress[0].progress == 1.0

        # Verify progress files exist and are readable
        progress_dir = job.path / ".experimaestro"
        assert progress_dir.exists()

        progress_files = list(progress_dir.glob("progress-*.jsonl"))
        assert len(progress_files) >= 1

        # Verify we can read from files without errors
        for progress_file in progress_files:
            file_entries = list(reader.read_entries(progress_file))
            # Each file should be readable (may be empty if no progress written to it)
            assert isinstance(file_entries, list)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
# Allow running this test module directly (outside a pytest invocation)
if __name__ == "__main__":
    import pytest

    pytest.main([__file__])
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from experimaestro import Config, Task, Param, Meta, Path, field, PathGenerator
|
|
2
|
+
from experimaestro.scheduler.workspace import Workspace
|
|
3
|
+
from experimaestro.settings import Settings, WorkspaceSettings
|
|
4
|
+
import pytest
|
|
5
|
+
from experimaestro.scheduler import RunMode
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Validation(Config):
    """Config carrying a generated path, used to exercise path generators."""

    # Value is produced by PathGenerator when the owning task is submitted
    best_checkpoint: Meta[Path] = field(default_factory=PathGenerator("index"))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Learner(Task):
    """Task holding a single Validation sub-configuration."""

    validation: Param[Validation]
    x: Param[int]

    @staticmethod
    def create(x: int, validation: Param[Validation]):
        # Factory used by the parametrized tests below
        return Learner.C(x=x, validation=validation)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LearnerList(Task):
    """Task holding Validation configs inside a list parameter."""

    validation: Param[list[Validation]]
    x: Param[int]

    @staticmethod
    def create(x: int, validation: Param[Validation]):
        # Wrap the single validation in a list to test list-typed params
        return LearnerList.C(x=x, validation=[validation])
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LearnerDict(Task):
    """Task holding Validation configs inside a dict parameter."""

    validation: Param[dict[str, Validation]]
    x: Param[int]

    @staticmethod
    def create(x: int, validation: Param[Validation]):
        # Wrap the single validation in a dict to test dict-typed params
        return LearnerDict.C(x=x, validation={"key": validation})
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ModuleLoader(Task):
    """Task whose parameter accepts configs with already-generated values."""

    # ignore_generated=True: reusing a Validation with a generated path is allowed
    validation: Param[Validation] = field(ignore_generated=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.parametrize("cls", [Learner, LearnerDict, LearnerList])
def test_generators_reuse_on_submit(cls):
    """A config with generated paths cannot be reused by a second submission."""
    # One shared validation configuration between two tasks
    validation = Validation.C()

    workspace = Workspace(
        Settings(),
        WorkspaceSettings("test_generators_reuse", path=Path("/tmp")),
        run_mode=RunMode.DRY_RUN,
    )

    # First submission binds the generated path to the x=1 task
    first = cls.create(1, validation)
    first.submit(workspace=workspace)

    # Second submission must fail: the generated path is already set
    with pytest.raises(AttributeError):
        second = cls.create(2, validation)
        second.submit(workspace=workspace)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@pytest.mark.parametrize("cls", [Learner, LearnerDict, LearnerList])
def test_generators_delayed_submit(cls):
    """Creating both tasks before submitting still triggers the conflict."""
    workspace = Workspace(
        Settings(),
        WorkspaceSettings("test_generators_simple", path=Path("/tmp")),
        run_mode=RunMode.DRY_RUN,
    )
    shared_validation = Validation.C()

    # Build both tasks up-front, before any submission
    first_task = cls.create(1, shared_validation)
    second_task = cls.create(2, shared_validation)

    # The first submit binds the generated path; the second must then fail
    first_task.submit(workspace=workspace)
    with pytest.raises(AttributeError):
        second_task.submit(workspace=workspace)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.parametrize("cls", [Learner, LearnerDict, LearnerList])
def test_generators_reuse_on_set(cls):
    """After submission, a generated config cannot even be set on a new task."""
    workspace = Workspace(
        Settings(),
        WorkspaceSettings("test_generators_simple", path=Path("/tmp")),
        run_mode=RunMode.DRY_RUN,
    )
    validation = Validation.C()
    cls.create(1, validation).submit(workspace=workspace)

    # Merely *creating* a second task with the same validation must fail,
    # even without submitting it
    with pytest.raises(AttributeError):
        cls.create(2, validation)

    # A parameter flagged ignore_generated accepts the reused config fine
    ModuleLoader.C(validation=validation)
|