experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +130 -5
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +20 -1
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +182 -46
- experimaestro/core/identifier.py +107 -6
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +542 -25
- experimaestro/core/objects/config_walk.py +20 -0
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +175 -38
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +107 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +63 -13
- experimaestro/progress.py +0 -2
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/base.py +489 -125
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +225 -30
- experimaestro/scheduler/interfaces.py +474 -0
- experimaestro/scheduler/jobs.py +216 -206
- experimaestro/scheduler/services.py +186 -12
- experimaestro/scheduler/state_db.py +388 -0
- experimaestro/scheduler/state_provider.py +2345 -0
- experimaestro/scheduler/state_sync.py +834 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +147 -57
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +44 -5
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_file_progress_integration.py +1 -1
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_identifier.py +372 -41
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +3 -3
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +312 -5
- experimaestro/tests/test_outputs.py +2 -2
- experimaestro/tests/test_param.py +8 -12
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +0 -48
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -1
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +136 -0
- experimaestro/tests/test_tasks.py +107 -121
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +17 -13
- experimaestro/tests/test_types.py +123 -1
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +4 -2
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +1 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2303 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +68 -38
- experimaestro-2.0.0b4.dist-info/RECORD +181 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -221
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-2.0.0a8.dist-info/RECORD +0 -166
- experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/licenses/LICENSE +0 -0
experimaestro/notifications.py
CHANGED
|
@@ -115,14 +115,27 @@ class Reporter(threading.Thread):
|
|
|
115
115
|
|
|
116
116
|
def check_urls(self):
|
|
117
117
|
"""Check whether we have new schedulers to notify"""
|
|
118
|
-
|
|
118
|
+
# Check if path exists (it might have been deleted during cleanup)
|
|
119
|
+
if not self.path.exists():
|
|
120
|
+
return
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
mtime = os.path.getmtime(self.path)
|
|
124
|
+
except (OSError, FileNotFoundError):
|
|
125
|
+
# Path was deleted while we were checking
|
|
126
|
+
return
|
|
127
|
+
|
|
119
128
|
if mtime > self.lastcheck:
|
|
120
129
|
for f in self.path.iterdir():
|
|
121
130
|
self.urls[f.name] = ListenerInformation(f.read_text().strip())
|
|
122
131
|
logger.info("Added new notification URL: %s", self.urls[f.name].url)
|
|
123
132
|
f.unlink()
|
|
124
133
|
|
|
125
|
-
|
|
134
|
+
try:
|
|
135
|
+
self.lastcheck = os.path.getmtime(self.path)
|
|
136
|
+
except (OSError, FileNotFoundError):
|
|
137
|
+
# Path was deleted during iteration
|
|
138
|
+
return
|
|
126
139
|
|
|
127
140
|
def run(self):
|
|
128
141
|
logger.info("Running notification thread")
|
|
@@ -236,12 +249,21 @@ class Reporter(threading.Thread):
|
|
|
236
249
|
|
|
237
250
|
|
|
238
251
|
def progress(value: float, level=0, desc: Optional[str] = None, console=False):
|
|
239
|
-
"""
|
|
252
|
+
"""Report task progress to the experimaestro server.
|
|
253
|
+
|
|
254
|
+
Call this function from within a running task to report progress.
|
|
255
|
+
Progress is displayed in the web UI and TUI monitors.
|
|
256
|
+
|
|
257
|
+
Example::
|
|
240
258
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
259
|
+
for i, batch in enumerate(dataloader):
|
|
260
|
+
train(batch)
|
|
261
|
+
progress(i / len(dataloader), desc="Training")
|
|
262
|
+
|
|
263
|
+
:param value: Progress value between 0.0 and 1.0
|
|
264
|
+
:param level: Nesting level for nested progress bars (default: 0)
|
|
265
|
+
:param desc: Optional description of the current operation
|
|
266
|
+
:param console: If True, also print to console when no server is available
|
|
245
267
|
"""
|
|
246
268
|
if TaskEnv.instance().slave:
|
|
247
269
|
# Skip if in a slave process
|
|
@@ -255,8 +277,21 @@ def report_eoj():
|
|
|
255
277
|
|
|
256
278
|
|
|
257
279
|
class xpm_tqdm(std_tqdm):
|
|
258
|
-
"""
|
|
259
|
-
|
|
280
|
+
"""Experimaestro-aware tqdm progress bar.
|
|
281
|
+
|
|
282
|
+
A drop-in replacement for ``tqdm`` that automatically reports progress
|
|
283
|
+
to the experimaestro server. Use this instead of the standard ``tqdm``
|
|
284
|
+
in your task's ``execute()`` method.
|
|
285
|
+
|
|
286
|
+
Example::
|
|
287
|
+
|
|
288
|
+
from experimaestro import tqdm
|
|
289
|
+
|
|
290
|
+
class MyTask(Task):
|
|
291
|
+
def execute(self):
|
|
292
|
+
for batch in tqdm(dataloader, desc="Training"):
|
|
293
|
+
train(batch)
|
|
294
|
+
"""
|
|
260
295
|
|
|
261
296
|
def __init__(self, iterable=None, file=None, *args, **kwargs):
|
|
262
297
|
# Report progress bar
|
|
@@ -279,14 +314,29 @@ class xpm_tqdm(std_tqdm):
|
|
|
279
314
|
|
|
280
315
|
|
|
281
316
|
@overload
|
|
282
|
-
def tqdm(**kwargs) -> xpm_tqdm:
|
|
283
|
-
...
|
|
317
|
+
def tqdm(**kwargs) -> xpm_tqdm: ...
|
|
284
318
|
|
|
285
319
|
|
|
286
320
|
@overload
|
|
287
|
-
def tqdm(iterable: Optional[Iterator[T]] = None, **kwargs) -> Iterator[T]:
|
|
288
|
-
...
|
|
321
|
+
def tqdm(iterable: Optional[Iterator[T]] = None, **kwargs) -> Iterator[T]: ...
|
|
289
322
|
|
|
290
323
|
|
|
291
324
|
def tqdm(*args, **kwargs):
|
|
325
|
+
"""Create an experimaestro-aware progress bar.
|
|
326
|
+
|
|
327
|
+
A drop-in replacement for ``tqdm.tqdm`` that automatically reports progress
|
|
328
|
+
to the experimaestro server. Use this in task ``execute()`` methods.
|
|
329
|
+
|
|
330
|
+
Example::
|
|
331
|
+
|
|
332
|
+
from experimaestro import tqdm
|
|
333
|
+
|
|
334
|
+
for epoch in tqdm(range(100), desc="Epochs"):
|
|
335
|
+
for batch in tqdm(dataloader, desc="Batches"):
|
|
336
|
+
train(batch)
|
|
337
|
+
|
|
338
|
+
:param iterable: Iterable to wrap (optional)
|
|
339
|
+
:param kwargs: Additional arguments passed to tqdm
|
|
340
|
+
:return: A progress bar iterator
|
|
341
|
+
"""
|
|
292
342
|
return xpm_tqdm(*args, **kwargs) # type: ignore
|
experimaestro/progress.py
CHANGED
experimaestro/rpyc.py
CHANGED
experimaestro/run.py
CHANGED
|
@@ -9,6 +9,7 @@ from typing import List
|
|
|
9
9
|
import fasteners
|
|
10
10
|
from experimaestro.notifications import progress, report_eoj
|
|
11
11
|
from experimaestro.utils.multiprocessing import delayed_shutdown
|
|
12
|
+
from experimaestro.exceptions import GracefulTimeout
|
|
12
13
|
from .core.types import ObjectType
|
|
13
14
|
from experimaestro.utils import logger
|
|
14
15
|
from experimaestro.core.objects import ConfigInformation
|
|
@@ -41,9 +42,6 @@ def run(parameters: Path):
|
|
|
41
42
|
task = ConfigInformation.fromParameters(params["objects"])
|
|
42
43
|
task.__taskdir__ = Path.cwd()
|
|
43
44
|
|
|
44
|
-
# Set the tags
|
|
45
|
-
task.__tags__ = params["tags"]
|
|
46
|
-
|
|
47
45
|
# Notify that the task has started
|
|
48
46
|
progress(0)
|
|
49
47
|
|
|
@@ -92,9 +90,20 @@ class TaskRunner:
|
|
|
92
90
|
report_eoj()
|
|
93
91
|
logger.info("Finished cleanup")
|
|
94
92
|
|
|
95
|
-
def handle_error(self, code, frame_type):
|
|
96
|
-
|
|
97
|
-
|
|
93
|
+
def handle_error(self, code, frame_type, reason: str = "failed", message: str = ""):
|
|
94
|
+
"""Handle task error and write failure information.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
code: Exit code
|
|
98
|
+
frame_type: Signal frame type (unused)
|
|
99
|
+
reason: Failure reason (e.g., "failed", "timeout")
|
|
100
|
+
message: Optional message with details
|
|
101
|
+
"""
|
|
102
|
+
logger.info("Error handler: finished with code %d, reason=%s", code, reason)
|
|
103
|
+
failure_info = {"code": code, "reason": reason}
|
|
104
|
+
if message:
|
|
105
|
+
failure_info["message"] = message
|
|
106
|
+
self.failedpath.write_text(json.dumps(failure_info))
|
|
98
107
|
self.cleanup()
|
|
99
108
|
logger.info("Exiting")
|
|
100
109
|
delayed_shutdown(60, exit_code=code)
|
|
@@ -147,6 +156,10 @@ class TaskRunner:
|
|
|
147
156
|
# Everything went OK
|
|
148
157
|
logger.info("Task ended successfully")
|
|
149
158
|
sys.exit(0)
|
|
159
|
+
except GracefulTimeout as e:
|
|
160
|
+
logger.info("Task requested graceful timeout: %s", e.message)
|
|
161
|
+
self.handle_error(1, None, reason="timeout", message=e.message)
|
|
162
|
+
|
|
150
163
|
except Exception:
|
|
151
164
|
logger.exception("Got exception while running")
|
|
152
165
|
self.handle_error(1, None)
|