experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (116)
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +130 -5
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +107 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +489 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +225 -30
  37. experimaestro/scheduler/interfaces.py +474 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/services.py +186 -12
  40. experimaestro/scheduler/state_db.py +388 -0
  41. experimaestro/scheduler/state_provider.py +2345 -0
  42. experimaestro/scheduler/state_sync.py +834 -0
  43. experimaestro/scheduler/workspace.py +52 -10
  44. experimaestro/scriptbuilder.py +7 -0
  45. experimaestro/server/__init__.py +147 -57
  46. experimaestro/server/data/index.css +0 -125
  47. experimaestro/server/data/index.css.map +1 -1
  48. experimaestro/server/data/index.js +194 -58
  49. experimaestro/server/data/index.js.map +1 -1
  50. experimaestro/settings.py +44 -5
  51. experimaestro/sphinx/__init__.py +3 -3
  52. experimaestro/taskglobals.py +20 -0
  53. experimaestro/tests/conftest.py +80 -0
  54. experimaestro/tests/core/test_generics.py +2 -2
  55. experimaestro/tests/identifier_stability.json +45 -0
  56. experimaestro/tests/launchers/bin/sacct +6 -2
  57. experimaestro/tests/launchers/bin/sbatch +4 -2
  58. experimaestro/tests/launchers/test_slurm.py +80 -0
  59. experimaestro/tests/tasks/test_dynamic.py +231 -0
  60. experimaestro/tests/test_cli_jobs.py +615 -0
  61. experimaestro/tests/test_deprecated.py +630 -0
  62. experimaestro/tests/test_environment.py +200 -0
  63. experimaestro/tests/test_file_progress_integration.py +1 -1
  64. experimaestro/tests/test_forward.py +3 -3
  65. experimaestro/tests/test_identifier.py +372 -41
  66. experimaestro/tests/test_identifier_stability.py +458 -0
  67. experimaestro/tests/test_instance.py +3 -3
  68. experimaestro/tests/test_multitoken.py +442 -0
  69. experimaestro/tests/test_mypy.py +433 -0
  70. experimaestro/tests/test_objects.py +312 -5
  71. experimaestro/tests/test_outputs.py +2 -2
  72. experimaestro/tests/test_param.py +8 -12
  73. experimaestro/tests/test_partial_paths.py +231 -0
  74. experimaestro/tests/test_progress.py +0 -48
  75. experimaestro/tests/test_resumable_task.py +480 -0
  76. experimaestro/tests/test_serializers.py +141 -1
  77. experimaestro/tests/test_state_db.py +434 -0
  78. experimaestro/tests/test_subparameters.py +160 -0
  79. experimaestro/tests/test_tags.py +136 -0
  80. experimaestro/tests/test_tasks.py +107 -121
  81. experimaestro/tests/test_token_locking.py +252 -0
  82. experimaestro/tests/test_tokens.py +17 -13
  83. experimaestro/tests/test_types.py +123 -1
  84. experimaestro/tests/test_workspace_triggers.py +158 -0
  85. experimaestro/tests/token_reschedule.py +4 -2
  86. experimaestro/tests/utils.py +2 -2
  87. experimaestro/tokens.py +154 -57
  88. experimaestro/tools/diff.py +1 -1
  89. experimaestro/tui/__init__.py +8 -0
  90. experimaestro/tui/app.py +2303 -0
  91. experimaestro/tui/app.tcss +353 -0
  92. experimaestro/tui/log_viewer.py +228 -0
  93. experimaestro/utils/__init__.py +23 -0
  94. experimaestro/utils/environment.py +148 -0
  95. experimaestro/utils/git.py +129 -0
  96. experimaestro/utils/resources.py +1 -1
  97. experimaestro/version.py +34 -0
  98. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +68 -38
  99. experimaestro-2.0.0b4.dist-info/RECORD +181 -0
  100. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
  101. experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
  102. experimaestro/compat.py +0 -6
  103. experimaestro/core/objects.pyi +0 -221
  104. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  105. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  106. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  107. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  108. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  109. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  110. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  111. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  112. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  113. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  114. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  115. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  116. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/licenses/LICENSE +0 -0
experimaestro/notifications.py CHANGED
@@ -115,14 +115,27 @@ class Reporter(threading.Thread):
115
115
 
116
116
  def check_urls(self):
117
117
  """Check whether we have new schedulers to notify"""
118
- mtime = os.path.getmtime(self.path)
118
+ # Check if path exists (it might have been deleted during cleanup)
119
+ if not self.path.exists():
120
+ return
121
+
122
+ try:
123
+ mtime = os.path.getmtime(self.path)
124
+ except (OSError, FileNotFoundError):
125
+ # Path was deleted while we were checking
126
+ return
127
+
119
128
  if mtime > self.lastcheck:
120
129
  for f in self.path.iterdir():
121
130
  self.urls[f.name] = ListenerInformation(f.read_text().strip())
122
131
  logger.info("Added new notification URL: %s", self.urls[f.name].url)
123
132
  f.unlink()
124
133
 
125
- self.lastcheck = os.path.getmtime(self.path)
134
+ try:
135
+ self.lastcheck = os.path.getmtime(self.path)
136
+ except (OSError, FileNotFoundError):
137
+ # Path was deleted during iteration
138
+ return
126
139
 
127
140
  def run(self):
128
141
  logger.info("Running notification thread")
@@ -236,12 +249,21 @@ class Reporter(threading.Thread):
236
249
 
237
250
 
238
251
  def progress(value: float, level=0, desc: Optional[str] = None, console=False):
239
- """When called from a running task, report the progress
252
+ """Report task progress to the experimaestro server.
253
+
254
+ Call this function from within a running task to report progress.
255
+ Progress is displayed in the web UI and TUI monitors.
256
+
257
+ Example::
240
258
 
241
- Args:
242
- level: The level (starting from 0)
243
- value: The current value
244
- desc: An optional description of the current task
259
+ for i, batch in enumerate(dataloader):
260
+ train(batch)
261
+ progress(i / len(dataloader), desc="Training")
262
+
263
+ :param value: Progress value between 0.0 and 1.0
264
+ :param level: Nesting level for nested progress bars (default: 0)
265
+ :param desc: Optional description of the current operation
266
+ :param console: If True, also print to console when no server is available
245
267
  """
246
268
  if TaskEnv.instance().slave:
247
269
  # Skip if in a slave process
@@ -255,8 +277,21 @@ def report_eoj():
255
277
 
256
278
 
257
279
  class xpm_tqdm(std_tqdm):
258
- """XPM wrapper for experimaestro that automatically reports progress to the
259
- server"""
280
+ """Experimaestro-aware tqdm progress bar.
281
+
282
+ A drop-in replacement for ``tqdm`` that automatically reports progress
283
+ to the experimaestro server. Use this instead of the standard ``tqdm``
284
+ in your task's ``execute()`` method.
285
+
286
+ Example::
287
+
288
+ from experimaestro import tqdm
289
+
290
+ class MyTask(Task):
291
+ def execute(self):
292
+ for batch in tqdm(dataloader, desc="Training"):
293
+ train(batch)
294
+ """
260
295
 
261
296
  def __init__(self, iterable=None, file=None, *args, **kwargs):
262
297
  # Report progress bar
@@ -279,14 +314,29 @@ class xpm_tqdm(std_tqdm):
279
314
 
280
315
 
281
316
  @overload
282
- def tqdm(**kwargs) -> xpm_tqdm:
283
- ...
317
+ def tqdm(**kwargs) -> xpm_tqdm: ...
284
318
 
285
319
 
286
320
  @overload
287
- def tqdm(iterable: Optional[Iterator[T]] = None, **kwargs) -> Iterator[T]:
288
- ...
321
+ def tqdm(iterable: Optional[Iterator[T]] = None, **kwargs) -> Iterator[T]: ...
289
322
 
290
323
 
291
324
  def tqdm(*args, **kwargs):
325
+ """Create an experimaestro-aware progress bar.
326
+
327
+ A drop-in replacement for ``tqdm.tqdm`` that automatically reports progress
328
+ to the experimaestro server. Use this in task ``execute()`` methods.
329
+
330
+ Example::
331
+
332
+ from experimaestro import tqdm
333
+
334
+ for epoch in tqdm(range(100), desc="Epochs"):
335
+ for batch in tqdm(dataloader, desc="Batches"):
336
+ train(batch)
337
+
338
+ :param iterable: Iterable to wrap (optional)
339
+ :param kwargs: Additional arguments passed to tqdm
340
+ :return: A progress bar iterator
341
+ """
292
342
  return xpm_tqdm(*args, **kwargs) # type: ignore
experimaestro/progress.py CHANGED
@@ -106,8 +106,6 @@ class StateFile:
106
106
 
107
107
 
108
108
  class ProgressFileWriter:
109
- # TODO: Implement buffering and flushing
110
-
111
109
  def __init__(
112
110
  self, task_path: Path, max_entries_per_file: int = DEFAULT_MAX_ENTRIES_PER_FILE
113
111
  ):
experimaestro/rpyc.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import atexit
2
- import shutil
3
2
  import tempfile
4
3
  from pathlib import Path
5
4
  from subprocess import Popen, PIPE, run
experimaestro/run.py CHANGED
@@ -9,6 +9,7 @@ from typing import List
9
9
  import fasteners
10
10
  from experimaestro.notifications import progress, report_eoj
11
11
  from experimaestro.utils.multiprocessing import delayed_shutdown
12
+ from experimaestro.exceptions import GracefulTimeout
12
13
  from .core.types import ObjectType
13
14
  from experimaestro.utils import logger
14
15
  from experimaestro.core.objects import ConfigInformation
@@ -41,9 +42,6 @@ def run(parameters: Path):
41
42
  task = ConfigInformation.fromParameters(params["objects"])
42
43
  task.__taskdir__ = Path.cwd()
43
44
 
44
- # Set the tags
45
- task.__tags__ = params["tags"]
46
-
47
45
  # Notify that the task has started
48
46
  progress(0)
49
47
 
@@ -92,9 +90,20 @@ class TaskRunner:
92
90
  report_eoj()
93
91
  logger.info("Finished cleanup")
94
92
 
95
- def handle_error(self, code, frame_type):
96
- logger.info("Error handler: finished with code %d", code)
97
- self.failedpath.write_text(str(code))
93
+ def handle_error(self, code, frame_type, reason: str = "failed", message: str = ""):
94
+ """Handle task error and write failure information.
95
+
96
+ Args:
97
+ code: Exit code
98
+ frame_type: Signal frame type (unused)
99
+ reason: Failure reason (e.g., "failed", "timeout")
100
+ message: Optional message with details
101
+ """
102
+ logger.info("Error handler: finished with code %d, reason=%s", code, reason)
103
+ failure_info = {"code": code, "reason": reason}
104
+ if message:
105
+ failure_info["message"] = message
106
+ self.failedpath.write_text(json.dumps(failure_info))
98
107
  self.cleanup()
99
108
  logger.info("Exiting")
100
109
  delayed_shutdown(60, exit_code=code)
@@ -147,6 +156,10 @@ class TaskRunner:
147
156
  # Everything went OK
148
157
  logger.info("Task ended successfully")
149
158
  sys.exit(0)
159
+ except GracefulTimeout as e:
160
+ logger.info("Task requested graceful timeout: %s", e.message)
161
+ self.handle_error(1, None, reason="timeout", message=e.message)
162
+
150
163
  except Exception:
151
164
  logger.exception("Got exception while running")
152
165
  self.handle_error(1, None)