experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (154)
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
@@ -1,258 +1,143 @@
1
- import urllib.parse
1
+ """File-based progress notification system for experimaestro tasks.
2
+
3
+ Progress is reported by writing to job event files, which are then read
4
+ by monitors (TUI, web UI) via file watching.
5
+ """
6
+
2
7
  from dataclasses import dataclass
3
8
  from pathlib import Path
4
9
  from typing import ClassVar, Dict, Iterator, Optional, TypeVar, overload
5
- import os.path
6
- from urllib.request import urlopen
7
- from urllib.error import HTTPError, URLError
8
- import threading
9
10
  import sys
10
- import socket
11
11
  from tqdm.auto import tqdm as std_tqdm
12
12
 
13
13
  from .utils import logger
14
14
  from experimaestro.taskglobals import Env as TaskEnv
15
15
  from .progress import FileBasedProgressReporter
16
16
 
17
- # --- Progress and other notifications
18
-
19
17
  T = TypeVar("T")
20
18
 
21
19
 
22
20
  @dataclass
23
21
  class LevelInformation:
22
+ """Progress information for a single nesting level"""
23
+
24
24
  level: int
25
25
  desc: Optional[str]
26
26
  progress: float
27
27
 
28
- previous_progress: float = -1
29
- previous_desc: Optional[str] = None
28
+ def to_dict(self) -> Dict:
29
+ """Convert to a dictionary for JSON serialization."""
30
+ return {
31
+ "level": self.level,
32
+ "desc": self.desc,
33
+ "progress": self.progress,
34
+ }
30
35
 
31
- def modified(self, reporter: "Reporter"):
32
- return (
33
- abs(self.progress - self.previous_progress) > reporter.progress_threshold
34
- ) or (self.previous_desc != self.desc)
36
+ @classmethod
37
+ def from_dict(cls, d: Dict) -> "LevelInformation":
38
+ """Create LevelInformation from a dictionary (e.g., from JSON).
35
39
 
36
- def report(self):
37
- self.previous_progress = self.progress
38
- result = {"level": self.level, "progress": self.progress}
39
- if self.previous_desc != self.desc:
40
- self.previous_desc = self.desc
41
- result["desc"] = self.desc
42
- return result
40
+ Args:
41
+ d: Dictionary with keys 'level', 'progress', and optionally 'desc'
42
+
43
+ Returns:
44
+ LevelInformation instance
45
+ """
46
+ return cls(
47
+ level=d.get("level", 0),
48
+ desc=d.get("desc"),
49
+ progress=d.get("progress", 0),
50
+ )
43
51
 
44
52
  def __repr__(self) -> str:
45
53
  return f"[{self.level}] {self.desc} {int(self.progress * 1000) / 10}%"
46
54
 
47
55
 
48
- class ListenerInformation:
49
- def __init__(self, url: str):
50
- self.url = url
51
- self.error_count = 0
56
+ # Type alias for progress information
57
+ ProgressInformation = list[LevelInformation]
52
58
 
53
59
 
54
- class Reporter(threading.Thread):
55
- NOTIFICATION_FOLDER = ".notifications"
60
+ def get_progress_information_from_dict(dicts: list[dict]) -> ProgressInformation:
61
+ """Convert a list of progress dicts to ProgressInformation.
56
62
 
57
- console: bool
58
- """Whether to output to the console if no notification server is up"""
63
+ Handles both dict and LevelInformation items for robustness.
59
64
 
60
- def __init__(self, path: Path):
61
- """Starts a notification thread
65
+ Args:
66
+ dicts: List of dictionaries with 'level', 'progress', 'desc' keys
62
67
 
63
- Arguments:
64
- path: The path where notification URLs will be put (one file per URL)
65
- """
66
- super().__init__(daemon=True)
67
- self.path = path / Reporter.NOTIFICATION_FOLDER
68
- self.path.mkdir(exist_ok=True)
69
- self.urls: Dict[str, ListenerInformation] = {}
68
+ Returns:
69
+ List of LevelInformation instances (ProgressInformation)
70
+ """
71
+ return [LevelInformation.from_dict(p) if isinstance(p, dict) else p for p in dicts]
70
72
 
71
- # Last check of notification URLs
72
- self.lastcheck = 0
73
73
 
74
- self.levels = [LevelInformation(0, None, -1)]
74
+ class Reporter:
75
+ """File-based progress reporter for running tasks.
75
76
 
76
- self.stopping = False
77
+ Progress events are written to job event files at:
78
+ .events/jobs/{task_id}/event-{job_id}-*.jsonl
77
79
 
78
- self.console = False
80
+ These files are watched by monitors (TUI, web UI) to display progress.
81
+ """
79
82
 
80
- self.progress_threshold = 0.01
81
- self.cv = threading.Condition()
83
+ def __init__(self, path: Path):
84
+ """Initialize the file-based reporter.
82
85
 
83
- # File-based progress reporter
86
+ Args:
87
+ path: The task path ({workspace}/jobs/{task_id}/{job_id}/)
88
+ """
89
+ self.path = path
84
90
  self.file_reporter = FileBasedProgressReporter(task_path=path)
85
-
86
- def stop(self):
87
- self.stopping = True
88
- with self.cv:
89
- # self.cv.notifyAll()
90
- self.cv.notify_all()
91
-
92
- @staticmethod
93
- def isfatal_httperror(e: Exception, info: ListenerInformation) -> bool:
94
- """Returns True if this HTTP error indicates that the server won't recover"""
95
- if isinstance(e, HTTPError):
96
- if e.code >= 400 and e.code < 500:
97
- return True
98
- elif isinstance(e, URLError):
99
- if isinstance(e.reason, ConnectionRefusedError):
100
- return True
101
- if isinstance(e.reason, socket.gaierror) and e.reason.errno == -2:
102
- return True
103
- if isinstance(e.reason, TimeoutError):
104
- info.error_count += 1
105
-
106
- # Too many errors
107
- if info.error_count > 3:
108
- logger.info("Too many errors with %s", info.error_count)
109
- return True
110
-
111
- return False
112
-
113
- def modified(self):
114
- return any(level.modified(self) for level in self.levels)
115
-
116
- def check_urls(self):
117
- """Check whether we have new schedulers to notify"""
118
- # Check if path exists (it might have been deleted during cleanup)
119
- if not self.path.exists():
120
- return
121
-
122
- try:
123
- mtime = os.path.getmtime(self.path)
124
- except (OSError, FileNotFoundError):
125
- # Path was deleted while we were checking
126
- return
127
-
128
- if mtime > self.lastcheck:
129
- for f in self.path.iterdir():
130
- self.urls[f.name] = ListenerInformation(f.read_text().strip())
131
- logger.info("Added new notification URL: %s", self.urls[f.name].url)
132
- f.unlink()
133
-
134
- try:
135
- self.lastcheck = os.path.getmtime(self.path)
136
- except (OSError, FileNotFoundError):
137
- # Path was deleted during iteration
138
- return
139
-
140
- def run(self):
141
- logger.info("Running notification thread")
142
-
143
- while True:
144
- with self.cv:
145
- self.cv.wait_for(lambda: self.stopping or self.modified())
146
- if self.stopping:
147
- break
148
-
149
- # Notify (out of the CV locking)
150
- toremove = []
151
-
152
- # Check if new notification servers are on
153
- self.check_urls()
154
-
155
- if self.urls:
156
- # OK, let's go
157
- for level in self.levels:
158
- if level.modified(self):
159
- params = level.report()
160
-
161
- # Go over all URLs
162
- for key, info in self.urls.items():
163
- baseurl = info.url
164
-
165
- url = "{}/progress?{}".format(
166
- baseurl, urllib.parse.urlencode(params)
167
- )
168
- logger.debug("Reporting progress %s", params)
169
- try:
170
- with urlopen(url) as _:
171
- logger.debug(
172
- "Notification send for %s [%s]",
173
- baseurl,
174
- level,
175
- )
176
- except Exception as e:
177
- logger.warning(
178
- "Progress: %s [error while notifying %s]: %s",
179
- level,
180
- url,
181
- e,
182
- )
183
- if Reporter.isfatal_httperror(e, info):
184
- toremove.append(key)
185
-
186
- # Removes unvalid URLs
187
- for key in toremove:
188
- logger.info("Removing notification URL %s", self.urls[key])
189
- del self.urls[key]
190
- elif self.console:
191
- for level in self.levels:
192
- if level.modified(self):
193
- params = level.report()
194
- logger.info("Progress: %s", level)
91
+ self.levels: list[LevelInformation] = [LevelInformation(0, None, -1)]
92
+ self.console = False
195
93
 
196
94
  def eoj(self):
197
- with self.cv:
198
- self.check_urls()
199
- if self.urls:
200
- # Go over all URLs
201
- for key, info in self.urls.items():
202
- baseurl = info.url
203
- url = "{}?status=eoj".format(baseurl)
204
- try:
205
- with urlopen(url) as _:
206
- logger.debug(
207
- "EOJ notification sent for %s",
208
- baseurl,
209
- )
210
- except Exception:
211
- logger.warning(
212
- "Could not report EOJ",
213
- )
214
-
215
- self.file_reporter.eoj()
95
+ """End of job notification"""
96
+ self.file_reporter.eoj()
216
97
 
217
98
  def set_progress(
218
99
  self, progress: float, level: int, desc: Optional[str], console=False
219
100
  ):
220
- """Sets the new progress if sufficiently different"""
221
- with self.cv:
222
- if (
223
- (level + 1) != len(self.levels)
224
- or (progress != self.levels[level].progress)
225
- or (desc != self.levels[level].desc)
226
- ):
227
- self.console = console
228
- self.levels = self.levels[: (level + 1)]
229
- while level >= len(self.levels):
230
- self.levels.append(LevelInformation(level, None, 0.0))
231
- if desc:
232
- self.levels[level].desc = desc
233
- self.levels[level].progress = progress
234
-
235
- self.file_reporter.set_progress(progress, level, desc)
236
-
237
- self.cv.notify_all()
101
+ """Set progress for a specific level.
102
+
103
+ Args:
104
+ progress: Progress value between 0.0 and 1.0
105
+ level: Nesting level (0 is top level)
106
+ desc: Optional description
107
+ console: If True, also print to console
108
+ """
109
+ # Update in-memory levels
110
+ self.levels = self.levels[: (level + 1)]
111
+ while level >= len(self.levels):
112
+ self.levels.append(LevelInformation(level, None, 0.0))
113
+ if desc:
114
+ self.levels[level].desc = desc
115
+ self.levels[level].progress = progress
116
+
117
+ # Write to file
118
+ self.file_reporter.set_progress(progress, level, desc)
119
+
120
+ # Optionally log to console
121
+ if console:
122
+ logger.info("Progress: %s", self.levels[level])
238
123
 
239
124
  INSTANCE: ClassVar[Optional["Reporter"]] = None
240
125
 
241
126
  @staticmethod
242
127
  def instance():
128
+ """Get or create the singleton Reporter instance."""
243
129
  if Reporter.INSTANCE is None:
244
130
  taskpath = TaskEnv.instance().taskpath
245
131
  assert taskpath is not None, "Task path is not defined"
246
132
  Reporter.INSTANCE = Reporter(taskpath)
247
- Reporter.INSTANCE.start()
248
133
  return Reporter.INSTANCE
249
134
 
250
135
 
251
136
  def progress(value: float, level=0, desc: Optional[str] = None, console=False):
252
- """Report task progress to the experimaestro server.
137
+ """Report task progress.
253
138
 
254
139
  Call this function from within a running task to report progress.
255
- Progress is displayed in the web UI and TUI monitors.
140
+ Progress is written to job event files and displayed in monitors.
256
141
 
257
142
  Example::
258
143
 
@@ -263,7 +148,7 @@ def progress(value: float, level=0, desc: Optional[str] = None, console=False):
263
148
  :param value: Progress value between 0.0 and 1.0
264
149
  :param level: Nesting level for nested progress bars (default: 0)
265
150
  :param desc: Optional description of the current operation
266
- :param console: If True, also print to console when no server is available
151
+ :param console: If True, also print to console
267
152
  """
268
153
  if TaskEnv.instance().slave:
269
154
  # Skip if in a slave process
@@ -276,11 +161,16 @@ def report_eoj():
276
161
  Reporter.instance().eoj()
277
162
 
278
163
 
164
+ def start_of_job():
165
+ """Notify that the job has started running"""
166
+ Reporter.instance().file_reporter.start_of_job()
167
+
168
+
279
169
  class xpm_tqdm(std_tqdm):
280
170
  """Experimaestro-aware tqdm progress bar.
281
171
 
282
172
  A drop-in replacement for ``tqdm`` that automatically reports progress
283
- to the experimaestro server. Use this instead of the standard ``tqdm``
173
+ to job event files. Use this instead of the standard ``tqdm``
284
174
  in your task's ``execute()`` method.
285
175
 
286
176
  Example::
@@ -294,8 +184,6 @@ class xpm_tqdm(std_tqdm):
294
184
  """
295
185
 
296
186
  def __init__(self, iterable=None, file=None, *args, **kwargs):
297
- # Report progress bar
298
- # newprogress(title=, pos=abs(self.pos))
299
187
  _file = file or sys.stderr
300
188
  self.is_tty = hasattr(_file, "isatty") or _file.isatty()
301
189
 
@@ -325,7 +213,7 @@ def tqdm(*args, **kwargs):
325
213
  """Create an experimaestro-aware progress bar.
326
214
 
327
215
  A drop-in replacement for ``tqdm.tqdm`` that automatically reports progress
328
- to the experimaestro server. Use this in task ``execute()`` methods.
216
+ to job event files. Use this in task ``execute()`` methods.
329
217
 
330
218
  Example::
331
219