experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (122) hide show
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +278 -7
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +111 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +510 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +256 -31
  37. experimaestro/scheduler/interfaces.py +501 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/remote/__init__.py +31 -0
  40. experimaestro/scheduler/remote/client.py +874 -0
  41. experimaestro/scheduler/remote/protocol.py +467 -0
  42. experimaestro/scheduler/remote/server.py +423 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +323 -23
  45. experimaestro/scheduler/state_db.py +437 -0
  46. experimaestro/scheduler/state_provider.py +2766 -0
  47. experimaestro/scheduler/state_sync.py +891 -0
  48. experimaestro/scheduler/workspace.py +52 -10
  49. experimaestro/scriptbuilder.py +7 -0
  50. experimaestro/server/__init__.py +147 -57
  51. experimaestro/server/data/index.css +0 -125
  52. experimaestro/server/data/index.css.map +1 -1
  53. experimaestro/server/data/index.js +194 -58
  54. experimaestro/server/data/index.js.map +1 -1
  55. experimaestro/settings.py +44 -5
  56. experimaestro/sphinx/__init__.py +3 -3
  57. experimaestro/taskglobals.py +20 -0
  58. experimaestro/tests/conftest.py +80 -0
  59. experimaestro/tests/core/test_generics.py +2 -2
  60. experimaestro/tests/identifier_stability.json +45 -0
  61. experimaestro/tests/launchers/bin/sacct +6 -2
  62. experimaestro/tests/launchers/bin/sbatch +4 -2
  63. experimaestro/tests/launchers/test_slurm.py +80 -0
  64. experimaestro/tests/tasks/test_dynamic.py +231 -0
  65. experimaestro/tests/test_cli_jobs.py +615 -0
  66. experimaestro/tests/test_deprecated.py +630 -0
  67. experimaestro/tests/test_environment.py +200 -0
  68. experimaestro/tests/test_file_progress_integration.py +1 -1
  69. experimaestro/tests/test_forward.py +3 -3
  70. experimaestro/tests/test_identifier.py +372 -41
  71. experimaestro/tests/test_identifier_stability.py +458 -0
  72. experimaestro/tests/test_instance.py +3 -3
  73. experimaestro/tests/test_multitoken.py +442 -0
  74. experimaestro/tests/test_mypy.py +433 -0
  75. experimaestro/tests/test_objects.py +312 -5
  76. experimaestro/tests/test_outputs.py +2 -2
  77. experimaestro/tests/test_param.py +8 -12
  78. experimaestro/tests/test_partial_paths.py +231 -0
  79. experimaestro/tests/test_progress.py +0 -48
  80. experimaestro/tests/test_remote_state.py +671 -0
  81. experimaestro/tests/test_resumable_task.py +480 -0
  82. experimaestro/tests/test_serializers.py +141 -1
  83. experimaestro/tests/test_state_db.py +434 -0
  84. experimaestro/tests/test_subparameters.py +160 -0
  85. experimaestro/tests/test_tags.py +136 -0
  86. experimaestro/tests/test_tasks.py +107 -121
  87. experimaestro/tests/test_token_locking.py +252 -0
  88. experimaestro/tests/test_tokens.py +17 -13
  89. experimaestro/tests/test_types.py +123 -1
  90. experimaestro/tests/test_workspace_triggers.py +158 -0
  91. experimaestro/tests/token_reschedule.py +4 -2
  92. experimaestro/tests/utils.py +2 -2
  93. experimaestro/tokens.py +154 -57
  94. experimaestro/tools/diff.py +1 -1
  95. experimaestro/tui/__init__.py +8 -0
  96. experimaestro/tui/app.py +2395 -0
  97. experimaestro/tui/app.tcss +353 -0
  98. experimaestro/tui/log_viewer.py +228 -0
  99. experimaestro/utils/__init__.py +23 -0
  100. experimaestro/utils/environment.py +148 -0
  101. experimaestro/utils/git.py +129 -0
  102. experimaestro/utils/resources.py +1 -1
  103. experimaestro/version.py +34 -0
  104. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
  105. experimaestro-2.0.0b8.dist-info/RECORD +187 -0
  106. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
  107. experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
  108. experimaestro/compat.py +0 -6
  109. experimaestro/core/objects.pyi +0 -221
  110. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  111. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  112. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  113. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  114. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  115. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  116. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  117. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  118. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  119. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  120. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  121. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  122. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,16 @@
1
1
  import abc
2
2
  from enum import Enum
3
- import functools
3
+ import logging
4
4
  import threading
5
- from typing import Set
5
+ from pathlib import Path
6
+ from typing import Callable, Optional, Set, TYPE_CHECKING
7
+
8
+ from experimaestro.scheduler.interfaces import BaseService
9
+
10
+ if TYPE_CHECKING:
11
+ from experimaestro.scheduler.experiment import Experiment
12
+
13
+ logger = logging.getLogger(__name__)
6
14
 
7
15
 
8
16
  class ServiceListener:
@@ -13,38 +21,160 @@ class ServiceListener:
13
21
 
14
22
 
15
23
  class ServiceState(Enum):
24
+ """State of a service lifecycle.
25
+
26
+ Services transition through these states:
27
+ STOPPED -> STARTING -> RUNNING -> STOPPING -> STOPPED
28
+ """
29
+
16
30
  STOPPED = 0
17
31
  STARTING = 1
18
32
  RUNNING = 2
19
33
  STOPPING = 3
20
34
 
21
35
 
22
- class Service:
36
+ class Service(BaseService):
23
37
  """An experiment service
24
38
 
25
39
  Services can be associated with an experiment. They send
26
40
  notifications to service listeners.
41
+
42
+ To support restarting services from monitor mode, subclasses should
43
+ override :meth:`state_dict` to return the data needed to recreate
44
+ the service, and implement :meth:`from_state_dict` to recreate it.
27
45
  """
28
46
 
29
47
  id: str
30
48
  _state: ServiceState = ServiceState.STOPPED
31
49
 
32
50
  def __init__(self):
33
- self.listeners: Set[ServiceListener] = set()
51
+ self._listeners: Set[ServiceListener] = set()
52
+ self._listeners_lock = threading.Lock()
53
+
54
def set_experiment(self, xp: "Experiment") -> None:
    """Hook called when the service is added to an experiment.

    The base implementation does nothing. Subclasses may override it to
    read information from the experiment context (e.g., its workdir).

    Args:
        xp: The experiment this service is being added to.
    """
64
+
65
def state_dict(self) -> dict:
    """Return the parameters needed to recreate this service.

    The base implementation returns an empty dictionary. Subclasses
    should override it and return their constructor keyword arguments;
    ``Path`` values can be returned as-is since they are converted when
    the dictionary is serialized.

    Example::

        def state_dict(self):
            return {
                "log_dir": self.log_dir,  # a Path is fine here
                "name": self.name,
            }

    Returns:
        Dict with constructor kwargs (``__class__`` must not be included,
        it is added separately).
    """
    return dict()
84
+
85
def _full_state_dict(self) -> dict:
    """Return ``state_dict()`` completed with a ``__class__`` entry.

    The ``__class__`` entry holds the dotted ``module.ClassName`` path of
    this instance's class, which is what :meth:`from_state_dict` needs to
    import the class again.

    Returns:
        A new dictionary; the object returned by :meth:`state_dict` is
        never modified.
    """
    # Work on a copy: a subclass may return a dictionary it keeps a
    # reference to, and adding "__class__" in place would corrupt it
    full_state = dict(self.state_dict())
    full_state["__class__"] = (
        f"{self.__class__.__module__}.{self.__class__.__name__}"
    )
    return full_state
90
+
91
@staticmethod
def serialize_state_dict(data: dict) -> dict:
    """Convert the ``Path`` values of a state dictionary.

    Every top-level value that is a ``Path`` is replaced by
    ``{"__path__": "<path as string>"}``; all other values are kept
    unchanged. The input dictionary is not modified.

    Args:
        data: Raw state dictionary of a service (should include
            ``__class__``)

    Returns:
        A new dictionary where paths have been converted
    """
    return {
        key: {"__path__": str(value)} if isinstance(value, Path) else value
        for key, value in data.items()
    }
111
+
112
@staticmethod
def from_state_dict(
    data: dict, path_translator: Optional[Callable[[str], Path]] = None
) -> "Service":
    """Recreate a service from a state dictionary.

    The class named by ``data["__class__"]`` is imported and called with
    the remaining entries as keyword arguments. Keys starting with ``__``
    are ignored, and values of the form ``{"__path__": "..."}`` are turned
    back into paths.

    Args:
        data: Dictionary from :meth:`state_dict` (may be serialized)
        path_translator: Optional callable receiving a serialized path
            string and returning the path to use; when omitted,
            ``Path(path_str)`` is used.

    Returns:
        The object returned by calling the loaded class.

    Raises:
        ValueError: If ``__unserializable__`` is set, or if ``__class__``
            is missing or is not a dotted ``module.ClassName`` string
        ImportError: If the module cannot be imported
        AttributeError: If the module does not define the class
    """
    import importlib

    # Services flagged as unserializable cannot be recreated
    if data.get("__unserializable__"):
        raise ValueError(
            f"Service cannot be recreated: {data.get('__reason__', 'unknown reason')}"
        )

    class_path = data.get("__class__")
    if not class_path:
        raise ValueError("Missing '__class__' in service state_dict")

    # Validate the dotted path explicitly: a bare name would otherwise
    # fail with an unrelated tuple-unpacking error
    if isinstance(class_path, str):
        module_name, _, class_name = class_path.rpartition(".")
    else:
        module_name = class_name = ""
    if not module_name or not class_name:
        raise ValueError(
            f"Invalid '__class__' in service state_dict: {class_path!r} "
            "(expected 'module.ClassName')"
        )

    # NOTE(review): this imports and instantiates whatever class the data
    # names; state dictionaries must come from a trusted source
    module = importlib.import_module(module_name)
    cls = getattr(module, class_name)

    # Build the constructor arguments, restoring serialized paths
    kwargs = {}
    for key, value in data.items():
        if key.startswith("__"):
            continue  # Special keys are not constructor arguments
        if isinstance(value, dict) and "__path__" in value:
            path_str = value["__path__"]
            kwargs[key] = (
                path_translator(path_str) if path_translator else Path(path_str)
            )
        else:
            kwargs[key] = value

    logger.debug("Creating %s with kwargs: %s", cls.__name__, kwargs)
    return cls(**kwargs)
34
162
 
35
163
  def add_listener(self, listener: ServiceListener):
36
164
  """Adds a listener
37
165
 
38
166
  :param listener: The listener to add
39
167
  """
40
- self.listeners.add(listener)
168
+ with self._listeners_lock:
169
+ self._listeners.add(listener)
41
170
 
42
171
  def remove_listener(self, listener: ServiceListener):
43
172
  """Removes a listener
44
173
 
45
174
  :param listener: The listener to remove
46
175
  """
47
- self.listeners.remove(listener)
176
+ with self._listeners_lock:
177
+ self._listeners.discard(listener)
48
178
 
49
179
  def description(self):
50
180
  return ""
@@ -58,46 +188,216 @@ class Service:
58
188
  # Set the state
59
189
  self._state = state
60
190
 
61
- for listener in self.listeners:
62
- listener.service_state_changed(self)
191
+ # Notify listeners with thread-safe snapshot
192
+ with self._listeners_lock:
193
+ listeners_snapshot = list(self._listeners)
194
+
195
+ for listener in listeners_snapshot:
196
+ try:
197
+ listener.service_state_changed(self)
198
+ except Exception:
199
+ logger.exception("Error notifying listener %s", listener)
63
200
 
64
201
 
65
202
  class WebService(Service):
66
- """Web service"""
203
+ """Base class for web-based experiment services.
204
+
205
+ Web services provide HTTP endpoints that can be accessed through the
206
+ experimaestro web interface. When an experiment is running with a port
207
+ configured, web services are automatically proxied through the main
208
+ experimaestro server.
209
+
210
+ To implement a web service:
211
+
212
+ 1. Subclass ``WebService``
213
+ 2. Set a unique ``id`` class attribute
214
+ 3. Implement the :meth:`_serve` method to start your web server
215
+ 4. Set ``self.url`` and call ``running.set()`` when ready
216
+ 5. Optionally check ``self.should_stop()`` to handle graceful shutdown
217
+
218
+ Example::
219
+
220
+ class MyWebService(WebService):
221
+ id = "myservice"
222
+
223
+ def _serve(self, running: threading.Event):
224
+ # Start your web server
225
+ self.url = "http://localhost:8080"
226
+ running.set()
227
+ # Keep serving, checking for stop signal
228
+ while not self.should_stop():
229
+ time.sleep(1)
230
+ """
67
231
 
68
232
  def __init__(self):
69
233
  super().__init__()
70
234
  self.url = None
235
+ self.thread = None
236
+ self._stop_event = threading.Event()
237
+ self._start_lock = threading.Lock()
238
+ self._running_event: Optional[threading.Event] = None
239
+
240
def should_stop(self) -> bool:
    """Tell whether a stop has been requested.

    Meant to be polled by subclasses inside their ``_serve`` loop so that
    they can shut down gracefully.

    :return: True if the stop event is set
    """
    stop_requested = self._stop_event.is_set()
    return stop_requested
71
249
 
72
250
  def get_url(self):
73
- if self.state == ServiceState.STOPPED:
74
- self.state = ServiceState.STARTING
75
- self.running = threading.Event()
76
- self.serve()
251
+ """Get the URL of this web service, starting it if needed.
77
252
 
78
- # Wait until the server is ready
79
- self.running.wait()
253
+ If the service is not running, this method will start it and
254
+ block until the URL is available. If the service is already
255
+ starting or running, returns the existing URL.
256
+
257
+ :return: The URL where this service can be accessed
258
+ :raises RuntimeError: If called while service is stopping
259
+ """
260
+ with self._start_lock:
261
+ if self.state == ServiceState.STOPPING:
262
+ raise RuntimeError("Cannot start service while it is stopping")
263
+
264
+ if self.state == ServiceState.RUNNING:
265
+ logger.debug("Service already running, returning existing URL")
266
+ return self.url
267
+
268
+ if self.state == ServiceState.STOPPED:
269
+ logger.info(
270
+ "Starting service %s (id=%s)", self.__class__.__name__, id(self)
271
+ )
272
+ self._stop_event.clear()
273
+ self.state = ServiceState.STARTING
274
+ self._running_event = threading.Event()
275
+ self.serve()
276
+ else:
277
+ logger.info(
278
+ "Service %s (id=%s) already starting, waiting for it",
279
+ self.__class__.__name__,
280
+ id(self),
281
+ )
282
+
283
+ # State is STARTING - wait for it to be ready
284
+ running_event = self._running_event
285
+
286
+ # Wait outside the lock to avoid blocking other callers
287
+ if running_event:
288
+ running_event.wait()
289
+ # Set state to RUNNING (this will notify listeners)
290
+ with self._start_lock:
291
+ if self.state == ServiceState.STARTING:
292
+ self.state = ServiceState.RUNNING
80
293
 
81
- # Returns the URL
82
294
  return self.url
83
295
 
84
def stop(self, timeout: float = 2.0):
    """Stop the web service.

    The stop event is set first, so that a ``_serve`` implementation
    polling ``should_stop()`` can react even while it is still starting.
    If a start is in progress, this method then waits (at most ``timeout``
    seconds) for it to complete, waits (at most ``timeout`` seconds) for
    the serving thread to end, and finally tries to terminate the thread
    forcefully if it is still alive. The service ends in the ``STOPPED``
    state with its URL cleared.

    Calling this method on a stopped service does nothing.

    :param timeout: Seconds to wait for each step of the graceful
        shutdown before forcing
    """
    with self._start_lock:
        if self.state == ServiceState.STOPPED:
            return

        # Remember the readiness event of a start in progress, if any
        if self.state == ServiceState.STARTING:
            running_event = self._running_event
        else:
            running_event = None

        self.state = ServiceState.STOPPING

    # Signal the service to stop before waiting for anything: a _serve()
    # stuck in its startup would otherwise never learn about the request
    self._stop_event.set()

    # Bounded wait (outside the lock to avoid deadlocks): an unbounded one
    # would hang forever if the service never signals readiness, and the
    # forced termination below would never be reached
    if running_event is not None:
        running_event.wait(timeout)

    # Wait for the thread to finish
    thread = self.thread
    if thread is not None and thread.is_alive():
        thread.join(timeout=timeout)

        # Still alive after the grace period: try to terminate it forcefully
        if thread.is_alive():
            self._force_stop_thread()

    with self._start_lock:
        self.url = None
        self._running_event = None
        self.state = ServiceState.STOPPED
336
+
337
def _force_stop_thread(self):
    """Attempt to forcefully stop the service thread.

    This uses ctypes to ask the interpreter to raise ``SystemExit`` in the
    thread. It is not guaranteed to work (e.g., if the thread is blocked
    in C code), but it is the best that can be done in Python.

    Nothing is done when there is no thread, when it is not alive, or when
    it has no identifier.
    """
    import ctypes

    thread = self.thread
    if thread is None or not thread.is_alive():
        return

    thread_id = thread.ident
    if thread_id is None:
        return

    set_async_exc = ctypes.pythonapi.PyThreadState_SetAsyncExc

    # Schedule SystemExit in the target thread; the return value is the
    # number of thread states that were modified
    affected = set_async_exc(
        ctypes.c_ulong(thread_id), ctypes.py_object(SystemExit)
    )

    if affected == 0:
        # No thread with this identifier (it may just have ended)
        logger.debug("No thread with id %s to stop", thread_id)
    elif affected > 1:
        # More than one thread state modified: revoke the request. The
        # exception must be cleared with a NULL object pointer, which is
        # how ctypes passes None
        set_async_exc(ctypes.c_ulong(thread_id), None)
        logger.warning(
            "Forced stop of thread %s affected %d thread states; reverted",
            thread_id,
            affected,
        )
86
366
 
87
367
  def serve(self):
88
- import threading
368
+ """Start the web service in a background thread.
89
369
 
370
+ This method creates a daemon thread that calls :meth:`_serve`.
371
+ """
90
372
  self.thread = threading.Thread(
91
- target=functools.partial(self._serve, self.running),
373
+ target=self._serve_wrapper,
92
374
  name=f"service[{self.id}]",
93
375
  )
94
376
  self.thread.daemon = True
95
377
  self.thread.start()
96
378
 
379
def _serve_wrapper(self):
    """Run :meth:`_serve` and make sure the readiness event ends up set."""
    ready = self._running_event
    try:
        self._serve(ready)
    finally:
        # Release the callers waiting on the event even when _serve raised
        # or returned without signalling readiness
        if ready and not ready.is_set():
            ready.set()
388
+
97
389
  @abc.abstractmethod
98
- def _server(self, running: threading.Event):
99
- """Starts the web service
390
+ def _serve(self, running: threading.Event):
391
+ """Start the web server (implement in subclasses).
392
+
393
+ This method should:
394
+
395
+ 1. Start your web server
396
+ 2. Set ``self.url`` to the service URL
397
+ 3. Call ``running.set()`` to signal readiness
398
+ 4. Keep the server running (this runs in a background thread)
399
+ 5. Optionally check ``self.should_stop()`` for graceful shutdown
100
400
 
101
- :param running: signals that `self.url` is set
401
+ :param running: Event to signal when ``self.url`` is set
102
402
  """
103
403
  ...