experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (152)
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +239 -126
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +217 -50
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +629 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +732 -167
  36. experimaestro/scheduler/interfaces.py +316 -101
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  39. experimaestro/scheduler/remote/client.py +171 -117
  40. experimaestro/scheduler/remote/protocol.py +8 -193
  41. experimaestro/scheduler/remote/server.py +95 -71
  42. experimaestro/scheduler/services.py +53 -28
  43. experimaestro/scheduler/state_provider.py +663 -2430
  44. experimaestro/scheduler/state_status.py +1247 -0
  45. experimaestro/scheduler/transient.py +31 -0
  46. experimaestro/scheduler/workspace.py +1 -1
  47. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  48. experimaestro/scriptbuilder.py +4 -4
  49. experimaestro/settings.py +36 -0
  50. experimaestro/tests/conftest.py +33 -5
  51. experimaestro/tests/connectors/bin/executable.py +1 -1
  52. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  53. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  54. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  55. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  56. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  58. experimaestro/tests/launchers/bin/test.py +1 -0
  59. experimaestro/tests/launchers/test_slurm.py +9 -9
  60. experimaestro/tests/partial_reschedule.py +46 -0
  61. experimaestro/tests/restart.py +3 -3
  62. experimaestro/tests/restart_main.py +1 -0
  63. experimaestro/tests/scripts/notifyandwait.py +1 -0
  64. experimaestro/tests/task_partial.py +38 -0
  65. experimaestro/tests/task_tokens.py +2 -2
  66. experimaestro/tests/tasks/test_dynamic.py +6 -6
  67. experimaestro/tests/test_dependencies.py +3 -3
  68. experimaestro/tests/test_deprecated.py +15 -15
  69. experimaestro/tests/test_dynamic_locking.py +317 -0
  70. experimaestro/tests/test_environment.py +24 -14
  71. experimaestro/tests/test_experiment.py +171 -36
  72. experimaestro/tests/test_identifier.py +25 -25
  73. experimaestro/tests/test_identifier_stability.py +3 -5
  74. experimaestro/tests/test_multitoken.py +2 -4
  75. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  76. experimaestro/tests/test_partial_paths.py +81 -138
  77. experimaestro/tests/test_pre_experiment.py +219 -0
  78. experimaestro/tests/test_progress.py +2 -8
  79. experimaestro/tests/test_remote_state.py +560 -99
  80. experimaestro/tests/test_stray_jobs.py +261 -0
  81. experimaestro/tests/test_tasks.py +1 -2
  82. experimaestro/tests/test_token_locking.py +52 -67
  83. experimaestro/tests/test_tokens.py +5 -6
  84. experimaestro/tests/test_transient.py +225 -0
  85. experimaestro/tests/test_workspace_state_provider.py +768 -0
  86. experimaestro/tests/token_reschedule.py +1 -3
  87. experimaestro/tests/utils.py +2 -7
  88. experimaestro/tokens.py +227 -372
  89. experimaestro/tools/diff.py +1 -0
  90. experimaestro/tools/documentation.py +4 -5
  91. experimaestro/tools/jobs.py +1 -2
  92. experimaestro/tui/app.py +438 -1966
  93. experimaestro/tui/app.tcss +162 -0
  94. experimaestro/tui/dialogs.py +172 -0
  95. experimaestro/tui/log_viewer.py +253 -3
  96. experimaestro/tui/messages.py +137 -0
  97. experimaestro/tui/utils.py +54 -0
  98. experimaestro/tui/widgets/__init__.py +23 -0
  99. experimaestro/tui/widgets/experiments.py +468 -0
  100. experimaestro/tui/widgets/global_services.py +238 -0
  101. experimaestro/tui/widgets/jobs.py +972 -0
  102. experimaestro/tui/widgets/log.py +156 -0
  103. experimaestro/tui/widgets/orphans.py +363 -0
  104. experimaestro/tui/widgets/runs.py +185 -0
  105. experimaestro/tui/widgets/services.py +314 -0
  106. experimaestro/tui/widgets/stray_jobs.py +528 -0
  107. experimaestro/utils/__init__.py +1 -1
  108. experimaestro/utils/environment.py +105 -22
  109. experimaestro/utils/fswatcher.py +124 -0
  110. experimaestro/utils/jobs.py +1 -2
  111. experimaestro/utils/jupyter.py +1 -2
  112. experimaestro/utils/logging.py +72 -0
  113. experimaestro/version.py +2 -2
  114. experimaestro/webui/__init__.py +9 -0
  115. experimaestro/webui/app.py +117 -0
  116. experimaestro/{server → webui}/data/index.css +66 -11
  117. experimaestro/webui/data/index.css.map +1 -0
  118. experimaestro/{server → webui}/data/index.js +82763 -87217
  119. experimaestro/webui/data/index.js.map +1 -0
  120. experimaestro/webui/routes/__init__.py +5 -0
  121. experimaestro/webui/routes/auth.py +53 -0
  122. experimaestro/webui/routes/proxy.py +117 -0
  123. experimaestro/webui/server.py +200 -0
  124. experimaestro/webui/state_bridge.py +152 -0
  125. experimaestro/webui/websocket.py +413 -0
  126. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
  127. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  128. experimaestro/cli/progress.py +0 -269
  129. experimaestro/scheduler/state.py +0 -75
  130. experimaestro/scheduler/state_db.py +0 -437
  131. experimaestro/scheduler/state_sync.py +0 -891
  132. experimaestro/server/__init__.py +0 -467
  133. experimaestro/server/data/index.css.map +0 -1
  134. experimaestro/server/data/index.js.map +0 -1
  135. experimaestro/tests/test_cli_jobs.py +0 -615
  136. experimaestro/tests/test_file_progress.py +0 -425
  137. experimaestro/tests/test_file_progress_integration.py +0 -477
  138. experimaestro/tests/test_state_db.py +0 -434
  139. experimaestro-2.0.0b8.dist-info/RECORD +0 -187
  140. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  141. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  142. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  143. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  145. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  147. /experimaestro/{server → webui}/data/index.html +0 -0
  148. /experimaestro/{server → webui}/data/login.html +0 -0
  149. /experimaestro/{server → webui}/data/manifest.json +0 -0
  150. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  151. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  152. {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
@@ -5,7 +5,7 @@ import json
5
5
  import logging
6
6
  import sys
7
7
  from pathlib import Path
8
- from typing import Any, List, Optional, Protocol, Tuple
8
+ from typing import TYPE_CHECKING, Any, List, Optional, Protocol, Tuple
9
9
 
10
10
  import click
11
11
  import omegaconf
@@ -13,16 +13,21 @@ import yaml
13
13
  from omegaconf import OmegaConf, SCMode
14
14
  from termcolor import cprint
15
15
 
16
- from experimaestro import LauncherRegistry, RunMode, experiment
17
16
  from experimaestro.exceptions import HandledException
18
17
  from experimaestro.experiments.configuration import ConfigurationBase
18
+ from experimaestro.launcherfinder.registry import LauncherRegistry
19
+ from experimaestro.scheduler.workspace import RunMode
19
20
  from experimaestro.settings import find_workspace
21
+ from experimaestro.utils.logging import setup_logging
22
+
23
+ if TYPE_CHECKING:
24
+ from experimaestro.scheduler.experiment import experiment
20
25
 
21
26
 
22
27
  class ExperimentHelper:
23
28
  """Helper for experiments"""
24
29
 
25
- xp: experiment
30
+ xp: "experiment"
26
31
  """The experiment object"""
27
32
 
28
33
  #: Run function
@@ -52,7 +57,7 @@ class ExperimentHelper:
52
57
  class ExperimentCallable(Protocol):
53
58
  """Protocol for the run function"""
54
59
 
55
- def __call__(self, helper: ExperimentHelper, configuration: Any): ... # noqa: E704
60
+ def __call__(self, helper: ExperimentHelper, configuration: Any): ...
56
61
 
57
62
 
58
63
  class ConfigurationLoader:
@@ -74,6 +79,11 @@ class ConfigurationLoader:
74
79
  if not path.is_absolute():
75
80
  _data["file"] = str((yaml_file.parent / path).resolve())
76
81
 
82
+ if "pre_experiment" in _data:
83
+ path = Path(_data["pre_experiment"])
84
+ if not path.is_absolute():
85
+ _data["pre_experiment"] = str((yaml_file.parent / path).resolve())
86
+
77
87
  if "module" in _data:
78
88
  # Keeps track of the YAML file where the module was defined
79
89
  self.yaml_module_file = yaml_file
@@ -93,6 +103,18 @@ class ConfigurationLoader:
93
103
 
94
104
  @click.option("--debug", is_flag=True, help="Print debug information")
95
105
  @click.option("--show", is_flag=True, help="Print configuration and exits")
106
+ @click.option(
107
+ "--watcher",
108
+ type=click.Choice(["auto", "polling", "inotify", "fsevents", "kqueue", "windows"]),
109
+ default="auto",
110
+ help="Filesystem watcher type (auto=platform default, polling=network mounts)",
111
+ )
112
+ @click.option(
113
+ "--polling-interval",
114
+ type=float,
115
+ default=1.0,
116
+ help="Polling interval in seconds (only for --watcher=polling)",
117
+ )
96
118
  @click.option(
97
119
  "--env",
98
120
  help="Define one environment variable",
@@ -103,8 +125,8 @@ class ConfigurationLoader:
103
125
  "--host",
104
126
  type=str,
105
127
  default=None,
106
- help="Server hostname (default to localhost,"
107
- " not suitable if your jobs are remote)",
128
+ help="[DEPRECATED] Server hostname (use --web instead)",
129
+ hidden=True,
108
130
  )
109
131
  @click.option(
110
132
  "--run-mode",
@@ -116,20 +138,31 @@ class ConfigurationLoader:
116
138
  "--xpm-config-dir",
117
139
  type=Path,
118
140
  default=None,
119
- help="Path for the experimaestro config directory "
120
- "(if not specified, use $HOME/.config/experimaestro)",
141
+ help="Path for the experimaestro config directory (if not specified, use $HOME/.config/experimaestro)",
121
142
  )
122
143
  @click.option(
123
144
  "--port",
124
145
  type=int,
125
146
  default=None,
126
- help="Port for monitoring (can be defined in the settings.yaml file)",
147
+ help="[DEPRECATED] Port for monitoring (use --web instead)",
148
+ hidden=True,
149
+ )
150
+ @click.option(
151
+ "--web",
152
+ is_flag=True,
153
+ help="Start web server for monitoring (use settings.yaml for host/port config)",
127
154
  )
128
155
  @click.option(
129
156
  "--console",
130
157
  is_flag=True,
131
158
  help="Launch Textual console UI for monitoring with logs",
132
159
  )
160
+ @click.option(
161
+ "--no-db",
162
+ "no_db",
163
+ is_flag=True,
164
+ help="Disable database state tracking for this experiment",
165
+ )
133
166
  @click.option(
134
167
  "--file",
135
168
  "xp_file",
@@ -166,7 +199,9 @@ def experiments_cli( # noqa: C901
166
199
  xp_file: str,
167
200
  host: str,
168
201
  port: int,
202
+ web: bool,
169
203
  console: bool,
204
+ no_db: bool,
170
205
  xpm_config_dir: Path,
171
206
  workdir: Optional[Path],
172
207
  workspace: Optional[str],
@@ -178,13 +213,40 @@ def experiments_cli( # noqa: C901
178
213
  module_name: Optional[str],
179
214
  args: List[str],
180
215
  show: bool,
216
+ watcher: str,
217
+ polling_interval: float,
181
218
  debug: bool,
182
219
  ):
183
220
  """Run an experiment"""
184
-
185
- # --- Set the logger
186
- logging.getLogger().setLevel(logging.DEBUG if debug else logging.INFO)
187
- logging.getLogger("xpm.hash").setLevel(logging.INFO)
221
+ import warnings
222
+
223
+ # --- Set the logger with colors if outputting to terminal
224
+ setup_logging(debug=debug)
225
+
226
+ # --- Configure filesystem watcher type
227
+ from experimaestro.ipc import IPCom, WatcherType
228
+
229
+ if watcher != "auto":
230
+ IPCom.set_watcher_type(WatcherType(watcher), polling_interval)
231
+ elif polling_interval != 1.0:
232
+ # If polling interval is specified but watcher is auto, use polling
233
+ IPCom.set_watcher_type(WatcherType.POLLING, polling_interval)
234
+
235
+ # --- Warn about deprecated options
236
+ if host is not None:
237
+ warnings.warn(
238
+ "The '--host' option is deprecated. Use '--web' flag instead. "
239
+ "Configure host in settings.yaml.",
240
+ DeprecationWarning,
241
+ stacklevel=2,
242
+ )
243
+ if port is not None:
244
+ warnings.warn(
245
+ "The '--port' option is deprecated. Use '--web' flag instead. "
246
+ "Configure port in settings.yaml.",
247
+ DeprecationWarning,
248
+ stacklevel=2,
249
+ )
188
250
 
189
251
  # --- Loads the YAML
190
252
  conf_loader = ConfigurationLoader()
@@ -207,9 +269,9 @@ def experiments_cli( # noqa: C901
207
269
  if xp_file is None:
208
270
  xp_file = configuration.get("file", None)
209
271
  if xp_file:
210
- assert (
211
- not module_name
212
- ), "Module name and experiment file are mutually exclusive options"
272
+ assert not module_name, (
273
+ "Module name and experiment file are mutually exclusive options"
274
+ )
213
275
  xp_file = Path(xp_file)
214
276
  if not python_path:
215
277
  python_path.append(xp_file.parent.absolute())
@@ -217,9 +279,9 @@ def experiments_cli( # noqa: C901
217
279
  "Using python path: %s", ", ".join(str(s) for s in python_path)
218
280
  )
219
281
 
220
- assert (
221
- module_name or xp_file
222
- ), "Either the module name or experiment file should be given"
282
+ assert module_name or xp_file, (
283
+ "Either the module name or experiment file should be given"
284
+ )
223
285
 
224
286
  # --- Set some options
225
287
 
@@ -233,6 +295,27 @@ def experiments_cli( # noqa: C901
233
295
  for path in python_path:
234
296
  sys.path.append(str(path))
235
297
 
298
+ # --- Execute pre-experiment script if specified
299
+ pre_experiment = configuration.get("pre_experiment", None)
300
+ if pre_experiment:
301
+ pre_exp_path = Path(pre_experiment)
302
+ if pre_exp_path.exists():
303
+ logging.info("Executing pre-experiment script: %s", pre_exp_path)
304
+ try:
305
+ spec = importlib.util.spec_from_file_location(
306
+ "pre_experiment", str(pre_exp_path.absolute())
307
+ )
308
+ pre_mod = importlib.util.module_from_spec(spec)
309
+ spec.loader.exec_module(pre_mod)
310
+ except Exception as e:
311
+ raise click.ClickException(
312
+ f"Failed to execute pre-experiment script '{pre_exp_path}': {e}"
313
+ )
314
+ else:
315
+ raise click.ClickException(
316
+ f"Pre-experiment script not found: {pre_exp_path}"
317
+ )
318
+
236
319
  # --- Adds automatically the experiment module if not found
237
320
  if module_name and conf_loader.yaml_module_file:
238
321
  try:
@@ -285,6 +368,14 @@ def experiments_cli( # noqa: C901
285
368
  )
286
369
 
287
370
  schema = list_parameters[1].annotation
371
+ if isinstance(schema, str):
372
+ # Get the schema from the module
373
+ schema = getattr(mod.__dict__, schema, None)
374
+ assert schema is not None, (
375
+ f"Could not find schema {list_parameters[1].annotation} "
376
+ f"in module {module_name}"
377
+ )
378
+
288
379
  omegaconf_schema = OmegaConf.structured(schema())
289
380
 
290
381
  if omegaconf_schema is not None:
@@ -323,20 +414,48 @@ def experiments_cli( # noqa: C901
323
414
  str(workdir.resolve()),
324
415
  )
325
416
 
417
+ # Determine project path for git info
418
+ project_paths = []
419
+ if xp_file:
420
+ project_paths.append(xp_file.resolve().parent)
421
+ elif hasattr(mod, "__file__") and mod.__file__:
422
+ project_paths.append(Path(mod.__file__).resolve().parent)
423
+
326
424
  # Define the experiment execution function
327
- def run_experiment_code(xp_holder=None, xp_ready_event=None, register_signals=True):
425
+ def run_experiment_code(
426
+ xp_holder=None, xp_ready_event=None, register_signals=True, in_thread=False
427
+ ):
328
428
  """Run the experiment code - optionally storing xp in xp_holder"""
329
429
  try:
430
+ from experimaestro.scheduler.experiment import experiment
431
+ from experimaestro.settings import get_settings
432
+
330
433
  with experiment(
331
434
  ws_env,
332
435
  experiment_id,
333
- host=host,
334
- port=port,
335
436
  run_mode=run_mode,
336
437
  register_signals=register_signals,
438
+ project_paths=project_paths,
439
+ dirty_git=xp_configuration.dirty_git,
440
+ no_db=no_db,
337
441
  ) as xp:
338
442
  if xp_holder is not None:
339
443
  xp_holder["xp"] = xp
444
+
445
+ # Start web server if requested
446
+ if web and run_mode == RunMode.NORMAL:
447
+ settings = get_settings()
448
+ xp.scheduler.start_server(
449
+ settings.server,
450
+ workspace=xp.workspace,
451
+ wait_for_quit=False,
452
+ )
453
+ logging.info(
454
+ "Web server started at http://%s:%d",
455
+ settings.server.host or "localhost",
456
+ settings.server.port or 12345,
457
+ )
458
+
340
459
  if xp_ready_event is not None:
341
460
  xp_ready_event.set() # Signal that xp is ready
342
461
 
@@ -357,7 +476,11 @@ def experiments_cli( # noqa: C901
357
476
  # ... and wait
358
477
  xp.wait()
359
478
 
360
- except HandledException:
479
+ except HandledException as e:
480
+ if in_thread:
481
+ # Re-raise to preserve exception info for the main thread
482
+ raise
483
+ cprint(f"Experiment failed: {e}", "red", file=sys.stderr)
361
484
  sys.exit(1)
362
485
 
363
486
  # Console mode is only available in NORMAL run mode
@@ -366,47 +489,91 @@ def experiments_cli( # noqa: C901
366
489
  logging.warning("--console is ignored when run_mode is not NORMAL")
367
490
 
368
491
  if use_console:
369
- # Run experiment in background thread, console UI in main thread
492
+ # Start TUI first, then run experiment in background thread
493
+ # This ensures all logs (including startup) are captured
370
494
  import threading
371
495
  from experimaestro.tui import ExperimentTUI
372
496
 
497
+ # Initialize multiprocessing resource tracker before Textual takes over
498
+ # terminal file descriptors. This prevents "bad value(s) in fds_to_keep"
499
+ # errors when code in the background thread (e.g., tqdm in torchvision)
500
+ # tries to use multiprocessing.
501
+ try:
502
+ from multiprocessing import resource_tracker
503
+
504
+ resource_tracker.ensure_running()
505
+ except Exception:
506
+ pass # Best effort - may not be needed on all systems
507
+
373
508
  xp_holder = {"xp": None}
374
509
  exception_holder = {"exception": None}
375
- xp_ready = threading.Event()
376
510
 
377
- def run_in_thread():
511
+ # Create TUI first in deferred mode (no state_provider yet)
512
+ # This allows capturing all logs from experiment startup
513
+ tui_app = ExperimentTUI(show_logs=True)
514
+
515
+ def run_experiment_in_thread():
516
+ """Run experiment and connect state provider to TUI when ready"""
378
517
  try:
379
- # Don't register signals in background thread
380
- run_experiment_code(xp_holder, xp_ready, register_signals=False)
381
- # Add a test message after experiment completes
518
+ from experimaestro.scheduler.experiment import experiment as exp_context
519
+ from experimaestro.settings import get_settings
520
+
521
+ with exp_context(
522
+ ws_env,
523
+ experiment_id,
524
+ run_mode=run_mode,
525
+ register_signals=False, # TUI handles signals
526
+ project_paths=project_paths,
527
+ dirty_git=xp_configuration.dirty_git,
528
+ no_db=no_db,
529
+ ) as xp:
530
+ xp_holder["xp"] = xp
531
+
532
+ # Connect TUI to the experiment's scheduler
533
+ tui_app.call_from_thread(tui_app.set_state_provider, xp.scheduler)
534
+
535
+ # Start web server if requested
536
+ if web:
537
+ settings = get_settings()
538
+ xp.scheduler.start_server(
539
+ settings.server,
540
+ workspace=xp.workspace,
541
+ wait_for_quit=False,
542
+ )
543
+ logging.info(
544
+ "Web server started at http://%s:%d",
545
+ settings.server.host or "localhost",
546
+ settings.server.port or 12345,
547
+ )
548
+
549
+ logging.info("Experiment started")
550
+
551
+ # Set up the environment
552
+ for key, value in env:
553
+ xp.setenv(key, value)
554
+
555
+ # Sets the python path
556
+ xp.workspace.python_path.extend(python_path)
557
+
558
+ # Run the experiment
559
+ helper.xp = xp
560
+ helper.run(list(args), xp_configuration)
561
+
562
+ # ... and wait
563
+ xp.wait()
564
+
382
565
  logging.info("Experiment thread completed")
383
- except Exception as e:
566
+
567
+ except BaseException as e:
568
+ # Use BaseException to also catch SystemExit from sys.exit()
384
569
  exception_holder["exception"] = e
385
- xp_ready.set() # Signal even on error
386
570
 
387
571
  # Start experiment in background thread
388
- exp_thread = threading.Thread(target=run_in_thread, daemon=True)
572
+ exp_thread = threading.Thread(target=run_experiment_in_thread, daemon=True)
389
573
  exp_thread.start()
390
574
 
391
- # Wait for experiment to start (up to 30 seconds)
392
- if not xp_ready.wait(timeout=30.0):
393
- cprint("Timeout waiting for experiment to start", "red", file=sys.stderr)
394
- sys.exit(1)
395
-
396
- if xp_holder["xp"] is None:
397
- cprint("Failed to start experiment", "red", file=sys.stderr)
398
- if exception_holder["exception"]:
399
- raise exception_holder["exception"]
400
- sys.exit(1)
401
-
402
- # Run TUI in main thread (handles signals via Textual)
403
- tui_app = ExperimentTUI(
404
- workdir=workdir,
405
- state_provider=xp_holder["xp"].state_provider,
406
- show_logs=True,
407
- )
408
-
409
575
  try:
576
+ # Run TUI in main thread (handles signals via Textual)
410
577
  # Textual automatically captures stdout/stderr via Print events
411
578
  tui_app.run()
412
579
  finally:
@@ -1,3 +1,4 @@
1
+ from enum import Enum
1
2
  from omegaconf import MISSING
2
3
  from typing import Optional, List
3
4
  import attr
@@ -8,6 +9,19 @@ except ImportError:
8
9
  from typing_extensions import dataclass_transform
9
10
 
10
11
 
12
+ class DirtyGitAction(str, Enum):
13
+ """Action to take when the git repository has uncommitted changes"""
14
+
15
+ IGNORE = "ignore"
16
+ """Don't check or warn about dirty git state"""
17
+
18
+ WARN = "warn"
19
+ """Warn about dirty git state (default)"""
20
+
21
+ ERROR = "error"
22
+ """Raise an error if git is dirty"""
23
+
24
+
11
25
  @dataclass_transform(kw_only_default=True)
12
26
  def configuration(*args, **kwargs):
13
27
  """Method to define keyword only dataclasses
@@ -40,6 +54,13 @@ class ConfigurationBase:
40
54
  parent: Optional[str] = None
41
55
  """Relative path of a YAML file that should be merged"""
42
56
 
57
+ pre_experiment: Optional[str] = None
58
+ """Relative path to a Python file to execute before importing the experiment.
59
+
60
+ This is useful for setting environment variables or mocking modules to speed up
61
+ the experiment setup phase (e.g., mocking torch.compile or torch.nn).
62
+ The actual job execution will use real modules."""
63
+
43
64
  title: str = ""
44
65
  """Short description of the experiment"""
45
66
 
@@ -54,3 +75,6 @@ class ConfigurationBase:
54
75
 
55
76
  add_timestamp: bool = False
56
77
  """Adds a timestamp YYYY_MM_DD-HH_MM to the experiment ID"""
78
+
79
+ dirty_git: DirtyGitAction = DirtyGitAction.WARN
80
+ """Action when git repository has uncommitted changes: ignore, warn (default), error"""
@@ -6,7 +6,7 @@ from experimaestro.core.arguments import ArgumentOptions, TypeAnnotation
6
6
  from experimaestro.core.objects import ConfigWalkContext, Config
7
7
 
8
8
  if TYPE_CHECKING:
9
- from experimaestro.core.subparameters import Subparameters
9
+ from experimaestro.core.partial import Partial
10
10
 
11
11
 
12
12
  class Generator(ABC):
@@ -33,7 +33,7 @@ class PathGenerator(Generator):
33
33
  output: Meta[Path] = field(default_factory=PathGenerator("results.json"))
34
34
  model: Meta[Path] = field(default_factory=PathGenerator("model.pt"))
35
35
 
36
- For shared directories across related tasks, use with subparameters::
36
+ For shared directories across related tasks, use with partial::
37
37
 
38
38
  training_group = param_group("training")
39
39
 
@@ -42,13 +42,13 @@ class PathGenerator(Generator):
42
42
  checkpoint: Meta[Path] = field(
43
43
  default_factory=PathGenerator(
44
44
  "model.pt",
45
- subparameters=subparameters(exclude=[training_group])
45
+ partial=partial(exclude=[training_group])
46
46
  )
47
47
  )
48
48
 
49
49
  :param path: Relative path within the task directory. Can be a string,
50
50
  Path, or callable that takes (context, config) and returns a Path.
51
- :param subparameters: Optional subparameters for partial directory sharing.
51
+ :param partial: Optional partial for partial directory sharing.
52
52
  When provided, the path is generated in a shared partial directory.
53
53
  """
54
54
 
@@ -56,7 +56,7 @@ class PathGenerator(Generator):
56
56
  self,
57
57
  path: Union[str, Path, Callable[[ConfigWalkContext, Config], Path]] = "",
58
58
  *,
59
- partial: "Subparameters" = None,
59
+ partial: "Partial" = None,
60
60
  ):
61
61
  self.path = path
62
62
  self.partial = partial
experimaestro/ipc.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """IPC utilities"""
2
2
 
3
+ from enum import Enum
3
4
  from typing import Optional
4
5
  from pathlib import Path
5
6
  import os
@@ -11,16 +12,132 @@ from watchdog.observers.api import ObservedWatch
11
12
  from watchdog.events import FileSystemEventHandler
12
13
 
13
14
 
15
+ class WatcherType(str, Enum):
16
+ """Available filesystem watcher types"""
17
+
18
+ AUTO = "auto"
19
+ """Use the best available watcher for the platform (default)"""
20
+
21
+ POLLING = "polling"
22
+ """Platform-independent polling (works on network mounts)"""
23
+
24
+ INOTIFY = "inotify"
25
+ """Linux inotify (Linux 2.6.13+ only)"""
26
+
27
+ FSEVENTS = "fsevents"
28
+ """macOS FSEvents (macOS only)"""
29
+
30
+ KQUEUE = "kqueue"
31
+ """BSD/macOS kqueue (less scalable for deep directories)"""
32
+
33
+ WINDOWS = "windows"
34
+ """Windows API (Windows only)"""
35
+
36
+
37
+ def _create_observer(watcher_type: WatcherType, polling_interval: float = 1.0):
38
+ """Create an observer of the specified type
39
+
40
+ Args:
41
+ watcher_type: The type of filesystem watcher to use
42
+ polling_interval: Polling interval in seconds (for polling watcher)
43
+
44
+ Returns:
45
+ An observer instance
46
+
47
+ Raises:
48
+ ImportError: If the requested watcher type is not available on this platform
49
+ ValueError: If watcher_type is invalid
50
+ """
51
+ match watcher_type:
52
+ case WatcherType.AUTO:
53
+ return Observer()
54
+
55
+ case WatcherType.POLLING:
56
+ from watchdog.observers.polling import PollingObserver
57
+
58
+ return PollingObserver(timeout=polling_interval)
59
+
60
+ case WatcherType.INOTIFY:
61
+ from watchdog.observers.inotify import InotifyObserver
62
+
63
+ return InotifyObserver()
64
+
65
+ case WatcherType.FSEVENTS:
66
+ from watchdog.observers.fsevents import FSEventsObserver
67
+
68
+ return FSEventsObserver()
69
+
70
+ case WatcherType.KQUEUE:
71
+ from watchdog.observers.kqueue import KqueueObserver
72
+
73
+ return KqueueObserver()
74
+
75
+ case WatcherType.WINDOWS:
76
+ from watchdog.observers.read_directory_changes import WindowsApiObserver
77
+
78
+ return WindowsApiObserver()
79
+
80
+ case _:
81
+ raise ValueError(f"Unknown watcher type: {watcher_type}")
82
+
83
+
14
84
  class IPCom:
15
85
  """IPC async thread"""
16
86
 
17
87
  INSTANCE: Optional["IPCom"] = None
88
+ # Testing mode: use polling observer with small interval
89
+ TESTING_MODE: bool = False
90
+ POLLING_INTERVAL: float = 0.01
91
+ # Watcher type configuration
92
+ WATCHER_TYPE: WatcherType = WatcherType.AUTO
18
93
 
19
94
  def __init__(self):
20
- self.observer = Observer()
95
+ if IPCom.TESTING_MODE:
96
+ from watchdog.observers.polling import PollingObserver
97
+
98
+ self.observer = PollingObserver(timeout=IPCom.POLLING_INTERVAL)
99
+ else:
100
+ self.observer = _create_observer(IPCom.WATCHER_TYPE, IPCom.POLLING_INTERVAL)
21
101
  self.observer.start()
22
102
  self.pid = os.getpid()
23
103
 
104
+ @classmethod
105
+ def set_watcher_type(cls, watcher_type: WatcherType, polling_interval: float = 1.0):
106
+ """Set the filesystem watcher type
107
+
108
+ Args:
109
+ watcher_type: The type of watcher to use
110
+ polling_interval: Polling interval in seconds (for polling watcher)
111
+
112
+ Note:
113
+ This must be called before the first IPCom instance is created.
114
+ If an instance already exists, it will be reset.
115
+ """
116
+ cls.WATCHER_TYPE = watcher_type
117
+ cls.POLLING_INTERVAL = polling_interval
118
+ # Reset instance to apply new settings
119
+ if cls.INSTANCE is not None:
120
+ cls.INSTANCE.observer.stop()
121
+ cls.INSTANCE.observer.join(timeout=5)
122
+ cls.INSTANCE = None
123
+ logger.info("Set watcher type to %s", watcher_type.value)
124
+
125
+ @classmethod
126
+ def set_testing_mode(cls, enabled: bool = True, polling_interval: float = 0.01):
127
+ """Enable testing mode with polling observer
128
+
129
+ Args:
130
+ enabled: Whether to enable testing mode
131
+ polling_interval: Polling interval in seconds (default 0.01)
132
+ """
133
+ cls.TESTING_MODE = enabled
134
+ cls.POLLING_INTERVAL = polling_interval
135
+ # Reset instance to apply new settings
136
+ if cls.INSTANCE is not None:
137
+ cls.INSTANCE.observer.stop()
138
+ cls.INSTANCE.observer.join(timeout=5)
139
+ cls.INSTANCE = None
140
+
24
141
  def fswatch(
25
142
  self, watcher: FileSystemEventHandler, path: Path, recursive=False
26
143
  ) -> ObservedWatch:
@@ -1,6 +1,6 @@
1
- # flake8: noqa: F401
1
+ # ruff: noqa: F401
2
2
 
3
- from .base import *
3
+ from .base import ConnectorConfiguration, TokenConfiguration
4
4
  from .specs import (
5
5
  cpu,
6
6
  cuda_gpu,
@@ -59,9 +59,9 @@ class LauncherRegistry:
59
59
  ).expanduser()
60
60
 
61
61
  if LauncherRegistry.CURRENT_CONFIG_DIR not in LauncherRegistry.INSTANCES:
62
- LauncherRegistry.INSTANCES[
63
- LauncherRegistry.CURRENT_CONFIG_DIR
64
- ] = LauncherRegistry(LauncherRegistry.CURRENT_CONFIG_DIR)
62
+ LauncherRegistry.INSTANCES[LauncherRegistry.CURRENT_CONFIG_DIR] = (
63
+ LauncherRegistry(LauncherRegistry.CURRENT_CONFIG_DIR)
64
+ )
65
65
 
66
66
  return LauncherRegistry.INSTANCES[LauncherRegistry.CURRENT_CONFIG_DIR]
67
67
 
@@ -83,7 +83,6 @@ class LauncherRegistry:
83
83
 
84
84
  # Register the find launcher function if it exists
85
85
  launchers_py = basepath / "launchers.py"
86
- print(f"basepath {launchers_py}")
87
86
  if launchers_py.is_file():
88
87
  logger.info("Loading %s", launchers_py)
89
88
 
@@ -163,9 +162,9 @@ class LauncherRegistry:
163
162
  if self.find_launcher_fn is not None:
164
163
  for spec in specs.requirements:
165
164
  if launcher := self.find_launcher_fn(spec, tags):
166
- assert isinstance(
167
- launcher, Launcher
168
- ), "f{self.find_launcher_fn} did not return a Launcher but {type(launcher)}"
165
+ assert isinstance(launcher, Launcher), (
166
+ "f{self.find_launcher_fn} did not return a Launcher but {type(launcher)}"
167
+ )
169
168
  return launcher
170
169
 
171
170
  return None