experimaestro-2.0.0a8-py3-none-any.whl → experimaestro-2.0.0b4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (116)
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +130 -5
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +107 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +489 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +225 -30
  37. experimaestro/scheduler/interfaces.py +474 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/services.py +186 -12
  40. experimaestro/scheduler/state_db.py +388 -0
  41. experimaestro/scheduler/state_provider.py +2345 -0
  42. experimaestro/scheduler/state_sync.py +834 -0
  43. experimaestro/scheduler/workspace.py +52 -10
  44. experimaestro/scriptbuilder.py +7 -0
  45. experimaestro/server/__init__.py +147 -57
  46. experimaestro/server/data/index.css +0 -125
  47. experimaestro/server/data/index.css.map +1 -1
  48. experimaestro/server/data/index.js +194 -58
  49. experimaestro/server/data/index.js.map +1 -1
  50. experimaestro/settings.py +44 -5
  51. experimaestro/sphinx/__init__.py +3 -3
  52. experimaestro/taskglobals.py +20 -0
  53. experimaestro/tests/conftest.py +80 -0
  54. experimaestro/tests/core/test_generics.py +2 -2
  55. experimaestro/tests/identifier_stability.json +45 -0
  56. experimaestro/tests/launchers/bin/sacct +6 -2
  57. experimaestro/tests/launchers/bin/sbatch +4 -2
  58. experimaestro/tests/launchers/test_slurm.py +80 -0
  59. experimaestro/tests/tasks/test_dynamic.py +231 -0
  60. experimaestro/tests/test_cli_jobs.py +615 -0
  61. experimaestro/tests/test_deprecated.py +630 -0
  62. experimaestro/tests/test_environment.py +200 -0
  63. experimaestro/tests/test_file_progress_integration.py +1 -1
  64. experimaestro/tests/test_forward.py +3 -3
  65. experimaestro/tests/test_identifier.py +372 -41
  66. experimaestro/tests/test_identifier_stability.py +458 -0
  67. experimaestro/tests/test_instance.py +3 -3
  68. experimaestro/tests/test_multitoken.py +442 -0
  69. experimaestro/tests/test_mypy.py +433 -0
  70. experimaestro/tests/test_objects.py +312 -5
  71. experimaestro/tests/test_outputs.py +2 -2
  72. experimaestro/tests/test_param.py +8 -12
  73. experimaestro/tests/test_partial_paths.py +231 -0
  74. experimaestro/tests/test_progress.py +0 -48
  75. experimaestro/tests/test_resumable_task.py +480 -0
  76. experimaestro/tests/test_serializers.py +141 -1
  77. experimaestro/tests/test_state_db.py +434 -0
  78. experimaestro/tests/test_subparameters.py +160 -0
  79. experimaestro/tests/test_tags.py +136 -0
  80. experimaestro/tests/test_tasks.py +107 -121
  81. experimaestro/tests/test_token_locking.py +252 -0
  82. experimaestro/tests/test_tokens.py +17 -13
  83. experimaestro/tests/test_types.py +123 -1
  84. experimaestro/tests/test_workspace_triggers.py +158 -0
  85. experimaestro/tests/token_reschedule.py +4 -2
  86. experimaestro/tests/utils.py +2 -2
  87. experimaestro/tokens.py +154 -57
  88. experimaestro/tools/diff.py +1 -1
  89. experimaestro/tui/__init__.py +8 -0
  90. experimaestro/tui/app.py +2303 -0
  91. experimaestro/tui/app.tcss +353 -0
  92. experimaestro/tui/log_viewer.py +228 -0
  93. experimaestro/utils/__init__.py +23 -0
  94. experimaestro/utils/environment.py +148 -0
  95. experimaestro/utils/git.py +129 -0
  96. experimaestro/utils/resources.py +1 -1
  97. experimaestro/version.py +34 -0
  98. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +68 -38
  99. experimaestro-2.0.0b4.dist-info/RECORD +181 -0
  100. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
  101. experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
  102. experimaestro/compat.py +0 -6
  103. experimaestro/core/objects.pyi +0 -221
  104. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  105. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  106. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  107. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  108. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  109. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  110. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  111. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  112. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  113. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  114. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  115. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  116. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,18 @@
1
1
  from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
2
3
  import inspect
3
4
  import sys
4
- from typing import Set, TypeVar, Union, Dict, Iterator, List, get_args, get_origin
5
+ from typing import (
6
+ Set,
7
+ TypeVar,
8
+ Union,
9
+ Dict,
10
+ Iterator,
11
+ List,
12
+ Optional,
13
+ get_args,
14
+ get_origin,
15
+ )
5
16
  from collections import ChainMap
6
17
  from pathlib import Path
7
18
  import typing
@@ -13,15 +24,27 @@ from enum import Enum
13
24
  import ast
14
25
  import textwrap
15
26
 
16
- if sys.version_info.major == 3 and sys.version_info.minor < 9:
17
- from typing_extensions import _AnnotatedAlias, get_type_hints
18
- else:
19
- from typing import _AnnotatedAlias, get_type_hints
27
+ from typing import _AnnotatedAlias, get_type_hints
20
28
 
21
29
  if typing.TYPE_CHECKING:
22
30
  from experimaestro.scheduler.base import Job
23
31
  from experimaestro.launchers import Launcher
24
32
  from experimaestro.core.objects import Config
33
+ from experimaestro.core.subparameters import Subparameters
34
+
35
+
36
+ @dataclass
37
+ class DeprecationInfo:
38
+ """Information about a deprecated configuration type."""
39
+
40
+ #: The original identifier before deprecation
41
+ original_identifier: "Identifier"
42
+
43
+ #: The target configuration class to convert to
44
+ target: type
45
+
46
+ #: If True, creating an instance immediately converts to the target type
47
+ replace: bool = False
25
48
 
26
49
 
27
50
  class Identifier:
@@ -221,6 +244,9 @@ class ObjectType(Type):
221
244
  self._title = None
222
245
  self.submit_hooks = set()
223
246
 
247
+ # Warning flag for non-resumable task directory cleanup
248
+ self.warned_clean_not_resumable = False
249
+
224
250
  # --- Get the identifier
225
251
  if identifier is None and hasattr(tp, "__xpmid__"):
226
252
  __xpmid__ = getattr(tp, "__xpmid__")
@@ -279,7 +305,20 @@ class ObjectType(Type):
279
305
  self.__initialized__ = False
280
306
  self._runtype = None
281
307
  self.annotations = []
282
- self._deprecated = False
308
+ self._deprecation: Optional[DeprecationInfo] = None
309
+
310
+ # --- Value class (for external value types, e.g., nn.Module subclasses)
311
+ self._original_type: type = tp # Keep reference to original config class
312
+
313
+ # --- Subparameters for partial identifier computation
314
+ self._subparameters: Dict[str, "Subparameters"] = {}
315
+
316
+ def set_value_type(self, value_class: type) -> None:
317
+ """Register an explicit value class for this configuration.
318
+
319
+ The value class will be used when creating instances via .instance().
320
+ """
321
+ self.value_type = value_class
283
322
 
284
323
  def addAnnotation(self, annotation):
285
324
  assert not self.__initialized__
@@ -335,15 +374,18 @@ class ObjectType(Type):
335
374
  # Add task
336
375
  if self.taskcommandfactory is not None:
337
376
  self.task = self.taskcommandfactory(self)
338
- elif issubclass(self.value_type, Task):
377
+ elif issubclass(self._original_type, Task):
339
378
  self.task = self.getpythontaskcommand()
340
379
 
341
380
  # Add arguments from type hints
381
+ # Use _original_type since value_type may have been overridden by set_value_type
342
382
  from .arguments import TypeAnnotation
343
383
 
344
- if hasattr(self.value_type, "__annotations__"):
345
- typekeys = set(self.value_type.__dict__.get("__annotations__", {}).keys())
346
- hints = get_type_hints(self.value_type, include_extras=True)
384
+ if hasattr(self._original_type, "__annotations__"):
385
+ typekeys = set(
386
+ self._original_type.__dict__.get("__annotations__", {}).keys()
387
+ )
388
+ hints = get_type_hints(self._original_type, include_extras=True)
347
389
  for key, typehint in hints.items():
348
390
  # Filter out hints from parent classes
349
391
  if key in typekeys:
@@ -356,17 +398,27 @@ class ObjectType(Type):
356
398
  try:
357
399
  self.addArgument(
358
400
  options.create(
359
- key, self.value_type, typehint.__args__[0]
401
+ key, self._original_type, typehint.__args__[0]
360
402
  )
361
403
  )
362
404
  except Exception:
363
405
  logger.error(
364
406
  "while adding argument %s of %s",
365
407
  key,
366
- self.value_type,
408
+ self._original_type,
367
409
  )
368
410
  raise
369
411
 
412
+ # Collect subparameters from class attributes
413
+ from .subparameters import Subparameters as SubparametersClass
414
+
415
+ for name, value in self._original_type.__dict__.items():
416
+ if isinstance(value, SubparametersClass):
417
+ # Auto-set name from attribute name if not already set
418
+ if value.name is None:
419
+ value.name = name
420
+ self._subparameters[name] = value
421
+
370
422
  def name(self):
371
423
  return f"{self.value_type.__module__}.{self.value_type.__qualname__}"
372
424
 
@@ -378,7 +430,8 @@ class ObjectType(Type):
378
430
  self.__initialize__()
379
431
 
380
432
  # Get description from documentation
381
- __doc__ = self.value_type.__dict__.get("__doc__", None)
433
+ # Use _original_type since value_type may have been overridden
434
+ __doc__ = self._original_type.__dict__.get("__doc__", None)
382
435
  if __doc__:
383
436
  parseddoc = parse(__doc__)
384
437
  self._title = parseddoc.short_description
@@ -407,24 +460,56 @@ class ObjectType(Type):
407
460
 
408
461
  argname = None
409
462
 
410
- def deprecate(self):
411
- if len(self.value_type.__bases__) != 1:
412
- raise RuntimeError(
413
- "Deprecated configurations must have "
414
- "only one parent (the new configuration)"
415
- )
416
- assert not self._deprecated, "Already deprecated"
417
-
418
- # Uses the parent identifier (and saves the deprecated one for path updates)
419
- self._deprecated_identifier = self.identifier
420
- parent = self.value_type.__bases__[0].__getxpmtype__()
421
- self.identifier = parent.identifier
422
- self._deprecated = True
463
+ def deprecate(self, target=None, replace: bool = False):
464
+ """Mark this configuration type as deprecated.
465
+
466
+ Args:
467
+ target: Optional target configuration class. If provided, uses
468
+ target's identifier. If None, uses parent class's identifier
469
+ (legacy behavior requiring single inheritance).
470
+ replace: If True, creating an instance of this class immediately
471
+ returns a converted instance of the target class.
472
+
473
+ When a target is specified, the deprecated class should define a
474
+ __convert__ method that returns an equivalent target configuration.
475
+ The identifier is computed from the converted configuration.
476
+ """
477
+ assert self._deprecation is None, "Already deprecated"
478
+
479
+ # Save the deprecated identifier for migration tools (fix_deprecated)
480
+ original_identifier = self.identifier
481
+
482
+ if target is not None:
483
+ # New mechanism: explicit target class
484
+ target_xpmtype = target.__getxpmtype__()
485
+ self.identifier = target_xpmtype.identifier
486
+ deprecation_target = target
487
+ else:
488
+ # Legacy mechanism: parent class is the target
489
+ if len(self.value_type.__bases__) != 1:
490
+ raise RuntimeError(
491
+ "Deprecated configurations must have "
492
+ "only one parent (the new configuration)"
493
+ )
494
+ parent = self.value_type.__bases__[0].__getxpmtype__()
495
+ self.identifier = parent.identifier
496
+ deprecation_target = self.value_type.__bases__[0]
497
+
498
+ self._deprecation = DeprecationInfo(
499
+ original_identifier=original_identifier,
500
+ target=deprecation_target,
501
+ replace=replace,
502
+ )
423
503
 
424
504
  @property
425
505
  def deprecated(self) -> bool:
426
506
  """Returns true if this type is deprecated"""
427
- return self._deprecated
507
+ return self._deprecation is not None
508
+
509
+ @property
510
+ def _deprecated_identifier(self) -> Optional["Identifier"]:
511
+ """Returns the original identifier before deprecation (for backwards compatibility)"""
512
+ return self._deprecation.original_identifier if self._deprecation else None
428
513
 
429
514
  @property
430
515
  def description(self) -> str:
@@ -442,23 +527,72 @@ class ObjectType(Type):
442
527
  return self._arguments
443
528
 
444
529
  def addArgument(self, argument: Argument):
530
+ # Check if this argument overrides a parent argument
531
+ # _arguments is a ChainMap where maps[0] is current class, maps[1:] are parents
532
+ parent_argument = None
533
+ for parent_map in self._arguments.maps[1:]:
534
+ if argument.name in parent_map:
535
+ parent_argument = parent_map[argument.name]
536
+ break
537
+
538
+ if parent_argument is not None:
539
+ # Check type compatibility (child type should be subtype of parent type)
540
+ self._check_override_type_compatibility(argument, parent_argument)
541
+
542
+ # Warn if overrides flag is not set
543
+ if not argument.overrides:
544
+ logger.warning(
545
+ "Parameter '%s' in %s overrides parent parameter from %s. "
546
+ "Use field(overrides=True) to suppress this warning.",
547
+ argument.name,
548
+ self._original_type.__qualname__,
549
+ (
550
+ parent_argument.objecttype._original_type.__qualname__
551
+ if parent_argument.objecttype
552
+ else "unknown"
553
+ ),
554
+ )
555
+
445
556
  self._arguments[argument.name] = argument
446
557
  argument.objecttype = self
447
558
 
448
- # The the attribute for the config type
449
- setattr(
450
- self.config_type,
451
- argument.name,
452
- property(
453
- lambda _self: _self.__xpm__.get(argument.name),
454
- lambda _self, value: _self.__xpm__.set(argument.name, value),
455
- ),
456
- )
457
-
458
559
  # Check default value
459
560
  if argument.default is not None:
460
561
  argument.type.validate(argument.default)
461
562
 
563
+ def _check_override_type_compatibility(
564
+ self, child_arg: Argument, parent_arg: Argument
565
+ ):
566
+ """Check that the child argument type is compatible with the parent type.
567
+
568
+ For Config types, the child type should be a subtype of the parent type
569
+ (covariant). For other types, we check for exact match.
570
+ """
571
+ child_type = child_arg.type
572
+ parent_type = parent_arg.type
573
+
574
+ # Check if both are ObjectType (Config types)
575
+ if isinstance(child_type, ObjectType) and isinstance(parent_type, ObjectType):
576
+ child_pytype = child_type.value_type
577
+ parent_pytype = parent_type.value_type
578
+
579
+ # Check if child is a subtype of parent
580
+ if not issubclass(child_pytype, parent_pytype):
581
+ raise TypeError(
582
+ f"Parameter '{child_arg.name}' type {child_pytype.__qualname__} "
583
+ f"is not a subtype of parent type {parent_pytype.__qualname__}. "
584
+ f"Override types must be subtypes of the parent type."
585
+ )
586
+ elif type(child_type) is not type(parent_type):
587
+ # For non-Config types, check for exact type match
588
+ # Different type classes (e.g., IntType vs StrType) are incompatible
589
+ raise TypeError(
590
+ f"Parameter '{child_arg.name}' type {type(child_type).__name__} "
591
+ f"is not compatible with parent type {type(parent_type).__name__}. "
592
+ f"Override types must be the same type or a subtype."
593
+ )
594
+ # Same type class is allowed (e.g., both are IntType)
595
+
462
596
  def getArgument(self, key: str) -> Argument:
463
597
  self.__initialize__()
464
598
  return self._arguments[key]
@@ -466,7 +600,10 @@ class ObjectType(Type):
466
600
  def parents(self) -> Iterator["ObjectType"]:
467
601
  from .objects import Config, Task
468
602
 
469
- for tp in self.value_type.__bases__:
603
+ # Use _original_type to avoid issues when value_type has been
604
+ # overridden by set_value_type (the value class would create
605
+ # circular references since it inherits from the config class)
606
+ for tp in self._original_type.__bases__:
470
607
  if issubclass(tp, Config) and tp not in [Config, Task]:
471
608
  yield tp.__xpmtype__
472
609
 
@@ -1,2 +1,28 @@
1
1
  class HandledException(Exception):
2
2
  pass
3
+
4
+
5
+ class GracefulTimeout(Exception):
6
+ """Exception raised to signal a graceful timeout in resumable tasks.
7
+
8
+ Raise this exception when a task needs to checkpoint and exit before
9
+ a time limit (e.g., SLURM walltime). The task will be marked for retry
10
+ rather than as failed.
11
+
12
+ Example::
13
+
14
+ ```python
15
+ class LongTraining(ResumableTask):
16
+ def execute(self):
17
+ for epoch in range(self.epochs):
18
+ remaining = self.remaining_time()
19
+ if remaining is not None and remaining < 300:
20
+ save_checkpoint(self.checkpoint, epoch)
21
+ raise GracefulTimeout("Not enough time for another epoch")
22
+ train_one_epoch()
23
+ ```
24
+ """
25
+
26
+ def __init__(self, message: str = "Task stopped gracefully before timeout"):
27
+ self.message = message
28
+ super().__init__(message)
@@ -52,8 +52,7 @@ class ExperimentHelper:
52
52
  class ExperimentCallable(Protocol):
53
53
  """Protocol for the run function"""
54
54
 
55
- def __call__(self, helper: ExperimentHelper, configuration: Any):
56
- ...
55
+ def __call__(self, helper: ExperimentHelper, configuration: Any): ... # noqa: E704
57
56
 
58
57
 
59
58
  class ConfigurationLoader:
@@ -126,6 +125,11 @@ class ConfigurationLoader:
126
125
  default=None,
127
126
  help="Port for monitoring (can be defined in the settings.yaml file)",
128
127
  )
128
+ @click.option(
129
+ "--console",
130
+ is_flag=True,
131
+ help="Launch Textual console UI for monitoring with logs",
132
+ )
129
133
  @click.option(
130
134
  "--file",
131
135
  "xp_file",
@@ -162,6 +166,7 @@ def experiments_cli( # noqa: C901
162
166
  xp_file: str,
163
167
  host: str,
164
168
  port: int,
169
+ console: bool,
165
170
  xpm_config_dir: Path,
166
171
  workdir: Optional[Path],
167
172
  workspace: Optional[str],
@@ -298,43 +303,120 @@ def experiments_cli( # noqa: C901
298
303
  configuration, structured_config_mode=SCMode.INSTANTIATE
299
304
  )
300
305
 
301
- # Define the workspace
302
- ws_env = find_workspace(workdir=workdir, workspace=workspace)
303
-
304
- workdir = ws_env.path
305
-
306
306
  # --- Sets up the experiment ID
307
-
308
- # --- Runs the experiment
309
307
  if xp_configuration.add_timestamp:
310
308
  timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M")
311
309
  experiment_id = f"""{xp_configuration.id}-{timestamp}"""
312
310
  else:
313
311
  experiment_id = xp_configuration.id
314
312
 
313
+ # Define the workspace (may auto-select based on experiment_id triggers)
314
+ ws_env = find_workspace(
315
+ workdir=workdir, workspace=workspace, experiment_id=experiment_id
316
+ )
317
+
318
+ workdir = ws_env.path
319
+
315
320
  logging.info(
316
321
  "Running experiment %s working directory %s",
317
322
  experiment_id,
318
323
  str(workdir.resolve()),
319
324
  )
320
- with experiment(
321
- ws_env, experiment_id, host=host, port=port, run_mode=run_mode
322
- ) as xp:
323
- # Set up the environment
324
- # (1) global settings (2) workspace settings and (3) command line settings
325
- for key, value in env:
326
- xp.setenv(key, value)
327
-
328
- # Sets the python path
329
- xp.workspace.python_path.extend(python_path)
330
325
 
326
+ # Define the experiment execution function
327
+ def run_experiment_code(xp_holder=None, xp_ready_event=None, register_signals=True):
328
+ """Run the experiment code - optionally storing xp in xp_holder"""
331
329
  try:
332
- # Run the experiment
333
- helper.xp = xp
334
- helper.run(list(args), xp_configuration)
335
-
336
- # ... and wait
337
- xp.wait()
330
+ with experiment(
331
+ ws_env,
332
+ experiment_id,
333
+ host=host,
334
+ port=port,
335
+ run_mode=run_mode,
336
+ register_signals=register_signals,
337
+ ) as xp:
338
+ if xp_holder is not None:
339
+ xp_holder["xp"] = xp
340
+ if xp_ready_event is not None:
341
+ xp_ready_event.set() # Signal that xp is ready
342
+
343
+ # Test logging from experiment thread
344
+ logging.info("Experiment started in background thread")
345
+
346
+ # Set up the environment
347
+ for key, value in env:
348
+ xp.setenv(key, value)
349
+
350
+ # Sets the python path
351
+ xp.workspace.python_path.extend(python_path)
352
+
353
+ # Run the experiment
354
+ helper.xp = xp
355
+ helper.run(list(args), xp_configuration)
356
+
357
+ # ... and wait
358
+ xp.wait()
338
359
 
339
360
  except HandledException:
340
361
  sys.exit(1)
362
+
363
+ if console:
364
+ # Run experiment in background thread, console UI in main thread
365
+ import threading
366
+ from experimaestro.tui import ExperimentTUI
367
+
368
+ xp_holder = {"xp": None}
369
+ exception_holder = {"exception": None}
370
+ xp_ready = threading.Event()
371
+
372
+ def run_in_thread():
373
+ try:
374
+ # Don't register signals in background thread
375
+ run_experiment_code(xp_holder, xp_ready, register_signals=False)
376
+ # Add a test message after experiment completes
377
+ logging.info("Experiment thread completed")
378
+ print("Experiment thread print test")
379
+ except Exception as e:
380
+ exception_holder["exception"] = e
381
+ xp_ready.set() # Signal even on error
382
+
383
+ # Start experiment in background thread
384
+ exp_thread = threading.Thread(target=run_in_thread, daemon=True)
385
+ exp_thread.start()
386
+
387
+ # Wait for experiment to start (up to 30 seconds)
388
+ if not xp_ready.wait(timeout=30.0):
389
+ cprint("Timeout waiting for experiment to start", "red", file=sys.stderr)
390
+ sys.exit(1)
391
+
392
+ if xp_holder["xp"] is None:
393
+ cprint("Failed to start experiment", "red", file=sys.stderr)
394
+ if exception_holder["exception"]:
395
+ raise exception_holder["exception"]
396
+ sys.exit(1)
397
+
398
+ # Run TUI in main thread (handles signals via Textual)
399
+ tui_app = ExperimentTUI(
400
+ workdir=workdir,
401
+ state_provider=xp_holder["xp"].state_provider,
402
+ show_logs=True,
403
+ )
404
+
405
+ try:
406
+ # Textual automatically captures stdout/stderr via Print events
407
+ tui_app.run()
408
+ finally:
409
+ # TUI exited (user pressed q or Ctrl+C) - stop the experiment
410
+ if xp_holder["xp"]:
411
+ xp_holder["xp"].stop()
412
+
413
+ # Wait for experiment thread to finish
414
+ exp_thread.join(timeout=5.0)
415
+
416
+ # Handle exceptions
417
+ if exception_holder["exception"]:
418
+ raise exception_holder["exception"]
419
+
420
+ else:
421
+ # Normal mode without TUI - run directly
422
+ run_experiment_code()
@@ -1,10 +1,13 @@
1
1
  import inspect
2
2
  from pathlib import Path
3
3
  from abc import ABC, abstractmethod
4
- from typing import Callable, Union
4
+ from typing import Callable, Union, TYPE_CHECKING
5
5
  from experimaestro.core.arguments import ArgumentOptions, TypeAnnotation
6
6
  from experimaestro.core.objects import ConfigWalkContext, Config
7
7
 
8
+ if TYPE_CHECKING:
9
+ from experimaestro.core.subparameters import Subparameters
10
+
8
11
 
9
12
  class Generator(ABC):
10
13
  """Base class for all generators"""
@@ -15,25 +18,63 @@ class Generator(ABC):
15
18
  return False
16
19
 
17
20
  @abstractmethod
18
- def __call__(self, context: ConfigWalkContext, config: Config):
19
- ...
21
+ def __call__(self, context: ConfigWalkContext, config: Config): ...
20
22
 
21
23
 
22
24
  class PathGenerator(Generator):
23
- """Generates a path"""
25
+ """Generate paths within the task directory.
26
+
27
+ Use ``PathGenerator`` with ``field(default_factory=...)`` to create
28
+ paths relative to the task's working directory.
29
+
30
+ Example::
31
+
32
+ class MyTask(Task):
33
+ output: Meta[Path] = field(default_factory=PathGenerator("results.json"))
34
+ model: Meta[Path] = field(default_factory=PathGenerator("model.pt"))
35
+
36
+ For shared directories across related tasks, use with subparameters::
37
+
38
+ training_group = param_group("training")
39
+
40
+ class Train(Task):
41
+ epochs: Param[int] = field(groups=[training_group])
42
+ checkpoint: Meta[Path] = field(
43
+ default_factory=PathGenerator(
44
+ "model.pt",
45
+ subparameters=subparameters(exclude=[training_group])
46
+ )
47
+ )
48
+
49
+ :param path: Relative path within the task directory. Can be a string,
50
+ Path, or callable that takes (context, config) and returns a Path.
51
+ :param subparameters: Optional subparameters for partial directory sharing.
52
+ When provided, the path is generated in a shared partial directory.
53
+ """
24
54
 
25
55
  def __init__(
26
- self, path: Union[str, Path, Callable[[ConfigWalkContext, Config], Path]]
56
+ self,
57
+ path: Union[str, Path, Callable[[ConfigWalkContext, Config], Path]] = "",
58
+ *,
59
+ partial: "Subparameters" = None,
27
60
  ):
28
61
  self.path = path
62
+ self.partial = partial
29
63
 
30
64
  def __call__(self, context: ConfigWalkContext, config: Config):
31
- if inspect.isfunction(self.path):
32
- path = context.currentpath() / self.path(context, config) # type: Path
65
+ # Determine base path: partial directory or job directory
66
+ if self.partial is not None:
67
+ base_path = context.partial_path(self.partial, config)
33
68
  else:
34
- path = context.currentpath() / Path(self.path)
69
+ base_path = context.currentpath()
35
70
 
36
- return path
71
+ # Generate the final path
72
+ if inspect.isfunction(self.path):
73
+ return base_path / self.path(context, config)
74
+ elif self.path:
75
+ return base_path / Path(self.path)
76
+ else:
77
+ return base_path
37
78
 
38
79
  def isoutput(self):
39
80
  return True
@@ -82,4 +82,6 @@ class ExperimaestroHFHub(ModelHubMixin):
82
82
  )
83
83
  return hf_path
84
84
 
85
- return ConfigInformation.deserialize(data_loader, as_instance=as_instance)
85
+ return ConfigInformation.deserialize(
86
+ data_loader, as_instance=as_instance, partial_loading=True
87
+ )
@@ -105,6 +105,35 @@ class Visitor(PTNodeVisitor):
105
105
 
106
106
 
107
107
  def parse(expr: str):
108
+ """Parse a requirement specification string into a HostRequirement object.
109
+
110
+ The specification string describes hardware requirements for running a task.
111
+ Multiple alternatives can be specified using ``|`` (OR), and requirements
112
+ within an alternative are combined using ``&`` (AND).
113
+
114
+ **Syntax elements:**
115
+
116
+ - ``duration=<N><unit>``: Job duration (units: h/hours, d/days, m/mins)
117
+ - ``cpu(mem=<size>, cores=<N>)``: CPU requirements
118
+ - ``cuda(mem=<size>) * <N>``: GPU requirements (memory and count)
119
+ - Memory sizes: ``<N>G``, ``<N>GiB``, ``<N>M``, ``<N>MiB``
120
+
121
+ :param expr: The requirement specification string
122
+ :return: A :class:`~experimaestro.launcherfinder.specs.HostRequirement` object
123
+
124
+ **Example:**
125
+
126
+ .. code-block:: python
127
+
128
+ from experimaestro.launcherfinder.parser import parse
129
+
130
+ # Request 2 GPUs with 32GB each, 700GB RAM, for 40 hours
131
+ # OR 4 GPUs with 32GB each for 50 hours
132
+ req = parse(
133
+ "duration=40h & cpu(mem=700GiB) & cuda(mem=32GiB) * 2"
134
+ " | duration=50h & cpu(mem=700GiB) & cuda(mem=32GiB) * 4"
135
+ )
136
+ """
108
137
  parser = ParserPython(grammar, syntax_classes={"StrMatch": SuppressStrMatch})
109
138
  parse_tree = parser.parse(expr)
110
139
  return visit_parse_tree(parse_tree, Visitor(debug=False))
@@ -31,7 +31,19 @@ SubmitListener = Callable[[Job], None]
31
31
 
32
32
 
33
33
  class Launcher(ABC):
34
- """A launcher"""
34
+ """Base class for task launchers.
35
+
36
+ Launchers are responsible for executing tasks on a compute resource.
37
+ They work with a :class:`~experimaestro.connectors.Connector` to
38
+ access the target system and manage process execution.
39
+
40
+ Subclasses include:
41
+
42
+ - :class:`~experimaestro.launchers.direct.DirectLauncher`: Local execution
43
+ - :class:`~experimaestro.launchers.slurm.SlurmLauncher`: SLURM cluster
44
+
45
+ :param connector: The connector to use for accessing the compute resource
46
+ """
35
47
 
36
48
  submit_listeners: List[SubmitListener]
37
49
 
@@ -69,6 +81,19 @@ class Launcher(ABC):
69
81
  By default, returns the associated connector builder"""
70
82
  return self.connector.processbuilder()
71
83
 
84
+ @abstractmethod
85
+ def launcher_info_code(self) -> str:
86
+ """Returns Python code to set up launcher info during task execution.
87
+
88
+ This code is inserted into the generated task script to set up
89
+ launcher-specific information (like LauncherInformation for
90
+ querying remaining time).
91
+
92
+ Returns:
93
+ Python code as a string, or empty string if no setup needed.
94
+ """
95
+ ...
96
+
72
97
  @staticmethod
73
98
  def get(path: Path):
74
99
  """Get a default launcher for a given path"""