PyPI - experimaestro - Versions diffs - 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl - Mend

experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of experimaestro might be problematic. Click here for more details.

Files changed (133)

experimaestro/__init__.py +10 -11
experimaestro/annotations.py +167 -206
experimaestro/cli/__init__.py +140 -16
experimaestro/cli/filter.py +42 -74
experimaestro/cli/jobs.py +157 -106
experimaestro/cli/progress.py +269 -0
experimaestro/cli/refactor.py +249 -0
experimaestro/click.py +0 -1
experimaestro/commandline.py +19 -3
experimaestro/connectors/__init__.py +22 -3
experimaestro/connectors/local.py +12 -0
experimaestro/core/arguments.py +192 -37
experimaestro/core/identifier.py +127 -12
experimaestro/core/objects/__init__.py +6 -0
experimaestro/core/objects/config.py +702 -285
experimaestro/core/objects/config_walk.py +24 -6
experimaestro/core/serialization.py +91 -34
experimaestro/core/serializers.py +1 -8
experimaestro/core/subparameters.py +164 -0
experimaestro/core/types.py +198 -83
experimaestro/exceptions.py +26 -0
experimaestro/experiments/cli.py +107 -25
experimaestro/generators.py +50 -9
experimaestro/huggingface.py +3 -1
experimaestro/launcherfinder/parser.py +29 -0
experimaestro/launcherfinder/registry.py +3 -3
experimaestro/launchers/__init__.py +26 -1
experimaestro/launchers/direct.py +12 -0
experimaestro/launchers/slurm/base.py +154 -2
experimaestro/mkdocs/base.py +6 -8
experimaestro/mkdocs/metaloader.py +0 -1
experimaestro/mypy.py +452 -7
experimaestro/notifications.py +75 -16
experimaestro/progress.py +404 -0
experimaestro/rpyc.py +0 -1
experimaestro/run.py +19 -6
experimaestro/scheduler/__init__.py +18 -1
experimaestro/scheduler/base.py +504 -959
experimaestro/scheduler/dependencies.py +43 -28
experimaestro/scheduler/dynamic_outputs.py +259 -130
experimaestro/scheduler/experiment.py +582 -0
experimaestro/scheduler/interfaces.py +474 -0
experimaestro/scheduler/jobs.py +485 -0
experimaestro/scheduler/services.py +186 -12
experimaestro/scheduler/signal_handler.py +32 -0
experimaestro/scheduler/state.py +1 -1
experimaestro/scheduler/state_db.py +388 -0
experimaestro/scheduler/state_provider.py +2345 -0
experimaestro/scheduler/state_sync.py +834 -0
experimaestro/scheduler/workspace.py +52 -10
experimaestro/scriptbuilder.py +7 -0
experimaestro/server/__init__.py +153 -32
experimaestro/server/data/index.css +0 -125
experimaestro/server/data/index.css.map +1 -1
experimaestro/server/data/index.js +194 -58
experimaestro/server/data/index.js.map +1 -1
experimaestro/settings.py +47 -6
experimaestro/sphinx/__init__.py +3 -3
experimaestro/taskglobals.py +20 -0
experimaestro/tests/conftest.py +80 -0
experimaestro/tests/core/test_generics.py +2 -2
experimaestro/tests/identifier_stability.json +45 -0
experimaestro/tests/launchers/bin/sacct +6 -2
experimaestro/tests/launchers/bin/sbatch +4 -2
experimaestro/tests/launchers/common.py +2 -2
experimaestro/tests/launchers/test_slurm.py +80 -0
experimaestro/tests/restart.py +1 -1
experimaestro/tests/tasks/all.py +7 -0
experimaestro/tests/tasks/test_dynamic.py +231 -0
experimaestro/tests/test_checkers.py +2 -2
experimaestro/tests/test_cli_jobs.py +615 -0
experimaestro/tests/test_dependencies.py +11 -17
experimaestro/tests/test_deprecated.py +630 -0
experimaestro/tests/test_environment.py +200 -0
experimaestro/tests/test_experiment.py +3 -3
experimaestro/tests/test_file_progress.py +425 -0
experimaestro/tests/test_file_progress_integration.py +477 -0
experimaestro/tests/test_forward.py +3 -3
experimaestro/tests/test_generators.py +93 -0
experimaestro/tests/test_identifier.py +520 -169
experimaestro/tests/test_identifier_stability.py +458 -0
experimaestro/tests/test_instance.py +16 -21
experimaestro/tests/test_multitoken.py +442 -0
experimaestro/tests/test_mypy.py +433 -0
experimaestro/tests/test_objects.py +314 -30
experimaestro/tests/test_outputs.py +8 -8
experimaestro/tests/test_param.py +22 -26
experimaestro/tests/test_partial_paths.py +231 -0
experimaestro/tests/test_progress.py +2 -50
experimaestro/tests/test_resumable_task.py +480 -0
experimaestro/tests/test_serializers.py +141 -60
experimaestro/tests/test_state_db.py +434 -0
experimaestro/tests/test_subparameters.py +160 -0
experimaestro/tests/test_tags.py +151 -15
experimaestro/tests/test_tasks.py +137 -160
experimaestro/tests/test_token_locking.py +252 -0
experimaestro/tests/test_tokens.py +25 -19
experimaestro/tests/test_types.py +133 -11
experimaestro/tests/test_validation.py +19 -19
experimaestro/tests/test_workspace_triggers.py +158 -0
experimaestro/tests/token_reschedule.py +5 -3
experimaestro/tests/utils.py +2 -2
experimaestro/tokens.py +154 -57
experimaestro/tools/diff.py +8 -1
experimaestro/tui/__init__.py +8 -0
experimaestro/tui/app.py +2303 -0
experimaestro/tui/app.tcss +353 -0
experimaestro/tui/log_viewer.py +228 -0
experimaestro/typingutils.py +11 -2
experimaestro/utils/__init__.py +23 -0
experimaestro/utils/environment.py +148 -0
experimaestro/utils/git.py +129 -0
experimaestro/utils/resources.py +1 -1
experimaestro/version.py +34 -0
{experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +70 -39
experimaestro-2.0.0b4.dist-info/RECORD +181 -0
{experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
experimaestro/compat.py +0 -6
experimaestro/core/objects.pyi +0 -225
experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
experimaestro-1.11.1.dist-info/RECORD +0 -158
experimaestro-1.11.1.dist-info/entry_points.txt +0 -17
{experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info/licenses}/LICENSE +0 -0

experimaestro/settings.py CHANGED Viewed

@@ -5,6 +5,7 @@ from functools import lru_cache
 from pathlib import Path
 from typing import Dict, Optional, List
 import logging
+import fnmatch
 @dataclass
@@ -37,7 +38,13 @@ class WorkspaceSettings:
     alt_workspaces: List[str] = field(default_factory=list)
     """Alternative workspaces to find jobs or experiments"""
+    max_retries: int = 3
+    """Maximum number of retries for resumable tasks that timeout (default: 3)"""
+    triggers: List[str] = field(default_factory=list)
+    """Glob patterns to automatically select this workspace based on experiment ID"""
     def __post_init__(self):
         self.path = self.path.expanduser().resolve()
@@ -83,8 +90,22 @@ def get_workspace(id: Optional[str] = None) -> Optional[WorkspaceSettings]:
     return None
-def find_workspace(*, workspace: Optional[str] = None, workdir: Optional[Path] = None) -> WorkspaceSettings:
-    """Find workspace"""
+def find_workspace(
+    *,
+    workspace: Optional[str] = None,
+    workdir: Optional[Path] = None,
+    experiment_id: Optional[str] = None,
+) -> WorkspaceSettings:
+    """Find workspace
+    Args:
+        workspace: Explicit workspace ID to use
+        workdir: Explicit working directory path
+        experiment_id: Experiment ID to match against workspace triggers
+    Returns:
+        WorkspaceSettings object
+    """
     workdir = Path(workdir) if workdir else None
     if workspace:
@@ -101,8 +122,28 @@ def find_workspace(*, workspace: Optional[str] = None, workdir: Optional[Path] =
         logging.info("Using workdir %s", workdir)
         ws_env = WorkspaceSettings("", workdir)
     else:
-        ws_env = get_workspace()
-        assert ws_env is not None, "No workdir or workspace defined, and no default"
-        logging.info("Using default workspace %s", ws_env.id)
+        # Try to match experiment_id against workspace triggers
+        matched_workspace = None
+        if experiment_id:
+            workspaces = get_settings().workspaces
+            for ws in workspaces:
+                for trigger in ws.triggers:
+                    if fnmatch.fnmatch(experiment_id, trigger):
+                        matched_workspace = ws
+                        logging.info(
+                            "Auto-selected workspace %s (matched trigger '%s')",
+                            ws.id,
+                            trigger,
+                        )
+                        break
+                if matched_workspace:
+                    break
+        if matched_workspace:
+            ws_env = matched_workspace
+        else:
+            ws_env = get_workspace()
+            assert ws_env is not None, "No workdir or workspace defined, and no default"
+            logging.info("Using default workspace %s", ws_env.id)
     return ws_env

experimaestro/sphinx/__init__.py CHANGED Viewed

@@ -113,12 +113,12 @@ class ConfigDocumenter(ClassDocumenter):
     @staticmethod
     def formatDefault(value) -> str:
         if isinstance(value, Config):
-            objecttype = value.__xpmtype__.objecttype
+            value_type = value.__xpmtype__.value_type
             params = ", ".join(
                 [f"{key}={value}" for key, value in value.__xpm__.values.items()]
             )
             # It would be possible to do better... if not
-            return f"{objecttype.__module__}.{objecttype.__qualname__}({params})"
+            return f"{value_type.__module__}.{value_type.__qualname__}({params})"
         return str(value)
@@ -176,7 +176,7 @@ class ConfigDocumenter(ClassDocumenter):
             self.add_line("   " + _("Bases: %s") % ", ".join(base_classes), sourcename)
         # Adds return type if different
-        if xpminfo.returntype != xpminfo.objecttype:
+        if xpminfo.returntype != xpminfo.value_type:
             self.add_line("", sourcename)
             self.add_line(
                 "   " + _("Submit type: %s") % restify(xpminfo.returntype), sourcename

experimaestro/taskglobals.py CHANGED Viewed

@@ -3,6 +3,23 @@ from pathlib import Path
 from typing import Optional
+class LauncherInformation:
+    """Minimal launcher information available during task execution.
+    This is a lightweight class used to query launcher-specific information
+    (like remaining time) during task execution. It's set by the generated
+    Python script that runs the task.
+    """
+    def remaining_time(self) -> Optional[float]:
+        """Returns the remaining time in seconds before the job times out.
+        Returns:
+            The remaining time in seconds, or None if no time limit.
+        """
+        return None
 class Env:
     _instance = None
@@ -12,6 +29,9 @@ class Env:
     # The current task path
     taskpath: Optional[Path] = None
+    # Launcher information (only set when running a task)
+    launcher_info: Optional[LauncherInformation] = None
     # Set to True when multi-processing when
     # in slave mode:
     # - no progress report

experimaestro/tests/conftest.py CHANGED Viewed

@@ -19,6 +19,86 @@ def xpmdirectory(tmp_path_factory):
         shutil.rmtree(workdir)
+@pytest.fixture(scope="function", autouse=True)
+def reset_scheduler():
+    """Reset scheduler state between tests to avoid state leakage with singleton pattern"""
+    from experimaestro.scheduler.base import Scheduler
+    from experimaestro.server import Server
+    # Get the singleton instance if it exists
+    if Scheduler._instance is not None:
+        scheduler = Scheduler._instance
+        # Clear job registrations but keep scheduler running
+        logging.debug(
+            f"FIXTURE: Clearing scheduler before test - jobs count: {len(scheduler.jobs)}"
+        )
+        # Clear experiment references from all jobs
+        for job in scheduler.jobs.values():
+            job.experiments.clear()
+        scheduler.jobs.clear()
+        scheduler.waitingjobs.clear()
+        scheduler.experiments.clear()
+        # Clear state provider experiment providers to avoid stale references
+        if (
+            hasattr(scheduler, "state_provider")
+            and scheduler.state_provider is not None
+        ):
+            # Close all experiment providers
+            for provider in scheduler.state_provider.experiment_providers.values():
+                provider.close()
+            scheduler.state_provider.experiment_providers.clear()
+            logging.debug("FIXTURE: Cleared state provider experiment providers")
+        # Also clear listeners to prevent stale listeners
+        scheduler.clear_listeners()
+        # Re-add state_provider as listener if it exists
+        if (
+            hasattr(scheduler, "state_provider")
+            and scheduler.state_provider is not None
+        ):
+            scheduler.addlistener(scheduler.state_provider)
+    # Reset server instance too
+    if Server._instance is not None:
+        logging.debug("FIXTURE: Clearing server instance")
+        Server._instance = None
+    yield
+    # Cleanup after test - clear again
+    if Scheduler._instance is not None:
+        scheduler = Scheduler._instance
+        logging.debug(
+            f"FIXTURE: Clearing scheduler after test - jobs count: {len(scheduler.jobs)}"
+        )
+        # Clear experiment references from all jobs
+        for job in scheduler.jobs.values():
+            job.experiments.clear()
+        scheduler.jobs.clear()
+        scheduler.waitingjobs.clear()
+        scheduler.experiments.clear()
+        # Clear state provider experiment providers
+        if (
+            hasattr(scheduler, "state_provider")
+            and scheduler.state_provider is not None
+        ):
+            for provider in scheduler.state_provider.experiment_providers.values():
+                provider.close()
+            scheduler.state_provider.experiment_providers.clear()
+        scheduler.clear_listeners()
+        # Re-add state_provider as listener if it exists
+        if (
+            hasattr(scheduler, "state_provider")
+            and scheduler.state_provider is not None
+        ):
+            scheduler.addlistener(scheduler.state_provider)
+    # Reset server after test
+    if Server._instance is not None:
+        Server._instance = None
 # Sets a flag
 def pytest_configure(config):
     import sys

experimaestro/tests/core/test_generics.py CHANGED Viewed

@@ -3,7 +3,7 @@
 from typing import Generic, Optional, TypeVar
 import pytest
-from experimaestro import Config, Param
+from experimaestro import field, Config, Param
 from experimaestro.core.arguments import Argument
 from experimaestro.core.types import TypeVarType
@@ -162,7 +162,7 @@ class TreeGenericConfig(Config, Generic[T]):
 class TagTreeGenericConfig(TreeGenericConfig[T], Generic[T]):
     """A tagged version of TreeGenericConfig to test recursive generics"""
-    tag: Param[str] = "default"
+    tag: Param[str] = field(ignore_default="default")
 def test_core_generics_recursive():

experimaestro/tests/identifier_stability.json ADDED Viewed

@@ -0,0 +1,45 @@
+{
+  "bool_false": "bb61efa2769d20e6665fd63911d8a1e2fcdd2af22ff1e6c860d2b26ab7b04ab2",
+  "bool_true": "e718f2e3a3cc5b6b816a9645f587d3009efb08642bd22db45c8c288b78ff11f4",
+  "cycle_simple": "a73ef01b1c3e4e0187aee95eda96d1c069fd4757ad0137ac66adbf3a9502673f",
+  "default_override": "90951821af9c0d84b3f300fadfab63387bbfad6d1982dfbaa5b4d7ebbbfcf800",
+  "default_with_default": "0203eb7eb6a13e3c4592c9366f76a5f53dd2c5211c576547873184af86558bc3",
+  "dict_empty": "d2c32c9305431266e4ab1f5a70face4cee13b02a01af4ba0a6046fb254971b5f",
+  "dict_multiple": "01994d7bc212a73ea9d80332bf460922ca786a9d4ab8d8f444b3673901c75c99",
+  "dict_nested_empty": "77ebb66bcfe1c24c166dd80ceaae5840c1729f5c435a9d4ae040e8285b9beca7",
+  "dict_nested_multiple": "4476cd6934c5cc4a63cce1594cbbed622d0e70f6291b6d8cb4092d7b2612bb15",
+  "dict_nested_single": "373a3e409042029439fdbcb679b4e6388242901772d273ddefafd5663f9e57e4",
+  "dict_single": "dd879ad5038694c95134926ab3524696437f6ec96e52341a4e8c8fd44a1c2ae2",
+  "enum_value_a": "96c98d4683658b0a8e62d67abcb32c918506f8e455685680ee50f7b174e91366",
+  "enum_value_b": "433e23f7e2ee01a850bd97da097cf158873cb7465d3faa86218d86c0f7c38834",
+  "enum_value_c": "5881adaa535d2c3c842e4f163fbabbe5f537bc18f48a5bc2e9887ad9b3deb00b",
+  "float_negative": "67a86cea76bc90be4ec052e8c2f08829fb989e64a1d419541e68485dff85dba1",
+  "float_simple": "f445ab15c80965e89436c9e58a369fb969f216f392e4cbb19846830731f9a1e4",
+  "float_zero": "35879908ca1652ea07a4b49f0c44e9aa2ca264bedf14974dd9a765b1fafd1921",
+  "instance_different_values": "e175afb36163f56078beb516cf7489238b491571519e25c2b5ff68afbeccc643",
+  "instance_separate": "6d6274a5b541f60833e5d15a556a1b8adfaaa6dd0970a09a57849edd7a0c6fdd",
+  "instance_shared": "d9a76235da634b81b7559d561322263698852fa2012005781569154d7ad3cfc5",
+  "int_negative": "4e2ad6ee44e1c9429b56900aea7ba81a6b746a38678d2e29a89805bfb32b9153",
+  "int_positive": "2c57a590b8bf1bb5283a54d971814e196f395269f2973096dc277dbc24218962",
+  "int_zero": "2696ea881e0f601d4ad75679560e0e3305fa2f15552952d88ac87df4cc6f9f49",
+  "list_empty": "457140939f4e4ea43c5cfa4e05f4db0ed82891f0b550e23bfedf1510aea94d0c",
+  "list_multiple": "14575fd83be49b8f23d43d54fab90768ea0c296a829eeaa1b5a312f8322fb2ef",
+  "list_nested_empty": "fa30a32619931b4048a9f9854d82975e955c48017cd72b474344fa6b5a9c9bbe",
+  "list_nested_multiple": "4cc6e0d3d4ac32209334b8667d6b18f37cc5fd1677309eaeec89e7862d98ec5f",
+  "list_nested_single": "160a7e361e3482536479beaf8250f3107436c59e60ca5da573548da60e4b9bcf",
+  "list_single": "d33a881039f9a79cb7d688e547acdc79092b86b9e05fcb65faebbebfb38b3067",
+  "nested_multi": "8a1a37250d6f90caa549b02f8899dcba51ce01e5f6f511896b6c00b9c4a714a0",
+  "nested_simple": "a52569bc853a4cacbc044ba3082bbc10670f9174279e283168115828de979be1",
+  "option_override": "e98d969a3a309b2a43bab46c0fce6a6ea3c8c337a63ecb78c3705b8c281927b5",
+  "option_with_default": "e98d969a3a309b2a43bab46c0fce6a6ea3c8c337a63ecb78c3705b8c281927b5",
+  "str_empty": "bfdf01b69cbed525f27d0a9c1ba1043086ae5705fbc4027f5cf899a394e38bca",
+  "str_simple": "ad42020604bcc3c36bbcef8843de7b7c3af80b4198a109b7768d65bc2f788b1a",
+  "str_unicode": "26972840e4f5f71b2303902e0247aaf1e27d8a14ab6495c433d1f95c32dd40e8",
+  "task_simple": "1ff8ca42cdc94959e1b0c3c019ef4ab1f45b30a4309cc9fef3e42f4ea7da3e86",
+  "task_submitted": "834fae0fffb762b20064e8c648221dab99e81ba6f00622219fccce1bd0a18a17",
+  "task_using_output": "6c51d8124133038482472973a439d785b7ce53e46bac096e047e0e6cf1fc104e",
+  "task_with_config": "b26f8a8f7b1b9f6bda7e9c7e334071097b377ac48caec9d7da7fe98bc8c97c84",
+  "task_with_init": "285697abd5eaef36264f640ef790880f076daea4aff1814f1a518aa014ba4b0d",
+  "task_with_multiple_init": "d0e8610e1312d9a3398b839c691e16c741d4520823763067465a3ddab63acb30",
+  "task_with_output": "9cbaadb16fc6168286703afe35805108c4600abd05380fe56160f50e20b3cbb6"
+}

experimaestro/tests/launchers/bin/sacct CHANGED Viewed

@@ -13,9 +13,13 @@ fi
 find "$XPM_SLURM_DIR/jobs" -name "*.start" | while read name; do
     jobid=${name%.start}
     sf="$jobid.status"
+    timeout_marker="$jobid.timeout"
     if test -f "$sf"; then
         exitcode="$(cat $sf)"
-        if test "$exitcode" == 0; then
+        # Check for timeout marker file
+        if test -f "$timeout_marker"; then
+            status=TIMEOUT
+        elif test "$exitcode" == 0; then
             status=COMPLETED
         else
             status=FAILED
@@ -25,4 +29,4 @@ find "$XPM_SLURM_DIR/jobs" -name "*.start" | while read name; do
     fi
     echo "$(basename $jobid)|${status}|$(cat ${jobid}.start)|$(cat ${jobid}.start)|"
-done
+done

experimaestro/tests/launchers/bin/sbatch CHANGED Viewed

@@ -65,12 +65,14 @@ done < "$1"
 cd "$chdir"
 echo "Starting $@ ${args[@]} > $stdout 2> $stderr" >&2
+# Get job ID before forking
+JOBID="$$"
 (
     export PATH="${CURDIR}/bin:$PATH"
+    export SLURM_JOB_ID="$JOBID"
     eval "$@" "${args[@]}"
-    echo $? > "$XPM_SLURM_DIR/jobs/$$.status"
+    echo $? > "$XPM_SLURM_DIR/jobs/$JOBID.status"
 ) > $stdout 2> $stderr &
-JOBID="$$"
 date > "$XPM_SLURM_DIR/jobs/$JOBID.start"
 disown

experimaestro/tests/launchers/common.py CHANGED Viewed

@@ -80,7 +80,7 @@ def takeback(launcher, datapath, txp1, txp2):
     waiting = datapath / "waiting"
     with txp1:
-        task: WaitUntilTouched = WaitUntilTouched(
+        task: WaitUntilTouched = WaitUntilTouched.C(
             touching=touching, waiting=waiting
         ).submit(launcher=launcher)
@@ -91,7 +91,7 @@ def takeback(launcher, datapath, txp1, txp2):
             time.sleep(0.01)
         with txp2:
-            result = WaitUntilTouched(touching=touching, waiting=waiting).submit(
+            result = WaitUntilTouched.C(touching=touching, waiting=waiting).submit(
                 launcher=launcher
             )

experimaestro/tests/launchers/test_slurm.py CHANGED Viewed

@@ -6,6 +6,8 @@ from experimaestro.connectors.local import LocalConnector
 from experimaestro.launchers.slurm import (
     SlurmLauncher,
 )
+from experimaestro import field, ResumableTask, Param
+from experimaestro.scheduler import JobState
 import shutil
 import pytest
 from .common import waitFromSpec, takeback
@@ -84,3 +86,81 @@ def test_slurm_takeback(slurmlauncher, tmp_path):
     datapath = tmp_path / "data"
     takeback(slurmlauncher, datapath, txp1, txp2)
+class SlurmResumableTask(ResumableTask):
+    """ResumableTask that simulates timeout on first N attempts for SLURM testing"""
+    checkpoint: Param[Path]
+    timeout_count: Param[int] = field(ignore_default=2)
+    slurm_jobs_dir: Param[Path]  # Path to mock SLURM jobs directory
+    output_file: Param[Path] = field(ignore_default=None)
+    def execute(self):
+        import os
+        # Read current attempt count from checkpoint
+        attempt = 1
+        if self.checkpoint.exists():
+            attempt = int(self.checkpoint.read_text()) + 1
+        print(f"SlurmResumableTask attempt #{attempt}")
+        # Write updated attempt count
+        self.checkpoint.write_text(str(attempt))
+        # Simulate timeout for first timeout_count attempts
+        if attempt <= self.timeout_count:
+            print(f"Simulating SLURM TIMEOUT on attempt {attempt}")
+            # Create timeout marker file for mock SLURM
+            # The marker needs to be named <jobid>.timeout in the SLURM jobs directory
+            # Use SLURM_JOB_ID environment variable (set by mock sbatch, like real SLURM)
+            job_id = os.environ.get("SLURM_JOB_ID")
+            if job_id:
+                timeout_marker = self.slurm_jobs_dir / f"{job_id}.timeout"
+                timeout_marker.write_text(f"timeout on attempt {attempt}")
+            # Exit with error to trigger SLURM timeout handling
+            raise RuntimeError(f"Simulated timeout on attempt {attempt}")
+        # Success - task completed
+        print(f"Task completed successfully on attempt {attempt}")
+        if self.output_file:
+            self.output_file.write_text(f"Completed after {attempt} attempts")
+@pytest.mark.timeout(30)
+def test_slurm_resumable_task(tmp_path: Path, slurmlauncher: SlurmLauncher):
+    """Test that ResumableTask retries and resumes after SLURM timeouts"""
+    with TemporaryExperiment("slurm-resumable", workdir=tmp_path / "xp", maxwait=25):
+        checkpoint = tmp_path / "checkpoint.txt"
+        output_file = tmp_path / "output.txt"
+        # Get the SLURM jobs directory from the launcher's binpath
+        slurm_jobs_dir = slurmlauncher.binpath.parent / "slurm" / "jobs"
+        # Submit task with max_retries to allow multiple timeout retries
+        task = SlurmResumableTask.C(
+            checkpoint=checkpoint,
+            timeout_count=2,  # Timeout on first 2 attempts
+            slurm_jobs_dir=slurm_jobs_dir,
+            output_file=output_file,
+        ).submit(launcher=slurmlauncher, max_retries=5)
+        # Wait for the task to complete
+        state = task.__xpm__.job.wait()
+        # Verify task completed successfully after retries
+        assert state == JobState.DONE, f"Task did not complete successfully: {state}"
+        assert (
+            task.__xpm__.job.retry_count == 2
+        ), f"Expected 2 retries, got {task.__xpm__.job.retry_count}"
+        # Verify checkpoint shows 3 attempts (2 timeouts + 1 success)
+        assert checkpoint.exists(), "Checkpoint file was not created"
+        assert (
+            int(checkpoint.read_text()) == 3
+        ), f"Expected 3 attempts, got {checkpoint.read_text()}"
+        # Verify output file was created on success
+        assert output_file.exists(), "Output file was not created"
+        assert "Completed after 3 attempts" in output_file.read_text()

experimaestro/tests/restart.py CHANGED Viewed

@@ -64,7 +64,7 @@ def restart(terminate: Callable, experiment):
     try:
         with TemporaryExperiment("restart", maxwait=20) as xp:
             # Create the task with dry_run and so we can get the file paths
-            task = Restart()
+            task = Restart.C()
             task.submit(run_mode=RunMode.DRY_RUN)
         # Start the experiment with another process, and kill the job

experimaestro/tests/tasks/all.py CHANGED Viewed

@@ -19,6 +19,13 @@ class SimpleTask(Task):
     def execute(self):
         print(self.x)  # noqa: T201
+    def task_outputs(self, dep):
+        return dep(SimpleTaskOutput.C(task=self))
+class SimpleTaskOutput(Config):
+    task: Param[SimpleTask]
 class Say(Task):
     out: Meta[Path] = field(default_factory=PathGenerator(STDOUT))

experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl

Potentially problematic release.

experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl