experimaestro-2.0.0b8-py3-none-any.whl → experimaestro-2.0.0b17-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +239 -126
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +217 -50
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +629 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +732 -167
- experimaestro/scheduler/interfaces.py +316 -101
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +171 -117
- experimaestro/scheduler/remote/protocol.py +8 -193
- experimaestro/scheduler/remote/server.py +95 -71
- experimaestro/scheduler/services.py +53 -28
- experimaestro/scheduler/state_provider.py +663 -2430
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +560 -99
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +438 -1966
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -437
- experimaestro/scheduler/state_sync.py +0 -891
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b8.dist-info/RECORD +0 -187
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/cli/filter.py
CHANGED

```diff
@@ -5,12 +5,27 @@ tags, and other attributes.
 """
 
 import re
-from
+from dataclasses import dataclass, field
+from typing import Callable, Dict, TYPE_CHECKING
 import pyparsing as pp
 
 if TYPE_CHECKING:
     from experimaestro.scheduler.state_provider import MockJob
 
+# Type alias for tags map: job_id -> {tag_key: tag_value}
+TagsMap = Dict[str, Dict[str, str]]
+
+
+@dataclass
+class FilterContext:
+    """Context for filter evaluation containing experiment-scoped data
+
+    Attributes:
+        tags_map: Maps job identifiers to their tags dict for the current experiment/run
+    """
+
+    tags_map: TagsMap = field(default_factory=dict)
+
 
 # --- classes for processing
 
@@ -19,14 +34,16 @@ class VarExpr:
     def __init__(self, values):
         (self.varname,) = values
 
-    def get(self, job: "MockJob"):
+    def get(self, job: "MockJob", ctx: FilterContext):
         if self.varname == "@state":
             return job.state.name if job.state else None
 
         if self.varname == "@name":
             return str(job.path.parent.name)
 
-
+        # Tags are stored in JobTagModel, accessed via ctx.tags_map keyed by job identifier
+        job_tags = ctx.tags_map.get(job.identifier, {})
+        return job_tags.get(self.varname, None)
 
     def __repr__(self):
         return f"""VAR<{self.varname}>"""
@@ -39,8 +56,8 @@ class BaseInExpr:
 
 
 class InExpr(BaseInExpr):
-    def filter(self, job: "MockJob"):
-        value = self.var.get(job)
+    def filter(self, job: "MockJob", ctx: FilterContext):
+        value = self.var.get(job, ctx)
         return value in self.values
 
     def __repr__(self):
@@ -48,8 +65,8 @@ class InExpr(BaseInExpr):
 
 
 class NotInExpr(BaseInExpr):
-    def filter(self, job: "MockJob"):
-        value = self.var.get(job)
+    def filter(self, job: "MockJob", ctx: FilterContext):
+        value = self.var.get(job, ctx)
         return value not in self.values
 
     def __repr__(self):
@@ -70,8 +87,8 @@ class RegexExpr:
 
         raise AssertionError()
 
-    def filter(self, job: "MockJob"):
-        value = self.var.get(job)
+    def filter(self, job: "MockJob", ctx: FilterContext):
+        value = self.var.get(job, ctx)
         if not value:
             return False
 
@@ -82,7 +99,7 @@ class ConstantString:
     def __init__(self, tokens):
         (self.value,) = tokens
 
-    def get(self, _job: "MockJob"):
+    def get(self, _job: "MockJob", _ctx: FilterContext):
         return self.value
 
     def __repr__(self):
@@ -96,8 +113,8 @@ class EqExpr:
     def __repr__(self):
         return f"""EQ[{self.var1}, {self.var2}]"""
 
-    def filter(self, job: "MockJob"):
-        return self.var1.get(job) == self.var2.get(job)
+    def filter(self, job: "MockJob", ctx: FilterContext):
+        return self.var1.get(job, ctx) == self.var2.get(job, ctx)
 
 
 class LogicExpr:
@@ -107,11 +124,11 @@ class LogicExpr:
         self.operator, self.y = tokens
         self.x = None
 
-    def filter(self, job: "MockJob"):
+    def filter(self, job: "MockJob", ctx: FilterContext):
         if self.operator == "and":
-            return self.y.filter(job) and self.x.filter(job)
+            return self.y.filter(job, ctx) and self.x.filter(job, ctx)
 
-        return self.y.filter(job) or self.x.filter(job)
+        return self.y.filter(job, ctx) or self.x.filter(job, ctx)
 
     @staticmethod
     def summary(tokens):
@@ -138,7 +155,7 @@ class LogicExpr:
 
 # --- Grammar
 
-
+lit = pp.Literal
 
 lpar, rpar, lbra, rbra, eq, comma, pipe, tilde = map(pp.Suppress, "()[]=,|~")
 quotedString = pp.QuotedString('"', unquoteResults=True) | pp.QuotedString(
@@ -148,7 +165,7 @@ quotedString = pp.QuotedString('"', unquoteResults=True) | pp.QuotedString(
 # Tag names can contain letters, digits, underscores, and hyphens
 # First character must be a letter, rest can include digits, underscores, hyphens
 tag_name = pp.Word(pp.alphas, pp.alphanums + "_-")
-var =
+var = lit("@state") | lit("@name") | tag_name
 var.setParseAction(VarExpr)
 
 regexExpr = var + tilde + quotedString
@@ -161,15 +178,15 @@ eqExpr.setParseAction(EqExpr)
 
 stringList = quotedString + pp.ZeroOrMore(comma + quotedString)
 
-notInExpr = var + (pp.Suppress(
+notInExpr = var + (pp.Suppress(lit("not in")) + lbra + stringList + rbra)
 notInExpr.setParseAction(NotInExpr)
 
-inExpr = var + (pp.Suppress(
+inExpr = var + (pp.Suppress(lit("in")) + lbra + stringList + rbra)
 inExpr.setParseAction(InExpr)
 
 matchExpr = eqExpr | regexExpr | inExpr | notInExpr
 
-booleanOp =
+booleanOp = lit("and") | lit("or")
 logicExpr = (
     matchExpr + pp.ZeroOrMore((booleanOp + matchExpr).setParseAction(LogicExpr))
 ).setParseAction(LogicExpr.summary)
@@ -181,14 +198,22 @@ filterExpr = (
 expr = (matchExpr + pp.Optional(pipe + filterExpr)).setParseAction(LogicExpr.generator)
 
 
-def createFilter(query: str) -> Callable[["MockJob"], bool]:
+def createFilter(query: str, ctx: FilterContext = None) -> Callable[["MockJob"], bool]:
     """Returns a filter function given a query string
 
     Args:
         query: Filter expression (e.g., '@state = "DONE" and model = "bm25"')
+        ctx: FilterContext containing tags map and other experiment-scoped data.
+            If None, an empty context is used.
 
     Returns:
-        A callable that takes a MockJob and returns True if it matches
+        A callable that takes a MockJob and returns True if it matches.
    """
+    if ctx is None:
+        ctx = FilterContext()
     (r,) = logicExpr.parseString(query, parseAll=True)
-
+
+    def filter_fn(job: "MockJob") -> bool:
+        return r.filter(job, ctx)
+
+    return filter_fn
```
experimaestro/cli/jobs.py
CHANGED

```diff
@@ -65,90 +65,91 @@ def process(
         fullpath: Show full paths instead of short names
         count: Limit output to N most recent jobs (0 = no limit)
     """
-    from .filter import createFilter
-    from experimaestro.scheduler.
+    from .filter import createFilter, FilterContext
+    from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
     from experimaestro.scheduler import JobState
 
-
+    # Get state provider (read-only monitoring)
+    provider = WorkspaceStateProvider.get_instance(workspace.path)
 
-    # Get
-
-
-
-    try:
-        # Get all jobs from the database
+    # Get jobs from the database, optionally filtered by experiment
+    if experiment:
+        all_jobs = provider.get_jobs(experiment_id=experiment)
+    else:
         all_jobs = provider.get_all_jobs()
 
-
-
-
+    # Load tags map for the experiment (if specified)
+    tags_map = {}
+    if experiment:
+        tags_map = provider.get_tags_map(experiment_id=experiment)
 
-
-
-        all_jobs = [j for j in all_jobs if _filter(j)]
+    # Create filter context with tags map
+    filter_ctx = FilterContext(tags_map=tags_map)
 
-
-
-        all_jobs.sort(key=lambda j: j.submittime or 0, reverse=True)
+    # Create filter function with context
+    _filter = createFilter(filter, filter_ctx) if filter else None
 
-
-
-
+    # Apply filter expression
+    if _filter:
+        all_jobs = [j for j in all_jobs if _filter(j)]
 
-
-
-
+    # Sort by submission time (most recent first)
+    # Jobs without submittime go to the end
+    all_jobs.sort(key=lambda j: j.submittime or 0, reverse=True)
+
+    # Limit to N most recent jobs if count is specified
+    if count > 0:
+        all_jobs = all_jobs[:count]
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            else:
-                cprint(f"KILL FAILED {job_str}", "red")
+    if not all_jobs:
+        cprint("No jobs found.", "yellow")
+        return
+
+    # Process each job
+    for job in all_jobs:
+        job_str = str(job.path) if fullpath else f"{job.task_id}/{job.identifier}"
+
+        if job.state is None or job.state == JobState.UNSCHEDULED:
+            print(colored(f"UNSCHED {job_str}", "red"), end="")
+        elif job.state.running():
+            if kill:
+                if perform:
+                    if provider.kill_job(job, perform=True):
+                        cprint(f"KILLED {job_str}", "light_red")
                     else:
-                        cprint(f"
+                        cprint(f"KILL FAILED {job_str}", "red")
                 else:
-
-        elif job.state == JobState.DONE:
-            print(colored(f"DONE {job_str}", "green"), end="")
-        elif job.state == JobState.ERROR:
-            print(colored(f"FAIL {job_str}", "red"), end="")
+                    cprint(f"KILLING {job_str} (dry run)", "yellow")
             else:
-            print(colored(f"{job.state.name:8}{job_str}", "
+                print(colored(f"{job.state.name:8}{job_str}", "yellow"), end="")
+        elif job.state == JobState.DONE:
+            print(colored(f"DONE {job_str}", "green"), end="")
+        elif job.state == JobState.ERROR:
+            print(colored(f"FAIL {job_str}", "red"), end="")
+        else:
+            print(colored(f"{job.state.name:8}{job_str}", "red"), end="")
 
-
-
-
+        # Show tags if requested (from tags_map)
+        if tags:
+            job_tags = tags_map.get(job.identifier, {})
+            if job_tags:
+                print(f""" {" ".join(f"{k}={v}" for k, v in job_tags.items())}""")
             elif not (kill and perform):
                 print()
+        elif not (kill and perform):
+            print()
 
-
-
-
-
-
-                else:
-                    cprint(" Clean failed", "red")
+        # Clean finished jobs
+        if clean and job.state and job.state.finished():
+            if perform:
+                if provider.clean_job(job, perform=True):
+                    cprint(" Cleaned", "red")
                 else:
-                    cprint("
-
-
+                    cprint(" Clean failed", "red")
+            else:
+                cprint(" Would clean (dry run)", "yellow")
 
-
-    # Close provider if we created it for write mode
-    if not read_only:
-        provider.close()
+    print()
 
 
 @click.option("--experiment", default=None, help="Restrict to this experiment")
@@ -250,7 +251,7 @@ def log(ctx, jobid: str, follow: bool, std: bool):
         / "jobs"
         / task_name
         / task_hash
-        / f"""{name}.{
+        / f"""{name}.{"out" if std else "err"}"""
     )
     if not log_path.exists():
         cprint(f"Log file not found: {log_path}", "red")
@@ -284,17 +285,15 @@ def cleanup_partials(ctx, perform: bool):
     """Clean up orphan partial directories
 
     Partial directories are shared checkpoint locations created by
-
+    partial. When all jobs using a partial are deleted, the
     partial becomes orphaned and can be cleaned up.
 
     This command finds all orphan partials and deletes them (or shows
     what would be deleted in dry-run mode).
     """
-    from experimaestro.scheduler.
+    from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
 
-    provider = WorkspaceStateProvider.get_instance(
-        ctx.obj.workspace.path, read_only=not perform
-    )
+    provider = WorkspaceStateProvider.get_instance(ctx.obj.workspace.path)
 
     try:
         orphan_paths = provider.cleanup_orphan_partials(perform=perform)
@@ -317,3 +316,186 @@ def cleanup_partials(ctx, perform: bool):
     finally:
         if perform:
             provider.close()
+
+
+@click.option(
+    "--kill", is_flag=True, help="Kill running stray jobs (requires --perform)"
+)
+@click.option(
+    "--delete", is_flag=True, help="Delete non-running stray jobs (requires --perform)"
+)
+@click.option("--perform", is_flag=True, help="Actually perform the operation")
+@click.option(
+    "--force",
+    is_flag=True,
+    help="Bypass safety checks (e.g., when scheduler is running)",
+)
+@click.option("--size", is_flag=True, help="Show size of each job folder")
+@click.option("--fullpath", is_flag=True, help="Show full paths")
+@jobs.command()
+@click.pass_context
+def stray(
+    ctx,
+    kill: bool,
+    delete: bool,
+    perform: bool,
+    force: bool,
+    size: bool,
+    fullpath: bool,
+):
+    """Manage stray jobs (jobs not associated with any experiment)
+
+    Stray jobs are jobs that exist on disk but are not referenced by any
+    experiment. This can happen when:
+
+    \b
+    - An experiment plan changes and a job is no longer needed
+    - An experiment is deleted but jobs remain on disk
+    - Jobs are manually created outside of experiments
+
+    Safety: By default, this command will warn if an experiment appears to be
+    running (scheduler active). Use --force to bypass this check.
+
+    Examples:
+
+    \b
+    # List all stray jobs
+    experimaestro jobs stray
+
+    \b
+    # List stray jobs with sizes
+    experimaestro jobs stray --size
+
+    \b
+    # Kill running stray jobs (dry run)
+    experimaestro jobs stray --kill
+
+    \b
+    # Kill running stray jobs (for real)
+    experimaestro jobs stray --kill --perform
+
+    \b
+    # Delete non-running stray jobs
+    experimaestro jobs stray --delete --perform
+
+    \b
+    # Kill and delete all stray jobs (dangerous!)
+    experimaestro jobs stray --kill --delete --perform --force
+    """
+    from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
+    from experimaestro.scheduler import JobState
+
+    provider = WorkspaceStateProvider.get_instance(ctx.obj.workspace.path)
+
+    # Safety check: warn if scheduler appears to be running
+    if provider.is_live and not force:
+        cprint(
+            "Warning: Scheduler appears to be running. Stray detection may be inaccurate.",
+            "yellow",
+        )
+        cprint("Use --force to proceed anyway.", "yellow")
+        if perform:
+            cprint("Aborting due to active scheduler.", "red")
+            return
+
+    # Get stray jobs (running orphans) and all orphan jobs
+    stray_jobs = provider.get_stray_jobs()
+    stray_jobs = [j for j in stray_jobs if j.path and j.path.exists()]
+
+    orphan_jobs = provider.get_orphan_jobs()
+    orphan_jobs = [j for j in orphan_jobs if j.path and j.path.exists()]
+
+    # Finished orphans = orphans that are not stray (not running)
+    stray_ids = {j.identifier for j in stray_jobs}
+    finished_jobs = [j for j in orphan_jobs if j.identifier not in stray_ids]
+
+    if not stray_jobs and not finished_jobs:
+        cprint("No stray or orphan jobs found.", "green")
+        return
+
+    # Print summary
+    print(
+        f"Found {len(stray_jobs)} stray (running) and {len(finished_jobs)} orphan (finished) jobs:"
+    )
+    if stray_jobs:
+        cprint(f" {len(stray_jobs)} stray (running)", "yellow")
+    if finished_jobs:
+        cprint(f" {len(finished_jobs)} orphan (finished)", "cyan")
+    print()
+
+    # Combine for display (stray first, then finished orphans)
+    all_jobs = stray_jobs + finished_jobs
+
+    # Process each job
+    killed_count = 0
+    deleted_count = 0
+
+    for job in all_jobs:
+        job_str = str(job.path) if fullpath else f"{job.task_id}/{job.identifier}"
+        state_name = job.state.name if job.state else "UNKNOWN"
+
+        # Determine color based on state
+        if job.state and job.state.running():
+            state_color = "yellow"
+        elif job.state == JobState.DONE:
+            state_color = "green"
+        elif job.state == JobState.ERROR:
+            state_color = "red"
+        else:
+            state_color = "white"
+
+        # Show job info
+        print(colored(f"{state_name:10}{job_str}", state_color), end="")
+
+        # Show size if requested
+        if size and job.path and job.path.exists():
+            try:
+                result = subprocess.run(
+                    ["du", "-hs", str(job.path)],
+                    capture_output=True,
+                    text=True,
+                    timeout=10,
+                )
+                if result.returncode == 0:
+                    size_str = result.stdout.strip().split()[0]
+                    print(f" [{size_str}]", end="")
+            except (subprocess.TimeoutExpired, Exception):
+                print(" [?]", end="")
+
+        print()
+
+        # Kill running jobs if requested
+        if kill and job.state and job.state.running():
+            if perform:
+                if provider.kill_job(job, perform=True):
+                    cprint(" KILLED", "light_red")
+                    killed_count += 1
+                else:
+                    cprint(" KILL FAILED", "red")
+            else:
+                cprint(" Would kill (dry run)", "yellow")
+
+        # Delete non-running jobs if requested
+        if delete and (not job.state or not job.state.running()):
+            if perform:
+                success, msg = provider.delete_job_safely(job, cascade_orphans=False)
+                if success:
+                    cprint(" DELETED", "light_red")
+                    deleted_count += 1
+                else:
+                    cprint(f" DELETE FAILED: {msg}", "red")
+            else:
+                cprint(" Would delete (dry run)", "yellow")
+
+    # Summary
+    print()
+    if perform:
+        if kill and killed_count > 0:
+            cprint(f"Killed {killed_count} running job(s)", "green")
+        if delete and deleted_count > 0:
+            cprint(f"Deleted {deleted_count} job(s)", "green")
+        # Clean up orphan partials after deleting jobs
+        provider.cleanup_orphan_partials(perform=True)
+    else:
+        if kill or delete:
+            cprint("Dry run - no changes made. Use --perform to execute.", "yellow")
```
experimaestro/cli/refactor.py
CHANGED

```diff
@@ -153,8 +153,7 @@ def refactor_file(file_path: Path, perform: bool) -> int:
             # Multi-line value - more complex handling needed
             # For now, just report it
             cprint(
-                f" {file_path}:{line_num}: {class_name}.{param_name} has multi-line default "
-                f"(manual fix required)",
+                f" {file_path}:{line_num}: {class_name}.{param_name} has multi-line default (manual fix required)",
                 "red",
             )
             changes_made += 1
```
experimaestro/commandline.py
CHANGED

```diff
@@ -233,6 +233,7 @@ class CommandLineJob(Job):
         launcher=None,
         run_mode: RunMode = None,
         max_retries=None,
+        transient=None,
     ):
         super().__init__(
             parameters,
@@ -240,6 +241,7 @@ class CommandLineJob(Job):
             launcher=launcher,
             run_mode=run_mode,
             max_retries=max_retries,
+            transient=transient,
         )
         self.commandline = commandline
 
@@ -305,11 +307,10 @@ class CommandLineJob(Job):
             self._process = processbuilder.start(True)
 
             with self.pidpath.open("w") as fp:
-
-                json.dump(process_spec, fp)
+                json.dump(self._process.tospec(), fp)
 
-            # Write
-            self.
+            # Write status with process info
+            self.status_path.write_text(json.dumps(self.state_dict()))
 
             self.state = JobState.RUNNING
             logger.info("Process started (%s)", self._process)
@@ -328,6 +329,7 @@ class CommandLineTask:
         workspace=None,
         run_mode=None,
         max_retries=None,
+        transient=None,
     ) -> Job:
         return CommandLineJob(
             self.commandline,
@@ -336,4 +338,5 @@ class CommandLineTask:
             workspace=workspace,
             run_mode=run_mode,
             max_retries=max_retries,
+            transient=transient,
         )
```
experimaestro/connectors/__init__.py
CHANGED

```diff
@@ -117,6 +117,14 @@ class Process:
         """Wait until the process finishes and returns the error code"""
         raise NotImplementedError(f"Not implemented: {self.__class__}.wait")
 
+    async def aio_wait(self) -> int:
+        """Asynchronously wait until the process finishes and returns the error code.
+
+        Subclasses should override this with a truly async implementation.
+        Default implementation uses asyncThreadcheck to run wait() in a thread.
+        """
+        return await asyncThreadcheck("aio_wait", self.wait)
+
     async def aio_state(self, timeout: float | None = None) -> ProcessState:
         """Returns the job state
 
@@ -134,7 +142,7 @@ class Process:
         Returns None if the process has already finished – and no information is
         known about the process.
         """
-        code = await
+        code = await self.aio_wait()
         logger.debug("Got return code %s for %s", code, self)
         return code
 
```
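The base class now provides a thread-backed `aio_wait` fallback (via `asyncThreadcheck` around the blocking `wait()`), and the return-code helper awaits `self.aio_wait()` instead of the synchronous path. Connectors with a real asynchronous process handle can override `aio_wait` directly. The sketch below shows the override pattern on a standalone wrapper class rather than on experimaestro's actual `Process` base class; `LocalAsyncProcess` is an invented name for illustration only.

```python
# Hypothetical sketch of overriding aio_wait() with a truly async wait
# (LocalAsyncProcess is illustrative, not an experimaestro class)
import asyncio
import sys


class LocalAsyncProcess:
    """Wraps an asyncio subprocess so waiting does not block a worker thread."""

    def __init__(self, proc: asyncio.subprocess.Process):
        self._proc = proc

    async def aio_wait(self) -> int:
        # Awaits the subprocess directly instead of running a blocking wait()
        # in a thread, which is what the base-class default would do
        return await self._proc.wait()


async def main() -> None:
    # Spawn a trivial child process and await its exit code asynchronously
    proc = await asyncio.create_subprocess_exec(sys.executable, "-c", "print('hello')")
    code = await LocalAsyncProcess(proc).aio_wait()
    print("exit code:", code)


if __name__ == "__main__":
    asyncio.run(main())
```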