parsl 2025.10.13__py3-none-any.whl → 2025.10.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of parsl might be problematic.

parsl/app/bash.py CHANGED
@@ -88,7 +88,7 @@ def remote_side_bash_executor(func, *args, **kwargs):
  raise pe.AppTimeout(f"App {func_name} exceeded walltime: {timeout} seconds")
 
  except Exception as e:
- raise pe.AppException(f"App {func_name} caught exception with returncode: {returncode}", e)
+ raise pe.AppException(f"App {func_name} caught exception", e)
 
  if returncode != 0:
  raise pe.BashExitFailure(func_name, proc.returncode)
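
Note on the bash.py change above: only the AppException message text changes; the exception types raised by bash apps are unchanged. A minimal sketch of how these exceptions surface to callers (the executor choice and the failing command are illustrative assumptions, not part of this release):

    import parsl
    from parsl import bash_app
    from parsl.app.errors import BashExitFailure
    from parsl.config import Config
    from parsl.executors.threads import ThreadPoolExecutor

    parsl.load(Config(executors=[ThreadPoolExecutor()]))

    @bash_app
    def fail_cmd():
        return "exit 3"  # command exits with a non-zero status

    # A non-zero exit status surfaces as BashExitFailure on the AppFuture;
    # AppException (whose message this release shortens) is raised only when
    # the bash wrapper itself hits an exception while running the command.
    assert isinstance(fail_cmd().exception(), BashExitFailure)

    parsl.dfk().cleanup()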
parsl/dataflow/dflow.py CHANGED
@@ -6,7 +6,6 @@ import datetime
  import inspect
  import logging
  import os
- import pickle
  import random
  import sys
  import threading
@@ -50,7 +49,7 @@ from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSen
  from parsl.monitoring.remote import monitor_wrapper
  from parsl.process_loggers import wrap_with_logs
  from parsl.usage_tracking.usage import UsageTracker
- from parsl.utils import Timer, get_all_checkpoints, get_std_fname_mode, get_version
+ from parsl.utils import get_std_fname_mode, get_version
 
  logger = logging.getLogger(__name__)
 
@@ -101,8 +100,6 @@ class DataFlowKernel:
 
  logger.info("Parsl version: {}".format(get_version()))
 
- self.checkpoint_lock = threading.Lock()
-
  self.usage_tracker = UsageTracker(self)
  self.usage_tracker.send_start_message()
 
@@ -168,18 +165,12 @@ class DataFlowKernel:
  self.monitoring_radio.send((MessageType.WORKFLOW_INFO,
  workflow_info))
 
- if config.checkpoint_files is not None:
- checkpoint_files = config.checkpoint_files
- elif config.checkpoint_files is None and config.checkpoint_mode is not None:
- checkpoint_files = get_all_checkpoints(self.run_dir)
- else:
- checkpoint_files = []
-
- self.memoizer = Memoizer(memoize=config.app_cache, checkpoint_files=checkpoint_files)
- self.checkpointed_tasks = 0
- self._checkpoint_timer = None
- self.checkpoint_mode = config.checkpoint_mode
- self.checkpointable_tasks: List[TaskRecord] = []
+ self.memoizer = Memoizer(memoize=config.app_cache,
+ checkpoint_mode=config.checkpoint_mode,
+ checkpoint_files=config.checkpoint_files,
+ checkpoint_period=config.checkpoint_period)
+ self.memoizer.run_dir = self.run_dir
+ self.memoizer.start()
 
  # this must be set before executors are added since add_executors calls
  # job_status_poller.add_executors.
@@ -195,17 +186,6 @@ class DataFlowKernel:
  self.add_executors(config.executors)
  self.add_executors([parsl_internal_executor])
 
- if self.checkpoint_mode == "periodic":
- if config.checkpoint_period is None:
- raise ConfigurationError("Checkpoint period must be specified with periodic checkpoint mode")
- else:
- try:
- h, m, s = map(int, config.checkpoint_period.split(':'))
- except Exception:
- raise ConfigurationError("invalid checkpoint_period provided: {0} expected HH:MM:SS".format(config.checkpoint_period))
- checkpoint_period = (h * 3600) + (m * 60) + s
- self._checkpoint_timer = Timer(self.checkpoint, interval=checkpoint_period, name="Checkpoint")
-
  self.task_count = 0
  self.tasks: Dict[int, TaskRecord] = {}
  self.submitter_lock = threading.Lock()
@@ -371,13 +351,9 @@ class DataFlowKernel:
  else:
  task_record['fail_cost'] += 1
 
- if task_record['status'] == States.dep_fail:
+ if isinstance(e, DependencyError):
  logger.info("Task {} failed due to dependency failure so skipping retries".format(task_id))
- task_record['time_returned'] = datetime.datetime.now()
- self._send_task_log_info(task_record)
- self.memoizer.update_memo(task_record)
- with task_record['app_fu']._update_lock:
- task_record['app_fu'].set_exception(e)
+ self._complete_task_exception(task_record, States.dep_fail, e)
 
  elif task_record['fail_cost'] <= self._config.retries:
 
@@ -397,61 +373,48 @@ class DataFlowKernel:
  else:
  logger.exception("Task {} failed after {} retry attempts".format(task_id,
  task_record['try_id']))
- task_record['time_returned'] = datetime.datetime.now()
- self.update_task_state(task_record, States.failed)
- task_record['time_returned'] = datetime.datetime.now()
- self._send_task_log_info(task_record)
- self.memoizer.update_memo(task_record)
- with task_record['app_fu']._update_lock:
- task_record['app_fu'].set_exception(e)
+ self._complete_task_exception(task_record, States.failed, e)
 
  else:
  if task_record['from_memo']:
- self._complete_task(task_record, States.memo_done, res)
- self._send_task_log_info(task_record)
+ self._complete_task_result(task_record, States.memo_done, res)
+ elif not task_record['join']:
+ self._complete_task_result(task_record, States.exec_done, res)
  else:
- if not task_record['join']:
- self._complete_task(task_record, States.exec_done, res)
+ # This is a join task, and the original task's function code has
+ # completed. That means that the future returned by that code
+ # will be available inside the executor future, so we can now
+ # record the inner app ID in monitoring, and add a completion
+ # listener to that inner future.
+
+ joinable = future.result()
+
+ # Fail with a TypeError if the joinapp python body returned
+ # something we can't join on.
+ if isinstance(joinable, Future):
+ self.update_task_state(task_record, States.joining)
+ task_record['joins'] = joinable
+ task_record['join_lock'] = threading.Lock()
+ self._send_task_log_info(task_record)
+ joinable.add_done_callback(partial(self.handle_join_update, task_record))
+ elif joinable == []: # got a list, but it had no entries, and specifically, no Futures.
+ self.update_task_state(task_record, States.joining)
+ task_record['joins'] = joinable
+ task_record['join_lock'] = threading.Lock()
  self._send_task_log_info(task_record)
+ self.handle_join_update(task_record, None)
+ elif isinstance(joinable, list) and [j for j in joinable if not isinstance(j, Future)] == []:
+ self.update_task_state(task_record, States.joining)
+ task_record['joins'] = joinable
+ task_record['join_lock'] = threading.Lock()
+ self._send_task_log_info(task_record)
+ for inner_future in joinable:
+ inner_future.add_done_callback(partial(self.handle_join_update, task_record))
  else:
- # This is a join task, and the original task's function code has
- # completed. That means that the future returned by that code
- # will be available inside the executor future, so we can now
- # record the inner app ID in monitoring, and add a completion
- # listener to that inner future.
-
- joinable = future.result()
-
- # Fail with a TypeError if the joinapp python body returned
- # something we can't join on.
- if isinstance(joinable, Future):
- self.update_task_state(task_record, States.joining)
- task_record['joins'] = joinable
- task_record['join_lock'] = threading.Lock()
- self._send_task_log_info(task_record)
- joinable.add_done_callback(partial(self.handle_join_update, task_record))
- elif joinable == []: # got a list, but it had no entries, and specifically, no Futures.
- self.update_task_state(task_record, States.joining)
- task_record['joins'] = joinable
- task_record['join_lock'] = threading.Lock()
- self._send_task_log_info(task_record)
- self.handle_join_update(task_record, None)
- elif isinstance(joinable, list) and [j for j in joinable if not isinstance(j, Future)] == []:
- self.update_task_state(task_record, States.joining)
- task_record['joins'] = joinable
- task_record['join_lock'] = threading.Lock()
- self._send_task_log_info(task_record)
- for inner_future in joinable:
- inner_future.add_done_callback(partial(self.handle_join_update, task_record))
- else:
- task_record['time_returned'] = datetime.datetime.now()
- self.update_task_state(task_record, States.failed)
- task_record['time_returned'] = datetime.datetime.now()
- self._send_task_log_info(task_record)
- self.memoizer.update_memo(task_record)
- with task_record['app_fu']._update_lock:
- task_record['app_fu'].set_exception(
- TypeError(f"join_app body must return a Future or list of Futures, got {joinable} of type {type(joinable)}"))
+ self._complete_task_exception(
+ task_record,
+ States.failed,
+ TypeError(f"join_app body must return a Future or list of Futures, got {joinable} of type {type(joinable)}"))
 
  self._log_std_streams(task_record)
 
@@ -522,11 +485,7 @@ class DataFlowKernel:
  # no need to update the fail cost because join apps are never
  # retried
 
- self.update_task_state(task_record, States.failed)
- task_record['time_returned'] = datetime.datetime.now()
- self.memoizer.update_memo(task_record)
- with task_record['app_fu']._update_lock:
- task_record['app_fu'].set_exception(e)
+ self._complete_task_exception(task_record, States.failed, e)
 
  else:
  # all the joinables succeeded, so construct a result:
@@ -539,12 +498,10 @@ class DataFlowKernel:
  res.append(future.result())
  else:
  raise TypeError(f"Unknown joinable type {type(joinable)}")
- self._complete_task(task_record, States.exec_done, res)
+ self._complete_task_result(task_record, States.exec_done, res)
 
  self._log_std_streams(task_record)
 
- self._send_task_log_info(task_record)
-
  def handle_app_update(self, task_record: TaskRecord, future: AppFuture) -> None:
  """This function is called as a callback when an AppFuture
  is in its final state.
@@ -565,23 +522,12 @@ class DataFlowKernel:
  if not task_record['app_fu'] == future:
  logger.error("Internal consistency error: callback future is not the app_fu in task structure, for task {}".format(task_id))
 
- # Cover all checkpointing cases here:
- # Do we need to checkpoint now, or queue for later,
- # or do nothing?
- if self.checkpoint_mode == 'task_exit':
- self.checkpoint(tasks=[task_record])
- elif self.checkpoint_mode in ('manual', 'periodic', 'dfk_exit'):
- with self.checkpoint_lock:
- self.checkpointable_tasks.append(task_record)
- elif self.checkpoint_mode is None:
- pass
- else:
- raise InternalConsistencyError(f"Invalid checkpoint mode {self.checkpoint_mode}")
+ self.memoizer.update_checkpoint(task_record)
 
  self.wipe_task(task_id)
  return
 
- def _complete_task(self, task_record: TaskRecord, new_state: States, result: Any) -> None:
+ def _complete_task_result(self, task_record: TaskRecord, new_state: States, result: Any) -> None:
  """Set a task into a completed state
  """
  assert new_state in FINAL_STATES
@@ -594,9 +540,31 @@ class DataFlowKernel:
  task_record['time_returned'] = datetime.datetime.now()
 
  self.memoizer.update_memo(task_record)
+
+ self._send_task_log_info(task_record)
+
  with task_record['app_fu']._update_lock:
  task_record['app_fu'].set_result(result)
 
+ def _complete_task_exception(self, task_record: TaskRecord, new_state: States, exception: BaseException) -> None:
+ """Set a task into a failure state
+ """
+ assert new_state in FINAL_STATES
+ assert new_state in FINAL_FAILURE_STATES
+ old_state = task_record['status']
+
+ self.update_task_state(task_record, new_state)
+
+ logger.info(f"Task {task_record['id']} failed ({old_state.name} -> {new_state.name})")
+ task_record['time_returned'] = datetime.datetime.now()
+
+ self.memoizer.update_memo(task_record)
+
+ self._send_task_log_info(task_record)
+
+ with task_record['app_fu']._update_lock:
+ task_record['app_fu'].set_exception(exception)
+
  def update_task_state(self, task_record: TaskRecord, new_state: States) -> None:
  """Updates a task record state, and recording an appropriate change
  to task state counters.
@@ -648,7 +616,7 @@ class DataFlowKernel:
  _launch_if_ready will launch the specified task, if it is ready
  to run (for example, without dependencies, and in pending state).
  """
- exec_fu = None
+ exec_fu: Future
 
  task_id = task_record['id']
  with task_record['task_launch_lock']:
@@ -687,28 +655,24 @@ class DataFlowKernel:
  else:
  logger.info(
  "Task {} failed due to dependency failure".format(task_id))
- # Raise a dependency exception
- self.update_task_state(task_record, States.dep_fail)
-
- self._send_task_log_info(task_record)
 
  exec_fu = Future()
  exec_fu.set_exception(DependencyError(exceptions_tids,
  task_id))
 
- if exec_fu:
- assert isinstance(exec_fu, Future)
- try:
- exec_fu.add_done_callback(partial(self.handle_exec_update, task_record))
- except Exception:
- # this exception is ignored here because it is assumed that exception
- # comes from directly executing handle_exec_update (because exec_fu is
- # done already). If the callback executes later, then any exception
- # coming out of the callback will be ignored and not propate anywhere,
- # so this block attempts to keep the same behaviour here.
- logger.error("add_done_callback got an exception which will be ignored", exc_info=True)
+ assert isinstance(exec_fu, Future), "Every code path leading here needs to define exec_fu"
 
- task_record['exec_fu'] = exec_fu
+ try:
+ exec_fu.add_done_callback(partial(self.handle_exec_update, task_record))
+ except Exception:
+ # this exception is ignored here because it is assumed that exception
+ # comes from directly executing handle_exec_update (because exec_fu is
+ # done already). If the callback executes later, then any exception
+ # coming out of the callback will be ignored and not propate anywhere,
+ # so this block attempts to keep the same behaviour here.
+ logger.error("add_done_callback got an exception which will be ignored", exc_info=True)
+
+ task_record['exec_fu'] = exec_fu
 
  def launch_task(self, task_record: TaskRecord) -> Future:
  """Handle the actual submission of the task to the executor layer.
@@ -1205,13 +1169,7 @@ class DataFlowKernel:
 
  self.log_task_states()
 
- # checkpoint if any valid checkpoint method is specified
- if self.checkpoint_mode is not None:
- self.checkpoint()
-
- if self._checkpoint_timer:
- logger.info("Stopping checkpoint timer")
- self._checkpoint_timer.close()
+ self.memoizer.close()
 
  # Send final stats
  self.usage_tracker.send_end_message()
@@ -1269,66 +1227,8 @@ class DataFlowKernel:
  # should still see it.
  logger.info("DFK cleanup complete")
 
- def checkpoint(self, tasks: Optional[Sequence[TaskRecord]] = None) -> None:
- """Checkpoint the dfk incrementally to a checkpoint file.
-
- When called, every task that has been completed yet not
- checkpointed is checkpointed to a file.
-
- Kwargs:
- - tasks (List of task records) : List of task ids to checkpoint. Default=None
- if set to None, we iterate over all tasks held by the DFK.
-
- .. note::
- Checkpointing only works if memoization is enabled
-
- Returns:
- Checkpoint dir if checkpoints were written successfully.
- By default the checkpoints are written to the RUNDIR of the current
- run under RUNDIR/checkpoints/tasks.pkl
- """
- with self.checkpoint_lock:
- if tasks:
- checkpoint_queue = tasks
- else:
- checkpoint_queue = self.checkpointable_tasks
- self.checkpointable_tasks = []
-
- checkpoint_dir = '{0}/checkpoint'.format(self.run_dir)
- checkpoint_tasks = checkpoint_dir + '/tasks.pkl'
-
- if not os.path.exists(checkpoint_dir):
- os.makedirs(checkpoint_dir, exist_ok=True)
-
- count = 0
-
- with open(checkpoint_tasks, 'ab') as f:
- for task_record in checkpoint_queue:
- task_id = task_record['id']
-
- app_fu = task_record['app_fu']
-
- if app_fu.done() and app_fu.exception() is None:
- hashsum = task_record['hashsum']
- if not hashsum:
- continue
- t = {'hash': hashsum, 'exception': None, 'result': app_fu.result()}
-
- # We are using pickle here since pickle dumps to a file in 'ab'
- # mode behave like a incremental log.
- pickle.dump(t, f)
- count += 1
- logger.debug("Task {} checkpointed".format(task_id))
-
- self.checkpointed_tasks += count
-
- if count == 0:
- if self.checkpointed_tasks == 0:
- logger.warning("No tasks checkpointed so far in this run. Please ensure caching is enabled")
- else:
- logger.debug("No tasks checkpointed in this pass.")
- else:
- logger.info("Done checkpointing {} tasks".format(count))
+ def checkpoint(self) -> None:
+ self.memoizer.checkpoint()
 
  @staticmethod
  def _log_std_streams(task_record: TaskRecord) -> None:
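
The join-handling block above is where a join_app body's return value is inspected; anything other than a Future or a list of Futures now fails through _complete_task_exception with the TypeError shown in the diff. A brief sketch of that contract, assuming a Parsl config is already loaded (the app bodies here are illustrative):

    from parsl import join_app, python_app

    @python_app
    def add(a, b):
        return a + b

    @join_app
    def fan_in(n):
        # Returning a Future, or a list of Futures, is joinable; returning
        # anything else makes the join task fail with the TypeError above.
        return [add(i, i) for i in range(n)]

    # fan_in(3).result() becomes [0, 2, 4] once the inner futures complete.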
parsl/dataflow/memoization.py CHANGED
@@ -4,15 +4,18 @@ import hashlib
  import logging
  import os
  import pickle
+ import threading
  import types
  from concurrent.futures import Future
  from functools import lru_cache, singledispatch
- from typing import Any, Dict, List, Optional, Sequence
+ from typing import Any, Dict, List, Literal, Optional, Sequence
 
  import typeguard
 
  from parsl.dataflow.errors import BadCheckpoint
  from parsl.dataflow.taskrecord import TaskRecord
+ from parsl.errors import ConfigurationError, InternalConsistencyError
+ from parsl.utils import Timer, get_all_checkpoints
 
  logger = logging.getLogger(__name__)
 
@@ -146,7 +149,13 @@ class Memoizer:
 
  """
 
- def __init__(self, *, memoize: bool = True, checkpoint_files: Sequence[str]):
+ run_dir: str
+
+ def __init__(self, *,
+ memoize: bool = True,
+ checkpoint_files: Sequence[str] | None,
+ checkpoint_period: Optional[str],
+ checkpoint_mode: Literal['task_exit', 'periodic', 'dfk_exit', 'manual'] | None):
  """Initialize the memoizer.
 
  KWargs:
@@ -155,6 +164,26 @@ class Memoizer:
  """
  self.memoize = memoize
 
+ self.checkpointed_tasks = 0
+
+ self.checkpoint_lock = threading.Lock()
+
+ self.checkpoint_files = checkpoint_files
+ self.checkpoint_mode = checkpoint_mode
+ self.checkpoint_period = checkpoint_period
+
+ self.checkpointable_tasks: List[TaskRecord] = []
+
+ self._checkpoint_timer: Timer | None = None
+
+ def start(self) -> None:
+ if self.checkpoint_files is not None:
+ checkpoint_files = self.checkpoint_files
+ elif self.checkpoint_files is None and self.checkpoint_mode is not None:
+ checkpoint_files = get_all_checkpoints(self.run_dir)
+ else:
+ checkpoint_files = []
+
  checkpoint = self.load_checkpoints(checkpoint_files)
 
  if self.memoize:
@@ -164,6 +193,26 @@ class Memoizer:
  logger.info("App caching disabled for all apps")
  self.memo_lookup_table = {}
 
+ if self.checkpoint_mode == "periodic":
+ if self.checkpoint_period is None:
+ raise ConfigurationError("Checkpoint period must be specified with periodic checkpoint mode")
+ else:
+ try:
+ h, m, s = map(int, self.checkpoint_period.split(':'))
+ except Exception:
+ raise ConfigurationError("invalid checkpoint_period provided: {0} expected HH:MM:SS".format(self.checkpoint_period))
+ checkpoint_period = (h * 3600) + (m * 60) + s
+ self._checkpoint_timer = Timer(self.checkpoint, interval=checkpoint_period, name="Checkpoint")
+
+ def close(self) -> None:
+ if self.checkpoint_mode is not None:
+ logger.info("Making final checkpoint")
+ self.checkpoint()
+
+ if self._checkpoint_timer:
+ logger.info("Stopping checkpoint timer")
+ self._checkpoint_timer.close()
+
  def make_hash(self, task: TaskRecord) -> str:
  """Create a hash of the task inputs.
 
@@ -324,3 +373,78 @@ class Memoizer:
  return self._load_checkpoints(checkpointDirs)
  else:
  return {}
+
+ def update_checkpoint(self, task_record: TaskRecord) -> None:
+ if self.checkpoint_mode == 'task_exit':
+ self.checkpoint(task=task_record)
+ elif self.checkpoint_mode in ('manual', 'periodic', 'dfk_exit'):
+ with self.checkpoint_lock:
+ self.checkpointable_tasks.append(task_record)
+ elif self.checkpoint_mode is None:
+ pass
+ else:
+ raise InternalConsistencyError(f"Invalid checkpoint mode {self.checkpoint_mode}")
+
+ def checkpoint(self, *, task: Optional[TaskRecord] = None) -> None:
+ """Checkpoint the dfk incrementally to a checkpoint file.
+
+ When called with no argument, all tasks registered in self.checkpointable_tasks
+ will be checkpointed. When called with a single TaskRecord argument, that task will be
+ checkpointed.
+
+ By default the checkpoints are written to the RUNDIR of the current
+ run under RUNDIR/checkpoints/tasks.pkl
+
+ Kwargs:
+ - task (Optional task records) : A task to checkpoint. Default=None, meaning all
+ tasks registered for checkpointing.
+
+ .. note::
+ Checkpointing only works if memoization is enabled
+
+ """
+ with self.checkpoint_lock:
+
+ if task:
+ checkpoint_queue = [task]
+ else:
+ checkpoint_queue = self.checkpointable_tasks
+
+ checkpoint_dir = '{0}/checkpoint'.format(self.run_dir)
+ checkpoint_tasks = checkpoint_dir + '/tasks.pkl'
+
+ if not os.path.exists(checkpoint_dir):
+ os.makedirs(checkpoint_dir, exist_ok=True)
+
+ count = 0
+
+ with open(checkpoint_tasks, 'ab') as f:
+ for task_record in checkpoint_queue:
+ task_id = task_record['id']
+
+ app_fu = task_record['app_fu']
+
+ if app_fu.done() and app_fu.exception() is None:
+ hashsum = task_record['hashsum']
+ if not hashsum:
+ continue
+ t = {'hash': hashsum, 'exception': None, 'result': app_fu.result()}
+
+ # We are using pickle here since pickle dumps to a file in 'ab'
+ # mode behave like a incremental log.
+ pickle.dump(t, f)
+ count += 1
+ logger.debug("Task {} checkpointed".format(task_id))
+
+ self.checkpointed_tasks += count
+
+ if count == 0:
+ if self.checkpointed_tasks == 0:
+ logger.warning("No tasks checkpointed so far in this run. Please ensure caching is enabled")
+ else:
+ logger.debug("No tasks checkpointed in this pass.")
+ else:
+ logger.info("Done checkpointing {} tasks".format(count))
+
+ if not task:
+ self.checkpointable_tasks = []
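
For orientation on the refactor above: the checkpoint settings stay on the Parsl Config, and the DataFlowKernel now forwards them into Memoizer, which owns the checkpoint timer and the tasks.pkl writer. A minimal sketch of a configuration that exercises the periodic path (the executor choice here is an illustrative assumption):

    import parsl
    from parsl.config import Config
    from parsl.executors.threads import ThreadPoolExecutor

    config = Config(
        executors=[ThreadPoolExecutor(label="local_threads")],
        app_cache=True,                # checkpointing only works with memoization enabled
        checkpoint_mode="periodic",    # now validated in Memoizer.start() rather than in dflow.py
        checkpoint_period="00:30:00",  # HH:MM:SS, parsed with split(':') as shown above
    )

    parsl.load(config)
    # Completed app results are appended to <run_dir>/checkpoint/tasks.pkl every 30 minutes.
    parsl.dfk().cleanup()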
parsl/dataflow/states.py CHANGED
@@ -67,10 +67,10 @@ class States(IntEnum):
  return self.__class__.__name__ + "." + self.name
 
 
- FINAL_STATES = [States.exec_done, States.memo_done, States.failed, States.dep_fail]
- """States from which we will never move to another state, because the job has
- either definitively completed or failed."""
-
- FINAL_FAILURE_STATES = [States.failed, States.dep_fail]
+ FINAL_FAILURE_STATES = {States.failed, States.dep_fail}
  """States which are final and which indicate a failure. This must
  be a subset of FINAL_STATES"""
+
+ FINAL_STATES = {States.exec_done, States.memo_done, *FINAL_FAILURE_STATES}
+ """States from which we will never move to another state, because the job has
+ either definitively completed or failed."""
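
These containers change from lists to sets, and FINAL_STATES is now built from FINAL_FAILURE_STATES, which makes the documented subset relationship hold by construction. A small sketch of what callers can rely on after this change:

    from parsl.dataflow.states import FINAL_FAILURE_STATES, FINAL_STATES, States

    # The subset invariant noted in the docstring is now structural.
    assert FINAL_FAILURE_STATES <= FINAL_STATES

    # Membership tests, such as the asserts in the new _complete_task_exception, are unchanged.
    assert States.dep_fail in FINAL_FAILURE_STATES
    assert States.exec_done in FINAL_STATES and States.exec_done not in FINAL_FAILURE_STATES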
parsl/jobs/strategy.py CHANGED
@@ -185,6 +185,11 @@ class Strategy:
 
  for executor in executors:
  label = executor.label
+
+ if executor.bad_state_is_set:
+ logger.info(f"Not strategizing for executor {label} because bad state is set")
+ continue
+
  logger.debug(f"Strategizing for executor {label}")
 
  if self.executors[label]['first']:
@@ -213,12 +218,8 @@
 
  logger.debug(f"Slot ratio calculation: active_slots = {active_slots}, active_tasks = {active_tasks}")
 
- if hasattr(executor, 'connected_workers'):
- logger.debug('Executor {} has {} active tasks, {}/{} running/pending blocks, and {} connected workers'.format(
- label, active_tasks, running, pending, executor.connected_workers()))
- else:
- logger.debug('Executor {} has {} active tasks and {}/{} running/pending blocks'.format(
- label, active_tasks, running, pending))
+ logger.debug('Executor {} has {} active tasks and {}/{} running/pending blocks'.format(
+ label, active_tasks, running, pending))
 
  # reset idle timer if executor has active tasks
 
parsl/tests/test_htex/test_priority_queue.py CHANGED
@@ -18,7 +18,7 @@ def fake_task(parsl_resource_specification=None):
 
 
  @pytest.mark.local
- def test_priority_queue():
+ def test_priority_queue(try_assert):
  provider = LocalProvider(
  init_blocks=0,
  max_blocks=0,
@@ -30,6 +30,7 @@ def test_priority_queue():
  max_workers_per_node=1,
  manager_selector=RandomManagerSelector(),
  provider=provider,
+ worker_debug=True, # needed to instrospect interchange logs
  )
 
  config = Config(
@@ -50,6 +51,22 @@ def test_priority_queue():
  spec = {'priority': priority}
  futures[(priority, i)] = fake_task(parsl_resource_specification=spec)
 
+ # wait for the interchange to have received all tasks
+ # (which happens asynchronously to the main thread, and is otherwise
+ # a race condition which can cause this test to fail)
+
+ n = len(priorities)
+
+ def interchange_logs_task_count():
+ with open(htex.worker_logdir + "/interchange.log", "r") as f:
+ lines = f.readlines()
+ for line in lines:
+ if f"Fetched {n} tasks so far" in line:
+ return True
+ return False
+
+ try_assert(interchange_logs_task_count)
+
  provider.max_blocks = 1
  htex.scale_out_facade(1) # don't wait for the JSP to catch up
 
parsl/tests/test_python_apps/test_depfail_propagation.py CHANGED
@@ -1,5 +1,7 @@
+ import parsl
  from parsl import python_app
  from parsl.dataflow.errors import DependencyError
+ from parsl.dataflow.states import States
 
 
  @python_app
@@ -14,6 +16,7 @@ def depends(parent):
 
  def test_depfail_once():
  """Test the simplest dependency failure case"""
+ start_dep_fail_count = parsl.dfk().task_state_counts[States.dep_fail]
  f1 = fails()
  f2 = depends(f1)
 
@@ -25,9 +28,12 @@ def test_depfail_once():
  # in the DependencyError message
  assert ("task " + str(f1.task_record['id'])) in str(f2.exception())
 
+ assert parsl.dfk().task_state_counts[States.dep_fail] == start_dep_fail_count + 1
+
 
  def test_depfail_chain():
  """Test that dependency failures chain"""
+ start_dep_fail_count = parsl.dfk().task_state_counts[States.dep_fail]
  f1 = fails()
  f2 = depends(f1)
  f3 = depends(f2)
@@ -39,11 +45,13 @@ def test_depfail_chain():
  assert isinstance(f3.exception(), DependencyError)
  assert isinstance(f4.exception(), DependencyError)
 
+ assert parsl.dfk().task_state_counts[States.dep_fail] == start_dep_fail_count + 3
+
 
  def test_depfail_branches():
  """Test that dependency failures propagate in the
  presence of multiple downstream tasks."""
-
+ start_dep_fail_count = parsl.dfk().task_state_counts[States.dep_fail]
  f1 = fails()
  f2 = depends(f1)
  f3 = depends(f1)
@@ -52,3 +60,5 @@ def test_depfail_branches():
  assert not isinstance(f1.exception(), DependencyError)
  assert isinstance(f2.exception(), DependencyError)
  assert isinstance(f3.exception(), DependencyError)
+
+ assert parsl.dfk().task_state_counts[States.dep_fail] == start_dep_fail_count + 2
parsl/tests/test_scaling/test_regression_3696_oscillation.py CHANGED
@@ -51,6 +51,7 @@ def test_htex_strategy_does_not_oscillate(ns):
  executor.outstanding = lambda: n_tasks
  executor.status_facade = statuses
  executor.workers_per_node = n_workers
+ executor.bad_state_is_set = False
 
  provider.parallelism = 1
  provider.init_blocks = 0
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
  Year.Month.Day[alpha/beta/..]
  Alphas will be numbered like this -> 2024.12.10a0
  """
- VERSION = '2025.10.13'
+ VERSION = '2025.10.20'
parsl-2025.10.13.dist-info/METADATA → parsl-2025.10.20.dist-info/METADATA CHANGED
@@ -1,9 +1,9 @@
  Metadata-Version: 2.1
  Name: parsl
- Version: 2025.10.13
+ Version: 2025.10.20
  Summary: Simple data dependent workflows in Python
  Home-page: https://github.com/Parsl/parsl
- Download-URL: https://github.com/Parsl/parsl/archive/2025.10.13.tar.gz
+ Download-URL: https://github.com/Parsl/parsl/archive/2025.10.20.tar.gz
  Author: The Parsl Team
  Author-email: parsl@googlegroups.com
  License: Apache 2.0
parsl-2025.10.13.dist-info/RECORD → parsl-2025.10.20.dist-info/RECORD CHANGED
@@ -8,10 +8,10 @@ parsl/multiprocessing.py,sha256=xqieTLko3DrHykCqqSHQszMwd8ORYllrgz6Qc_PsHCE,2112
  parsl/process_loggers.py,sha256=uQ7Gd0W72Jz7rrcYlOMfLsAEhkRltxXJL2MgdduJjEw,1136
  parsl/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  parsl/utils.py,sha256=smVYTusMoYUTD5N9OxTW5bh6o2iioh0NnfjrBAj8zYk,14452
- parsl/version.py,sha256=rx6RT17unUOKqi7eD7qVlatEYlVhzqpa0D-qZ3mnuMs,131
+ parsl/version.py,sha256=whi_IdOncV7eAqL5UV49y8XFRCw7SVxlohTSQa_fU70,131
  parsl/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  parsl/app/app.py,sha256=0gbM4AH2OtFOLsv07I5nglpElcwMSOi-FzdZZfrk7So,8532
- parsl/app/bash.py,sha256=jm2AvePlCT9DZR7H_4ANDWxatp5dN_22FUlT_gWhZ-g,5528
+ parsl/app/bash.py,sha256=VYIUTvy3qbjR7MzVO9jErui2WMZteIeuc7iGK6NSjL0,5498
  parsl/app/errors.py,sha256=SQQ1fNp8834DZnoRnlsoZn1WMAFM3fnh2CNHRPmFcKc,3854
  parsl/app/futures.py,sha256=2tMUeKIuDzwuhLIWlsEiZuDrhkxxsUed4QUbQuQg20Y,2826
  parsl/app/python.py,sha256=0hrz2BppVOwwNfh5hnoP70Yv56gSRkIoT-fP9XNb4v4,2331
@@ -55,12 +55,12 @@ parsl/data_provider/staging.py,sha256=ZDZuuFg38pjUStegKPcvPsfGp3iMeReMzfU6DSwtJj
  parsl/data_provider/zip.py,sha256=S4kVuH9lxAegRURYbvIUR7EYYBOccyslaqyCrVWUBhw,4497
  parsl/dataflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  parsl/dataflow/dependency_resolvers.py,sha256=Om8Dgh7a0ZwgXAc6TlhxLSzvxXHDlNNV1aBNiD3JTNY,3325
- parsl/dataflow/dflow.py,sha256=shxgZ5ulMYPnvyKx4nOrdhCZYLX3JgXPo1OC3168OZw,63087
+ parsl/dataflow/dflow.py,sha256=AQKNtTwqk6YkzzDFEWmQ3dFHmDT8r1PuBF2RBhWC4Q8,58047
  parsl/dataflow/errors.py,sha256=daVfr2BWs1zRsGD6JtosEMttWHvK1df1Npiu_MUvFKg,3998
  parsl/dataflow/futures.py,sha256=08LuP-HFiHBIZmeKCjlsazw_WpQ5fwevrU2_WbidkYw,6080
- parsl/dataflow/memoization.py,sha256=AyO1khMwlbuGJQQk-l_wJRj0QeOHTOnmlvzXgQdNNQk,11977
+ parsl/dataflow/memoization.py,sha256=xWR09aZkQ695NIqyXQRCVl3OzioXQPzY3_3zqXd3ggA,16918
  parsl/dataflow/rundirs.py,sha256=JZdzybVGubY35jL2YiKcDo65ZmRl1WyOApc8ajYxztc,1087
- parsl/dataflow/states.py,sha256=hV6mfv-y4A6xrujeQglcomnfEs7y3Xm2g6JFwC6dvgQ,2612
+ parsl/dataflow/states.py,sha256=7i3s0QIOn0TA42YCjvEAVumNd8G0IlOvX6vOfG3Sy3U,2603
  parsl/dataflow/taskrecord.py,sha256=qIW7T6hn9dYTuNPdUura3HQwwUpUJACwPP5REm5COf4,3042
  parsl/executors/__init__.py,sha256=PEuXYrnVqwlaz_nt82s9D_YNaVsX7ET29DeIZRUR8hw,577
  parsl/executors/base.py,sha256=n-_tFtigMguc_alT8vSf1zKl2EuoGC1lmUewTv3dgsc,4990
@@ -110,7 +110,7 @@ parsl/jobs/error_handlers.py,sha256=BBXwUAMJpBm0HxV1P-I6jv7ZF9wcrhnCfzSTlsd2g4w,
  parsl/jobs/errors.py,sha256=cpSQXCrlKtuHsQf7usjF-lX8XsDkFnE5kWpmFjiN6OU,178
  parsl/jobs/job_status_poller.py,sha256=b37JOqDpSesqeSreEh1HzfVTFnD5Aoy6k8JDXkkPDmk,2192
  parsl/jobs/states.py,sha256=dUM8gC4YVpUjLMARJJ_tDERs6oHsoNheAtG6JWPIJt4,5058
- parsl/jobs/strategy.py,sha256=VxFicpEq6l4bkoFQItHCpQGv9-8jPuP_rMLV1yYZ26Q,13805
+ parsl/jobs/strategy.py,sha256=Ssw_24xtxb5w8CpBL6Cm11MvcX0qzXvMiHOrOX3-pWs,13671
  parsl/launchers/__init__.py,sha256=jJeDOWGKJjvpmWTLsj1zSqce_UAhWRc_IO-TzaOAlII,579
  parsl/launchers/base.py,sha256=CblcvPTJiu-MNLWaRtFe29SZQ0BpTOlaY8CGcHdlHIE,538
  parsl/launchers/errors.py,sha256=8YMV_CHpBNVa4eXkGE4x5DaFQlZkDCRCHmBktYcY6TA,467
@@ -318,7 +318,7 @@ parsl/tests/test_htex/test_manager_selector_by_block.py,sha256=VQqSE6MDhGpDSjShG
  parsl/tests/test_htex/test_managers_command.py,sha256=SCwkfyGB-Udgu5L2yDMpR5bsaT-aNjNkiXxtuRb25DI,1622
  parsl/tests/test_htex/test_missing_worker.py,sha256=gyp5i7_t-JHyJGtz_eXZKKBY5w8oqLOIxO6cJgGJMtQ,745
  parsl/tests/test_htex/test_multiple_disconnected_blocks.py,sha256=2vXZoIx4NuAWYuiNoL5Gxr85w72qZ7Kdb3JGh0FufTg,1867
- parsl/tests/test_htex/test_priority_queue.py,sha256=sAs9W4I0LsmvPpuN9Q66yRY4zoSOEo0eMFh6DXlih0I,2336
+ parsl/tests/test_htex/test_priority_queue.py,sha256=qnU5ueFsl7sLlJ4p_PVash5a9fYNLRbk7V4COnNuOmY,3007
  parsl/tests/test_htex/test_resource_spec_validation.py,sha256=ZXW02jDd1rNxjBLh1jHyiz31zNoB9JzDw94aWllXFd4,1102
  parsl/tests/test_htex/test_worker_failure.py,sha256=Uz-RHI-LK78FMjXUvrUFmo4iYfmpDVBUcBxxRb3UG9M,603
  parsl/tests/test_htex/test_zmq_binding.py,sha256=SmX_63vvXKnzWISBr8HnJCrRqubx7K0blvgjq4Px2gc,4391
@@ -358,7 +358,7 @@ parsl/tests/test_python_apps/test_context_manager.py,sha256=8kUgcxN-6cz2u-lUoDhM
  parsl/tests/test_python_apps/test_dep_standard_futures.py,sha256=kMOMZLaxJMmpABCUVniDIOIfkEqflZyhKjS_wkDti7A,1049
  parsl/tests/test_python_apps/test_dependencies.py,sha256=IRiTI_lPoWBSFSFnaBlE6Bv08PKEaf-qj5dfqO2RjT0,272
  parsl/tests/test_python_apps/test_dependencies_deep.py,sha256=Cuow2LLGY7zffPFj89AOIwKlXxHtsin3v_UIhfdwV_w,1542
- parsl/tests/test_python_apps/test_depfail_propagation.py,sha256=3q3HlVWrOixFtXWBvR_ypKtbdAHAJcKndXQ5drwrBQU,1488
+ parsl/tests/test_python_apps/test_depfail_propagation.py,sha256=TSXBgcFSxqkaEeVl_cCfQfdCmCgTTRi2q2mSr2RH6Tc,2024
  parsl/tests/test_python_apps/test_fail.py,sha256=gMuZwxZNaUCaonlUX-7SOBvXg8kidkBcEeqKLEvqpYM,1692
  parsl/tests/test_python_apps/test_fibonacci_iterative.py,sha256=ly2s5HuB9R53Z2FM_zy0WWdOk01iVhgcwSpQyK6ErIY,573
  parsl/tests/test_python_apps/test_fibonacci_recursive.py,sha256=q7LMFcu_pJSNPdz8iY0UiRoIweEWIBGwMjQffHWAuDc,592
@@ -400,7 +400,7 @@ parsl/tests/test_scaling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
  parsl/tests/test_scaling/test_block_error_handler.py,sha256=OS1IyiK8gjRFI1VzpmOvEnKsPev2vKmC6Z2Hp5LaHpA,6068
  parsl/tests/test_scaling/test_regression_1621.py,sha256=e3-bkHR3d8LxA-uY0BugyWgYzksh00I_UbaA-jHOzKY,1872
  parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py,sha256=bjE_NIBoWK6heEz5LN0tzE1977vUA9kVemAYCqcIbzY,2942
- parsl/tests/test_scaling/test_regression_3696_oscillation.py,sha256=xbRY1sNmPvpliwg0nLDCS2JcIviVPHHCOe3y1W9iIlY,3637
+ parsl/tests/test_scaling/test_regression_3696_oscillation.py,sha256=gjf5DDX_X-iZtekDQffsa3DBw8_vWarQh5ztkxcSkX0,3675
  parsl/tests/test_scaling/test_scale_down.py,sha256=q_H6YAaID-n6Yj_FVElhufApzsbD08ItRopjgRBlDvU,2769
  parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py,sha256=2w4BxKyWXrow9PMRZzIFdiB1EVZ8YRTmqsP-RNoOx7Q,4525
  parsl/tests/test_scaling/test_scale_down_htex_unregistered.py,sha256=OrdnYmd58n7UfkANPJ7mzha4WSCPdbgJRX1O1Zdu0tI,1954
@@ -450,13 +450,13 @@ parsl/usage_tracking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
  parsl/usage_tracking/api.py,sha256=iaCY58Dc5J4UM7_dJzEEs871P1p1HdxBMtNGyVdzc9g,1821
  parsl/usage_tracking/levels.py,sha256=xbfzYEsd55KiZJ-mzNgPebvOH4rRHum04hROzEf41tU,291
  parsl/usage_tracking/usage.py,sha256=hbMo5BYgIWqMcFWqN-HYP1TbwNrTonpv-usfwnCFJKY,9212
- parsl-2025.10.13.data/scripts/exec_parsl_function.py,sha256=YXKVVIa4zXmOtz-0Ca4E_5nQfN_3S2bh2tB75uZZB4w,7774
- parsl-2025.10.13.data/scripts/interchange.py,sha256=Kn0yJnpcRsc37gfhD6mGkoX9wD7vP_QgWst7qwUjj5o,26145
- parsl-2025.10.13.data/scripts/parsl_coprocess.py,sha256=zrVjEqQvFOHxsLufPi00xzMONagjVwLZbavPM7bbjK4,5722
- parsl-2025.10.13.data/scripts/process_worker_pool.py,sha256=euc3xPPw1zFdXVjgbSvyyIcvjcEZGXZTi0aSj23Vp-g,41370
- parsl-2025.10.13.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
- parsl-2025.10.13.dist-info/METADATA,sha256=VL1Yq8GWBXD6N7tstvWAcC5Tfs1rCOX8ldffVN6HtCo,4007
- parsl-2025.10.13.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- parsl-2025.10.13.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
- parsl-2025.10.13.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
- parsl-2025.10.13.dist-info/RECORD,,
+ parsl-2025.10.20.data/scripts/exec_parsl_function.py,sha256=YXKVVIa4zXmOtz-0Ca4E_5nQfN_3S2bh2tB75uZZB4w,7774
+ parsl-2025.10.20.data/scripts/interchange.py,sha256=Kn0yJnpcRsc37gfhD6mGkoX9wD7vP_QgWst7qwUjj5o,26145
+ parsl-2025.10.20.data/scripts/parsl_coprocess.py,sha256=zrVjEqQvFOHxsLufPi00xzMONagjVwLZbavPM7bbjK4,5722
+ parsl-2025.10.20.data/scripts/process_worker_pool.py,sha256=euc3xPPw1zFdXVjgbSvyyIcvjcEZGXZTi0aSj23Vp-g,41370
+ parsl-2025.10.20.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+ parsl-2025.10.20.dist-info/METADATA,sha256=00bQzNdWQ0pCl_MRkEY5s59WLk9r67BfY5t6LNALEqA,4007
+ parsl-2025.10.20.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ parsl-2025.10.20.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
+ parsl-2025.10.20.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
+ parsl-2025.10.20.dist-info/RECORD,,