parsl 2024.3.4__py3-none-any.whl → 2024.3.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/addresses.py +3 -1
- parsl/config.py +4 -0
- parsl/dataflow/dflow.py +14 -7
- parsl/dataflow/taskrecord.py +3 -1
- parsl/executors/high_throughput/executor.py +34 -10
- parsl/executors/high_throughput/interchange.py +43 -10
- parsl/executors/high_throughput/manager_record.py +1 -0
- parsl/executors/high_throughput/process_worker_pool.py +48 -7
- parsl/executors/taskvine/executor.py +6 -3
- parsl/executors/taskvine/manager.py +1 -0
- parsl/executors/taskvine/manager_config.py +3 -4
- parsl/jobs/job_status_poller.py +4 -3
- parsl/jobs/strategy.py +2 -1
- parsl/launchers/launchers.py +6 -6
- parsl/log_utils.py +8 -4
- parsl/monitoring/db_manager.py +29 -7
- parsl/monitoring/monitoring.py +15 -54
- parsl/monitoring/remote.py +29 -0
- parsl/monitoring/visualization/models.py +7 -0
- parsl/monitoring/visualization/plots/default/workflow_plots.py +3 -0
- parsl/monitoring/visualization/views.py +2 -1
- parsl/providers/cluster_provider.py +1 -3
- parsl/providers/slurm/slurm.py +13 -2
- parsl/tests/configs/user_opts.py +5 -2
- parsl/tests/test_htex/test_drain.py +78 -0
- parsl/tests/test_monitoring/test_app_names.py +86 -0
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +3 -11
- parsl/usage_tracking/usage.py +5 -9
- parsl/utils.py +2 -2
- parsl/version.py +1 -1
- {parsl-2024.3.4.data → parsl-2024.3.18.data}/scripts/process_worker_pool.py +48 -7
- {parsl-2024.3.4.dist-info → parsl-2024.3.18.dist-info}/METADATA +2 -2
- {parsl-2024.3.4.dist-info → parsl-2024.3.18.dist-info}/RECORD +39 -38
- {parsl-2024.3.4.dist-info → parsl-2024.3.18.dist-info}/WHEEL +1 -1
- parsl/configs/bluewaters.py +0 -28
- {parsl-2024.3.4.data → parsl-2024.3.18.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.3.4.data → parsl-2024.3.18.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.3.4.dist-info → parsl-2024.3.18.dist-info}/LICENSE +0 -0
- {parsl-2024.3.4.dist-info → parsl-2024.3.18.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.4.dist-info → parsl-2024.3.18.dist-info}/top_level.txt +0 -0
parsl/launchers/launchers.py
CHANGED
@@ -8,16 +8,16 @@ logger = logging.getLogger(__name__)
 class SimpleLauncher(Launcher):
     """ Does no wrapping. Just returns the command as-is
     """
-    def
+    def __init__(self, debug: bool = True) -> None:
         super().__init__(debug=debug)
 
     def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> str:
-        """
-        Args:
-        - command (string): The command string to be launched
-        - task_block (string) : bash evaluated string.
 
-        """
+        if nodes_per_block > 1:
+            logger.warning('Simple Launcher only supports single node per block. '
+                           f'Requested nodes: {nodes_per_block}. '
+                           'You may be getting fewer workers than expected')
+
         return command
 
 
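The hunk above changes SimpleLauncher's behaviour as well as its constructor: a block that requests more than one node now logs a warning instead of silently under-provisioning. A minimal sketch of how that surfaces, assuming a local Python session with parsl installed; the command string is an arbitrary placeholder:

```python
from parsl.launchers import SimpleLauncher

# debug is the constructor flag shown in the hunk above; it defaults to True.
launcher = SimpleLauncher(debug=False)

# SimpleLauncher returns the command unchanged; with nodes_per_block > 1 it now
# also logs a warning that fewer workers than expected may start.
cmd = launcher("echo hello", tasks_per_node=1, nodes_per_block=2)
assert cmd == "echo hello"
```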
parsl/log_utils.py
CHANGED
@@ -28,7 +28,7 @@ DEFAULT_FORMAT = (
 def set_stream_logger(name: str = 'parsl',
                       level: int = logging.DEBUG,
                       format_string: Optional[str] = None,
-                      stream: Optional[io.TextIOWrapper] = None) ->
+                      stream: Optional[io.TextIOWrapper] = None) -> logging.Logger:
     """Add a stream log handler.
 
     Args:
@@ -39,7 +39,7 @@ def set_stream_logger(name: str = 'parsl',
             If not specified, the default stream for logging.StreamHandler is used.
 
     Returns:
-        -
+        - logger for specified name
     """
     if format_string is None:
         # format_string = "%(asctime)s %(name)s [%(levelname)s] Thread:%(thread)d %(message)s"
@@ -59,12 +59,14 @@ def set_stream_logger(name: str = 'parsl',
     futures_logger = logging.getLogger("concurrent.futures")
     futures_logger.addHandler(handler)
 
+    return logger
+
 
 @typeguard.typechecked
 def set_file_logger(filename: str,
                     name: str = 'parsl',
                     level: int = logging.DEBUG,
-                    format_string: Optional[str] = None) ->
+                    format_string: Optional[str] = None) -> logging.Logger:
     """Add a file log handler.
 
     Args:
@@ -74,7 +76,7 @@ def set_file_logger(filename: str,
        - format_string (string): Set the format string
 
     Returns:
-        -
+        - logger for specified name
     """
     if format_string is None:
         format_string = DEFAULT_FORMAT
@@ -91,3 +93,5 @@ def set_file_logger(filename: str,
     # concurrent.futures
     futures_logger = logging.getLogger("concurrent.futures")
     futures_logger.addHandler(handler)
+
+    return logger
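Both helpers now return the logger they configure instead of returning nothing. A short sketch of how a caller can use that, assuming parsl is installed and the log file path (an example name, not one this diff introduces) is writable:

```python
import logging

from parsl.log_utils import set_file_logger, set_stream_logger

# set_stream_logger now hands back the configured logger, so further handler or
# level tweaks do not need a second logging.getLogger() lookup.
logger = set_stream_logger(name="parsl", level=logging.INFO)
logger.info("stream handler attached")

# set_file_logger behaves the same way; "parsl.log" is just an illustrative path.
file_logger = set_file_logger("parsl.log", level=logging.DEBUG)
file_logger.debug("file handler attached")
```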
parsl/monitoring/db_manager.py
CHANGED
@@ -103,7 +103,13 @@ class Database:
     def rollback(self) -> None:
         self.session.rollback()
 
-    def _generate_mappings(
+    def _generate_mappings(
+            self,
+            table: Table,
+            columns: Optional[List[str]] = None,
+            messages: List[MonitoringMessage] = [],
+    ) -> List[Dict[str, Any]]:
+
         mappings = []
         for msg in messages:
             m = {}
@@ -250,6 +256,12 @@ class Database:
             'psutil_process_disk_write', Float, nullable=True)
         psutil_process_status = Column(
             'psutil_process_status', Text, nullable=True)
+        psutil_cpu_num = Column(
+            'psutil_cpu_num', Text, nullable=True)
+        psutil_process_num_ctx_switches_voluntary = Column(
+            'psutil_process_num_ctx_switches_voluntary', Float, nullable=True)
+        psutil_process_num_ctx_switches_involuntary = Column(
+            'psutil_process_num_ctx_switches_involuntary', Float, nullable=True)
         __table_args__ = (
             PrimaryKeyConstraint('try_id', 'task_id', 'run_id', 'timestamp'),
         )
@@ -518,7 +530,10 @@ class DatabaseManager:
                             reprocessable_first_resource_messages.append(msg)
                         else:
                             if task_try_id in deferred_resource_messages:
-                                logger.error(
+                                logger.error(
+                                    "Task {} already has a deferred resource message. "
+                                    "Discarding previous message.".format(msg['task_id'])
+                                )
                             deferred_resource_messages[task_try_id] = msg
                     elif msg['last_msg']:
                         # This assumes that the primary key has been added
@@ -544,7 +559,10 @@ class DatabaseManager:
                 if reprocessable_last_resource_messages:
                     self._insert(table=STATUS, messages=reprocessable_last_resource_messages)
             except Exception:
-                logger.exception(
+                logger.exception(
+                    "Exception in db loop: this might have been a malformed message, "
+                    "or some other error. monitoring data may have been lost"
+                )
                 exception_happened = True
         if exception_happened:
             raise RuntimeError("An exception happened sometime during database processing and should have been logged in database_manager.log")
@@ -571,8 +589,10 @@ class DatabaseManager:
                     self._dispatch_to_internal(x)
                 elif queue_tag == 'resource':
                     assert isinstance(x, tuple), "_migrate_logs_to_internal was expecting a tuple, got {}".format(x)
-                    assert x[0] == MessageType.RESOURCE_INFO,
-                    "_migrate_logs_to_internal can only migrate RESOURCE_INFO message from resource queue,
+                    assert x[0] == MessageType.RESOURCE_INFO, (
+                        "_migrate_logs_to_internal can only migrate RESOURCE_INFO message from resource queue, "
+                        "got tag {}, message {}".format(x[0], x)
+                    )
                     self._dispatch_to_internal(x)
                 elif queue_tag == 'node':
                     assert len(x) == 2, "expected message tuple to have exactly two elements"
@@ -613,7 +633,8 @@ class DatabaseManager:
                 # if retried - for example, the database being locked because someone else is readying
                 # the tables we are trying to write to. If that assumption is wrong, then this loop
                 # may go on forever.
-                logger.warning("Got a database OperationalError.
+                logger.warning("Got a database OperationalError. "
+                               "Ignoring and retrying on the assumption that it is recoverable: {}".format(e))
                 self.db.rollback()
                 time.sleep(1)  # hard coded 1s wait - this should be configurable or exponential backoff or something
 
@@ -640,7 +661,8 @@ class DatabaseManager:
                 done = True
             except sa.exc.OperationalError as e:
                 # hoping that this is a database locked error during _update, not some other problem
-                logger.warning("Got a database OperationalError.
+                logger.warning("Got a database OperationalError. "
+                               "Ignoring and retrying on the assumption that it is recoverable: {}".format(e))
                 self.db.rollback()
                 time.sleep(1)  # hard coded 1s wait - this should be configurable or exponential backoff or something
             except KeyboardInterrupt:
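The new columns land in the monitoring database alongside the existing psutil fields. A hedged sketch of inspecting them after a monitored run; the database path and the "resource" table name are assumptions about a typical sqlite-backed run, not something this diff fixes:

```python
import sqlite3

# runinfo/monitoring.db is the usual default location; "resource" is the table
# name assumed here for the Resource mapping shown in the hunk above.
conn = sqlite3.connect("runinfo/monitoring.db")
rows = conn.execute(
    "SELECT task_id, psutil_cpu_num, "
    "psutil_process_num_ctx_switches_voluntary, "
    "psutil_process_num_ctx_switches_involuntary "
    "FROM resource ORDER BY timestamp DESC LIMIT 5"
).fetchall()
for row in rows:
    print(row)
conn.close()
```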
parsl/monitoring/monitoring.py
CHANGED
@@ -15,6 +15,7 @@ import parsl.monitoring.remote
 from parsl.multiprocessing import ForkProcess, SizedQueue
 from multiprocessing import Process
 from multiprocessing.queues import Queue
+from parsl.log_utils import set_file_logger
 from parsl.utils import RepresentationMixin
 from parsl.process_loggers import wrap_with_logs
 from parsl.utils import setproctitle
@@ -38,40 +39,6 @@ else:
 logger = logging.getLogger(__name__)
 
 
-def start_file_logger(filename: str, name: str = 'monitoring', level: int = logging.DEBUG, format_string: Optional[str] = None) -> logging.Logger:
-    """Add a stream log handler.
-
-    Parameters
-    ---------
-
-    filename: string
-        Name of the file to write logs to. Required.
-    name: string
-        Logger name.
-    level: logging.LEVEL
-        Set the logging level. Default=logging.DEBUG
-    - format_string (string): Set the format string
-    format_string: string
-        Format string to use.
-
-    Returns
-    -------
-        None.
-    """
-    if format_string is None:
-        format_string = "%(asctime)s.%(msecs)03d %(name)s:%(lineno)d [%(levelname)s] %(message)s"
-
-    logger = logging.getLogger(name)
-    logger.setLevel(level)
-    logger.propagate = False
-    handler = logging.FileHandler(filename)
-    handler.setLevel(level)
-    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%d %H:%M:%S')
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    return logger
-
-
 @typeguard.typechecked
 class MonitoringHub(RepresentationMixin):
     def __init__(self,
@@ -79,9 +46,6 @@ class MonitoringHub(RepresentationMixin):
                  hub_port: Optional[int] = None,
                  hub_port_range: Tuple[int, int] = (55050, 56000),
 
-                 client_address: str = "127.0.0.1",
-                 client_port_range: Tuple[int, int] = (55000, 56000),
-
                  workflow_name: Optional[str] = None,
                  workflow_version: Optional[str] = None,
                  logging_endpoint: Optional[str] = None,
@@ -106,11 +70,6 @@ class MonitoringHub(RepresentationMixin):
             to deliver monitoring messages to the monitoring router.
             Note that despite the similar name, this is not related to hub_port.
             Default: (55050, 56000)
-        client_address : str
-            The ip address at which the dfk will be able to reach Hub. Default: "127.0.0.1"
-        client_port_range : tuple(int, int)
-            The MonitoringHub picks ports at random from the range which will be used by Hub.
-            Default: (55000, 56000)
         workflow_name : str
             The name for the workflow. Default to the name of the parsl script
         workflow_version : str
@@ -145,9 +104,6 @@ class MonitoringHub(RepresentationMixin):
        if _db_manager_excepts:
            raise _db_manager_excepts
 
-       self.client_address = client_address
-       self.client_port_range = client_port_range
-
        self.hub_address = hub_address
        self.hub_port = hub_port
        self.hub_port_range = hub_port_range
@@ -290,8 +246,12 @@ class MonitoringHub(RepresentationMixin):
            self._dfk_channel.close()
            if exception_msgs:
                for exception_msg in exception_msgs:
-                   self.logger.error(
-
+                   self.logger.error(
+                       "{} process delivered an exception: {}. Terminating all monitoring processes immediately.".format(
+                           exception_msg[0],
+                           exception_msg[1]
+                       )
+                   )
                self.router_proc.terminate()
                self.dbm_proc.terminate()
                self.filesystem_proc.terminate()
@@ -333,9 +293,9 @@ class MonitoringHub(RepresentationMixin):
 
 @wrap_with_logs
 def filesystem_receiver(logdir: str, q: "queue.Queue[AddressedMonitoringMessage]", run_dir: str) -> None:
-    logger =
-
-
+    logger = set_file_logger("{}/monitoring_filesystem_radio.log".format(logdir),
+                             name="monitoring_filesystem_radio",
+                             level=logging.INFO)
 
     logger.info("Starting filesystem radio receiver")
     setproctitle("parsl: monitoring filesystem receiver")
@@ -401,9 +361,9 @@ class MonitoringRouter:
 
         """
         os.makedirs(logdir, exist_ok=True)
-        self.logger =
-
-
+        self.logger = set_file_logger("{}/monitoring_router.log".format(logdir),
+                                      name="monitoring_router",
+                                      level=logging_level)
        self.logger.debug("Monitoring router starting")
 
        self.hub_address = hub_address
@@ -489,7 +449,8 @@ class MonitoringRouter:
                    # but there is no verification that the message
                    # received from ic_channel.recv_pyobj() is actually
                    # of that type.
-                   self.logger.error(
+                   self.logger.error("Discarding message "  # type: ignore[unreachable]
+                                     f"from interchange with unknown type {msg[0].value}")
            except zmq.Again:
                pass
            except Exception:
parsl/monitoring/remote.py
CHANGED
@@ -201,6 +201,8 @@ def monitor(pid: int,
 
     children_user_time = {}  # type: Dict[int, float]
     children_system_time = {}  # type: Dict[int, float]
+    children_num_ctx_switches_voluntary = {}  # type: Dict[int, float]
+    children_num_ctx_switches_involuntary = {}  # type: Dict[int, float]
 
     def accumulate_and_prepare() -> Dict[str, Any]:
         d = {"psutil_process_" + str(k): v for k, v in pm.as_dict().items() if k in simple}
@@ -218,6 +220,15 @@ def monitor(pid: int,
         logging.debug("got children")
 
         d["psutil_cpu_count"] = psutil.cpu_count()
+
+        # note that this will be the CPU number of the base process, not anything launched by it
+        d["psutil_cpu_num"] = pm.cpu_num()
+
+        pctxsw = pm.num_ctx_switches()
+
+        d["psutil_process_num_ctx_switches_voluntary"] = pctxsw.voluntary
+        d["psutil_process_num_ctx_switches_involuntary"] = pctxsw.involuntary
+
         d['psutil_process_memory_virtual'] = pm.memory_info().vms
         d['psutil_process_memory_resident'] = pm.memory_info().rss
         d['psutil_process_time_user'] = pm.cpu_times().user
@@ -238,6 +249,11 @@ def monitor(pid: int,
                 child_system_time = child.cpu_times().system
                 children_user_time[child.pid] = child_user_time
                 children_system_time[child.pid] = child_system_time
+
+                pctxsw = child.num_ctx_switches()
+                children_num_ctx_switches_voluntary[child.pid] = pctxsw.voluntary
+                children_num_ctx_switches_involuntary[child.pid] = pctxsw.involuntary
+
                 d['psutil_process_memory_virtual'] += child.memory_info().vms
                 d['psutil_process_memory_resident'] += child.memory_info().rss
                 try:
@@ -248,14 +264,27 @@ def monitor(pid: int,
                     logging.exception("Exception reading IO counters for child {k}. Recorded IO usage may be incomplete".format(k=k), exc_info=True)
                     d['psutil_process_disk_write'] += 0
                     d['psutil_process_disk_read'] += 0
+
         total_children_user_time = 0.0
         for child_pid in children_user_time:
             total_children_user_time += children_user_time[child_pid]
+
         total_children_system_time = 0.0
         for child_pid in children_system_time:
             total_children_system_time += children_system_time[child_pid]
+
+        total_children_num_ctx_switches_voluntary = 0.0
+        for child_pid in children_num_ctx_switches_voluntary:
+            total_children_num_ctx_switches_voluntary += children_num_ctx_switches_voluntary[child_pid]
+
+        total_children_num_ctx_switches_involuntary = 0.0
+        for child_pid in children_num_ctx_switches_involuntary:
+            total_children_num_ctx_switches_involuntary += children_num_ctx_switches_involuntary[child_pid]
+
         d['psutil_process_time_user'] += total_children_user_time
         d['psutil_process_time_system'] += total_children_system_time
+        d['psutil_process_num_ctx_switches_voluntary'] += total_children_num_ctx_switches_voluntary
+        d['psutil_process_num_ctx_switches_involuntary'] += total_children_num_ctx_switches_involuntary
         logging.debug("sending message")
         return d
 
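The monitor now also records which CPU the monitored process last ran on and its context-switch counts, with child-process counts summed in. The underlying psutil calls, shown in isolation; note that psutil only exposes cpu_num() on some platforms (for example Linux), so this sketch is platform-dependent:

```python
import psutil

pm = psutil.Process()

# CPU the process most recently ran on (not available on Windows or macOS).
print("cpu_num:", pm.cpu_num())

# Named tuple of voluntary/involuntary counts, mirroring the new resource fields.
ctx = pm.num_ctx_switches()
print("voluntary:", ctx.voluntary, "involuntary:", ctx.involuntary)
```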
parsl/monitoring/visualization/models.py
CHANGED
@@ -102,5 +102,12 @@ class Resource(db.Model):
         'psutil_process_disk_write', db.Float, nullable=True)
     psutil_process_status = db.Column(
         'psutil_process_status', db.Text, nullable=True)
+    psutil_cpu_num = db.Column(
+        'psutil_cpu_num', db.Text, nullable=True)
+    psutil_process_num_ctx_switches_voluntary = db.Column(
+        'psutil_process_num_ctx_switches_voluntary', db.Float, nullable=True)
+    psutil_process_num_ctx_switches_involuntary = db.Column(
+        'psutil_process_num_ctx_switches_involuntary', db.Float, nullable=True)
+
     __table_args__ = (
         db.PrimaryKeyConstraint('task_id', 'run_id', 'timestamp'),)
parsl/monitoring/visualization/plots/default/workflow_plots.py
CHANGED
@@ -27,6 +27,9 @@ gantt_colors = {'unsched': 'rgb(240, 240, 240)',
 
 def task_gantt_plot(df_task, df_status, time_completed=None):
 
+    if df_task.empty:
+        return None
+
     # if the workflow is not recorded as completed, then assume
     # that tasks should continue in their last state until now,
     # rather than the workflow end time.
parsl/monitoring/visualization/views.py
CHANGED
@@ -8,7 +8,8 @@ from parsl.monitoring.visualization.models import Workflow, Task, Status, db
 
 from parsl.monitoring.visualization.plots.default.workflow_plots import task_gantt_plot, task_per_app_plot, workflow_dag_plot
 from parsl.monitoring.visualization.plots.default.task_plots import time_series_memory_per_task_plot
-from parsl.monitoring.visualization.plots.default.workflow_resource_plots import resource_distribution_plot,
+from parsl.monitoring.visualization.plots.default.workflow_resource_plots import (resource_distribution_plot,
+                                                                                  resource_efficiency, worker_efficiency)
 
 dummy = True
 
parsl/providers/cluster_provider.py
CHANGED
@@ -91,7 +91,7 @@ class ClusterProvider(ExecutionProvider):
              - configs (dict) : configs that get pushed into the template
 
         Returns:
-              -
+              - None
 
         Raises:
               SchedulerMissingArgs : If template is missing args
@@ -117,8 +117,6 @@ class ClusterProvider(ExecutionProvider):
             logger.error("Uncategorized error: %s", e)
             raise e
 
-        return True
-
     @abstractmethod
     def _status(self):
         pass
parsl/providers/slurm/slurm.py
CHANGED
@@ -280,11 +280,22 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
             else:
                 logger.error("Could not read job ID from submit command standard output.")
                 logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip())
-                raise SubmitException(
+                raise SubmitException(
+                    job_name,
+                    "Could not read job ID from submit command standard output",
+                    stdout=stdout,
+                    stderr=stderr,
+                    retcode=retcode
+                )
         else:
             logger.error("Submit command failed")
             logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip())
-            raise SubmitException(
+            raise SubmitException(
+                job_name, "Could not read job ID from submit command standard output",
+                stdout=stdout,
+                stderr=stderr,
+                retcode=retcode
+            )
 
     def cancel(self, job_ids):
         ''' Cancels the jobs specified by a list of job ids
parsl/tests/configs/user_opts.py
CHANGED
@@ -52,13 +52,16 @@ user_opts = {
     #     'username': MIDWAY_USERNAME,
     #     'script_dir': '/scratch/midway2/{}/parsl_scripts'.format(MIDWAY_USERNAME),
     #     'scheduler_options': "",
-    #     'worker_init': 'cd /scratch/midway2/{}/parsl_scripts;
+    #     'worker_init': 'cd /scratch/midway2/{}/parsl_scripts; '
+    #                    'module load Anaconda3/5.1.0; source activate parsl_testing;'
+    #                    .format(MIDWAY_USERNAME),
     # },
     # 'osg': {
     #     'username': OSG_USERNAME,
     #     'script_dir': '/home/{}/parsl_scripts'.format(OSG_USERNAME),
     #     'scheduler_options': "",
-    #     'worker_init' : 'module load python/3.5.2; python3 -m venv parsl_env;
+    #     'worker_init' : 'module load python/3.5.2; python3 -m venv parsl_env;
+    #                      source parsl_env/bin/activate; python3 -m pip install parsl==0.5.2'
     # },
     # 'swan': {
     #     'username': SWAN_USERNAME,
parsl/tests/test_htex/test_drain.py
ADDED
@@ -0,0 +1,78 @@
+import parsl
+import pytest
+import time
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+# this constant is used to scale some durations that happen
+# based around the expected drain period: the drain period
+# is TIME_CONST seconds, and the single executed task will
+# last twice that many number of seconds.
+TIME_CONST = 1
+
+
+def local_config():
+    return Config(
+        executors=[
+            HighThroughputExecutor(
+                label="htex_local",
+                drain_period=TIME_CONST,
+                worker_debug=True,
+                cores_per_worker=1,
+                encrypted=True,
+                provider=LocalProvider(
+                    channel=LocalChannel(),
+                    init_blocks=1,
+                    min_blocks=0,
+                    max_blocks=0,
+                    launcher=SimpleLauncher(),
+                ),
+            )
+        ],
+        strategy='none',
+    )
+
+
+@parsl.python_app
+def f(n):
+    import time
+    time.sleep(n)
+
+
+@pytest.mark.local
+def test_drain(try_assert):
+
+    htex = parsl.dfk().executors['htex_local']
+
+    # wait till we have a block running...
+
+    try_assert(lambda: len(htex.connected_managers()) == 1)
+
+    managers = htex.connected_managers()
+    assert managers[0]['active'], "The manager should be active"
+    assert not managers[0]['draining'], "The manager should not be draining"
+
+    fut = f(TIME_CONST * 2)
+
+    time.sleep(TIME_CONST)
+
+    # this assert should happen *very fast* after the above delay...
+    try_assert(lambda: htex.connected_managers()[0]['draining'], timeout_ms=500)
+
+    # and the test task should still be running...
+    assert not fut.done(), "The test task should still be running"
+
+    fut.result()
+
+    # and now we should see the manager disappear...
+    # ... with strategy='none', this should be coming from draining but
+    # that information isn't immediately obvious from the absence in
+    # connected managers.
+    # As with the above draining assert, this should happen very fast after
+    # the task ends.
+    try_assert(lambda: len(htex.connected_managers()) == 0, timeout_ms=500)
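This new test exercises the worker-drain support added to the HighThroughputExecutor in this release (see the executor.py, interchange.py and process_worker_pool.py entries above). A hedged sketch of enabling it outside the test suite; the values below are illustrative, not defaults this diff sets:

```python
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.providers import LocalProvider

# drain_period is given in seconds (the test above treats TIME_CONST as seconds);
# after that long a worker pool stops accepting new tasks and shuts down once
# its running tasks have finished.
config = Config(
    executors=[
        HighThroughputExecutor(
            label="htex_local",
            drain_period=30 * 60,  # drain each block after roughly 30 minutes
            provider=LocalProvider(init_blocks=1),
        )
    ]
)
```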
parsl/tests/test_monitoring/test_app_names.py
ADDED
@@ -0,0 +1,86 @@
+"""Tests monitoring records app name under various decoration patterns.
+"""
+
+import os
+import parsl
+import pytest
+import time
+
+from parsl.tests.configs.htex_local_alternate import fresh_config
+
+
+@parsl.python_app
+def regular_decorated_app():
+    return 5
+
+
+@pytest.mark.local
+def get_regular_decorated_app():
+    return regular_decorated_app
+
+
+def for_decoration_later():
+    return 77
+
+
+def get_for_decoration_later():
+    return parsl.python_app(for_decoration_later)
+
+
+def get_decorated_closure():
+
+    r = 53
+
+    @parsl.python_app
+    def decorated_closure():
+        return r
+
+    return decorated_closure
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("get_app,expected_name,expected_result",
+                         [(get_regular_decorated_app, "regular_decorated_app", 5),
+                          (get_for_decoration_later, "for_decoration_later", 77),
+                          (get_decorated_closure, "decorated_closure", 53)
+                          ])
+def test_app_name(get_app, expected_name, expected_result, tmpd_cwd):
+
+    # this is imported here rather than at module level because
+    # it isn't available in a plain parsl install, so this module
+    # would otherwise fail to import and break even a basic test
+    # run.
+    import sqlalchemy
+
+    c = fresh_config()
+    c.run_dir = tmpd_cwd
+    c.monitoring.logging_endpoint = f"sqlite:///{tmpd_cwd}/monitoring.db"
+    parsl.load(c)
+
+    app = get_app()
+    assert app().result() == expected_result
+
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+    engine = sqlalchemy.create_engine(c.monitoring.logging_endpoint)
+    with engine.begin() as connection:
+
+        def count_rows(table: str):
+            result = connection.execute(f"SELECT COUNT(*) FROM {table}")
+            (c, ) = result.first()
+            return c
+
+        # one workflow...
+        assert count_rows("workflow") == 1
+
+        # ... with one task ...
+        assert count_rows("task") == 1
+
+        # ... that was tried once ...
+        assert count_rows("try") == 1
+
+        # ... and has the expected name.
+        result = connection.execute("SELECT task_func_name FROM task")
+        (c, ) = result.first()
+        assert c == expected_name
parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py
CHANGED
@@ -37,6 +37,7 @@ def local_config():
         ],
         max_idletime=0.5,
         strategy='htex_auto_scale',
+        strategy_period=0.1
     )
 
 
@@ -62,16 +63,6 @@ def waiting_app(ident: int, outputs=(), inputs=()):
 def test_scale_out(tmpd_cwd, try_assert):
     dfk = parsl.dfk()
 
-    # reconfigure scaling strategy to run faster than usual. This allows
-    # this test to complete faster - at time of writing 27s with default
-    # 5s strategy, vs XXXX with 0.5s strategy.
-
-    # check this attribute still exists, in the presence of ongoing
-    # development, so we have some belief that setting it will not be
-    # setting a now-ignored parameter.
-    assert hasattr(dfk.job_status_poller, 'interval')
-    dfk.job_status_poller.interval = 0.1
-
     num_managers = len(dfk.executors['htex_local'].connected_managers())
 
     assert num_managers == 0, "Expected 0 managers at start"
@@ -98,7 +89,8 @@ def test_scale_out(tmpd_cwd, try_assert):
 
     assert dfk.executors['htex_local'].outstanding == 0
 
-    # now we can launch one "long" task -
+    # now we can launch one "long" task -
+    # and what should happen is that the connected_managers count "eventually" (?) converges to 1 and stays there.
 
     finish_path = tmpd_cwd / "stage2_workers_may_continue"
 