parsl 2024.2.26__py3-none-any.whl → 2024.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/addresses.py +1 -1
- parsl/configs/ASPIRE1.py +1 -1
- parsl/configs/ad_hoc.py +1 -1
- parsl/configs/bridges.py +1 -1
- parsl/configs/cc_in2p3.py +1 -1
- parsl/configs/expanse.py +1 -1
- parsl/configs/frontera.py +1 -1
- parsl/configs/kubernetes.py +1 -1
- parsl/configs/midway.py +1 -1
- parsl/configs/osg.py +1 -1
- parsl/configs/stampede2.py +1 -1
- parsl/dataflow/dflow.py +11 -6
- parsl/dataflow/taskrecord.py +3 -1
- parsl/executors/high_throughput/executor.py +69 -37
- parsl/executors/high_throughput/interchange.py +78 -59
- parsl/executors/high_throughput/process_worker_pool.py +40 -28
- parsl/executors/taskvine/executor.py +3 -1
- parsl/executors/workqueue/executor.py +5 -2
- parsl/executors/workqueue/parsl_coprocess.py +107 -95
- parsl/jobs/job_status_poller.py +9 -3
- parsl/jobs/strategy.py +4 -3
- parsl/monitoring/db_manager.py +25 -5
- parsl/monitoring/monitoring.py +6 -2
- parsl/monitoring/remote.py +29 -0
- parsl/monitoring/visualization/models.py +7 -0
- parsl/providers/slurm/slurm.py +13 -2
- parsl/tests/configs/ad_hoc_cluster_htex.py +1 -1
- parsl/tests/configs/bluewaters.py +1 -1
- parsl/tests/configs/bridges.py +1 -1
- parsl/tests/configs/cc_in2p3.py +1 -1
- parsl/tests/configs/comet.py +1 -1
- parsl/tests/configs/frontera.py +1 -1
- parsl/tests/configs/midway.py +1 -1
- parsl/tests/configs/nscc_singapore.py +1 -1
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +1 -1
- parsl/tests/configs/summit.py +1 -1
- parsl/tests/configs/theta.py +1 -1
- parsl/tests/configs/user_opts.py +3 -1
- parsl/tests/manual_tests/test_ad_hoc_htex.py +1 -1
- parsl/tests/scaling_tests/htex_local.py +1 -1
- parsl/tests/sites/test_affinity.py +1 -1
- parsl/tests/sites/test_concurrent.py +1 -1
- parsl/tests/sites/test_dynamic_executor.py +1 -1
- parsl/tests/sites/test_worker_info.py +1 -1
- parsl/tests/test_htex/test_basic.py +1 -1
- parsl/tests/test_htex/test_connected_blocks.py +1 -1
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +1 -1
- parsl/tests/test_htex/test_disconnected_blocks.py +1 -1
- parsl/tests/test_htex/test_htex.py +13 -0
- parsl/tests/test_htex/test_manager_failure.py +1 -1
- parsl/tests/test_htex/test_missing_worker.py +1 -1
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +1 -1
- parsl/tests/test_htex/test_worker_failure.py +1 -1
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +1 -1
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +1 -1
- parsl/tests/test_mpi_apps/test_resource_spec.py +1 -1
- parsl/tests/test_scaling/test_scale_down.py +2 -2
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +159 -0
- parsl/usage_tracking/usage.py +5 -9
- parsl/version.py +1 -1
- parsl-2024.3.11.data/scripts/parsl_coprocess.py +166 -0
- {parsl-2024.2.26.data → parsl-2024.3.11.data}/scripts/process_worker_pool.py +40 -28
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/METADATA +2 -2
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/RECORD +70 -70
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/WHEEL +1 -1
- parsl/configs/bluewaters.py +0 -28
- parsl-2024.2.26.data/scripts/parsl_coprocess.py +0 -154
- {parsl-2024.2.26.data → parsl-2024.3.11.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/LICENSE +0 -0
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/entry_points.txt +0 -0
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/top_level.txt +0 -0
@@ -62,7 +62,7 @@ class Manager:
                  result_port,
                  cores_per_worker,
                  mem_per_worker,
-
+                 max_workers_per_node,
                  prefetch_capacity,
                  uid,
                  block_id,
@@ -100,8 +100,8 @@ class Manager:
         the there's sufficient memory for each worker. If set to None, memory on node is not
         considered in the determination of workers to be launched on node by the manager.

-
-
+        max_workers_per_node : int | float
+             Caps the maximum number of workers that can be launched.

         prefetch_capacity : int
              Number of tasks that could be prefetched over available worker capacity.
@@ -140,7 +140,9 @@ class Manager:
             Path to the certificate directory.
         """

-        logger.info("Manager
+        logger.info("Manager initializing")
+
+        self._start_time = time.time()

         try:
             ix_address = probe_addresses(addresses.split(','), task_port, timeout=address_probe_timeout)
@@ -188,15 +190,15 @@ class Manager:
         else:
             available_mem_on_node = round(psutil.virtual_memory().available / (2**30), 1)

-        self.
+        self.max_workers_per_node = max_workers_per_node
         self.prefetch_capacity = prefetch_capacity

-        mem_slots =
+        mem_slots = max_workers_per_node
         # Avoid a divide by 0 error.
         if mem_per_worker and mem_per_worker > 0:
             mem_slots = math.floor(available_mem_on_node / mem_per_worker)

-        self.worker_count: int = min(
+        self.worker_count: int = min(max_workers_per_node,
                                      mem_slots,
                                      math.floor(cores_on_node / cores_per_worker))

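Note: the last hunk above caps the manager's worker count by taking the minimum of the new max_workers_per_node limit, the number of memory-sized slots, and the number of core-sized slots. A worked example of that arithmetic, using made-up node numbers that are not part of this diff:

```python
import math

# Hypothetical node, for illustration only.
cores_on_node = 32
available_mem_on_node = 60.5      # GB free on the node
cores_per_worker = 1.0
mem_per_worker = 4.0              # GB requested per worker; None/0 disables this cap
max_workers_per_node = 24         # the new cap introduced in this release

mem_slots = max_workers_per_node
if mem_per_worker and mem_per_worker > 0:
    mem_slots = math.floor(available_mem_on_node / mem_per_worker)    # 15

worker_count = min(max_workers_per_node,                              # 24
                   mem_slots,                                         # 15
                   math.floor(cores_on_node / cores_per_worker))      # 32
print(worker_count)  # 15: memory is the binding constraint in this example
```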
@@ -237,7 +239,8 @@ class Manager:
     def create_reg_message(self):
         """ Creates a registration message to identify the worker to the interchange
         """
-        msg = {'
+        msg = {'type': 'registration',
+               'parsl_v': PARSL_VERSION,
                'python_v': "{}.{}.{}".format(sys.version_info.major,
                                              sys.version_info.minor,
                                              sys.version_info.micro),
@@ -258,8 +261,9 @@ class Manager:
     def heartbeat_to_incoming(self):
         """ Send heartbeat to the incoming task queue
         """
-
-
+        msg = {'type': 'heartbeat'}
+        b_msg = json.dumps(msg).encode('utf-8')
+        self.task_incoming.send(b_msg)
         logger.debug("Sent heartbeat")

     @wrap_with_logs
@@ -284,9 +288,17 @@ class Manager:
         last_interchange_contact = time.time()
         task_recv_counter = 0

-        poll_timer = self.poll_period
-
         while not kill_event.is_set():
+
+            # This loop will sit inside poller.poll until either a message
+            # arrives or one of these event times is reached. This code
+            # assumes that the event times won't change except on iteration
+            # of this loop - so will break if a different thread does
+            # anything to bring one of the event times earlier - and that the
+            # time here are correctly copy-pasted from the relevant if
+            # statements.
+            next_interesting_event_time = min(last_beat + self.heartbeat_period,
+                                              last_interchange_contact + self.heartbeat_threshold)
             try:
                 pending_task_count = self.pending_task_queue.qsize()
             except NotImplementedError:
@@ -296,14 +308,14 @@ class Manager:
             logger.debug("ready workers: {}, pending tasks: {}".format(self.ready_worker_count.value,
                                                                        pending_task_count))

-            if time.time()
+            if time.time() >= last_beat + self.heartbeat_period:
                 self.heartbeat_to_incoming()
                 last_beat = time.time()

-
+            poll_duration_s = max(0, next_interesting_event_time - time.time())
+            socks = dict(poller.poll(timeout=poll_duration_s * 1000))

             if self.task_incoming in socks and socks[self.task_incoming] == zmq.POLLIN:
-                poll_timer = 0
                 _, pkl_msg = self.task_incoming.recv_multipart()
                 tasks = pickle.loads(pkl_msg)
                 last_interchange_contact = time.time()
@@ -320,14 +332,9 @@ class Manager:

             else:
                 logger.debug("No incoming tasks")
-                # Limit poll duration to heartbeat_period
-                # heartbeat_period is in s vs poll_timer in ms
-                if not poll_timer:
-                    poll_timer = self.poll_period
-                poll_timer = min(self.heartbeat_period * 1000, poll_timer * 2)

             # Only check if no messages were received.
-            if time.time()
+            if time.time() >= last_interchange_contact + self.heartbeat_threshold:
                 logger.critical("Missing contact with interchange beyond heartbeat_threshold")
                 kill_event.set()
                 logger.critical("Exiting")
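Note: the loop above replaces the old exponential poll_timer backoff with a poll that sleeps only until the next interesting event, either the next outgoing heartbeat falling due or the interchange-silence threshold expiring. A minimal, self-contained sketch of that pattern (the endpoint and the short periods are illustrative, not parsl's defaults):

```python
import time
import zmq

heartbeat_period = 1        # seconds between outgoing heartbeats (illustrative)
heartbeat_threshold = 5     # seconds of silence tolerated before giving up (illustrative)

context = zmq.Context()
task_incoming = context.socket(zmq.DEALER)      # stand-in for the manager's task socket
task_incoming.connect("tcp://127.0.0.1:5555")   # nothing listens here in this sketch

poller = zmq.Poller()
poller.register(task_incoming, zmq.POLLIN)

last_beat = time.time()
last_interchange_contact = time.time()

while True:
    # Wake no later than the next heartbeat due, or the silence threshold expiring.
    next_event = min(last_beat + heartbeat_period,
                     last_interchange_contact + heartbeat_threshold)

    if time.time() >= last_beat + heartbeat_period:
        last_beat = time.time()          # a real manager sends its heartbeat here

    poll_duration_s = max(0, next_event - time.time())
    socks = dict(poller.poll(timeout=poll_duration_s * 1000))   # zmq timeouts are in ms

    if task_incoming in socks and socks[task_incoming] == zmq.POLLIN:
        last_interchange_contact = time.time()   # a real manager unpickles tasks here

    if time.time() >= last_interchange_contact + heartbeat_threshold:
        break    # no contact for too long: shut down, as the worker pool does

context.destroy()
```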
@@ -364,7 +371,8 @@ class Manager:
                 logger.exception("Got an exception: {}".format(e))

             if time.time() > last_result_beat + self.heartbeat_period:
-
+                heartbeat_message = f"last_result_beat={last_result_beat} heartbeat_period={self.heartbeat_period} seconds"
+                logger.info(f"Sending heartbeat via results connection: {heartbeat_message}")
                 last_result_beat = time.time()
                 items.append(pickle.dumps({'type': 'heartbeat'}))

@@ -405,7 +413,9 @@ class Manager:
                         raise WorkerLost(worker_id, platform.node())
                     except Exception:
                         logger.info("Putting exception for executor task {} in the pending result queue".format(task['task_id']))
-                        result_package = {'type': 'result',
+                        result_package = {'type': 'result',
+                                          'task_id': task['task_id'],
+                                          'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))}
                         pkl_package = pickle.dumps(result_package)
                         self.pending_result_queue.put(pkl_package)
                     except KeyError:
@@ -452,7 +462,6 @@ class Manager:

        TODO: Move task receiving to a thread
        """
-        start = time.time()
         self._kill_event = threading.Event()
         self._tasks_in_progress = self._mp_manager.dict()

@@ -502,7 +511,7 @@ class Manager:
         self.task_incoming.close()
         self.result_outgoing.close()
         self.zmq_context.term()
-        delta = time.time() -
+        delta = time.time() - self._start_time
         logger.info("process_worker_pool ran for {} seconds".format(delta))
         return

@@ -787,7 +796,7 @@ if __name__ == "__main__":
                         help="GB of memory assigned to each worker process. Default=0, no assignment")
     parser.add_argument("-t", "--task_port", required=True,
                         help="REQUIRED: Task port for receiving tasks from the interchange")
-    parser.add_argument("--
+    parser.add_argument("--max_workers_per_node", default=float('inf'),
                         help="Caps the maximum workers that can be launched, default:infinity")
     parser.add_argument("-p", "--prefetch_capacity", default=0,
                         help="Number of tasks that can be prefetched to the manager. Default is 0.")
@@ -841,7 +850,7 @@ if __name__ == "__main__":
         logger.info("task_port: {}".format(args.task_port))
         logger.info("result_port: {}".format(args.result_port))
         logger.info("addresses: {}".format(args.addresses))
-        logger.info("
+        logger.info("max_workers_per_node: {}".format(args.max_workers_per_node))
         logger.info("poll_period: {}".format(args.poll))
         logger.info("address_probe_timeout: {}".format(args.address_probe_timeout))
         logger.info("Prefetch capacity: {}".format(args.prefetch_capacity))
@@ -860,7 +869,10 @@ if __name__ == "__main__":
                           block_id=args.block_id,
                           cores_per_worker=float(args.cores_per_worker),
                           mem_per_worker=None if args.mem_per_worker == 'None' else float(args.mem_per_worker),
-
+                          max_workers_per_node=(
+                              args.max_workers_per_node if args.max_workers_per_node == float('inf')
+                              else int(args.max_workers_per_node)
+                          ),
                           prefetch_capacity=int(args.prefetch_capacity),
                           heartbeat_threshold=int(args.hb_threshold),
                           heartbeat_period=int(args.hb_period),
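Note: --max_workers_per_node defaults to float('inf'), and the value is only passed through int() when a finite value was actually supplied, because int(float('inf')) raises OverflowError. A small stand-alone sketch of that parsing pattern (the option name matches the diff; everything else here is illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--max_workers_per_node", default=float('inf'),
                    help="Caps the maximum workers that can be launched, default:infinity")

args = parser.parse_args(["--max_workers_per_node", "8"])

raw = args.max_workers_per_node
# When the option is omitted, raw is the float('inf') sentinel and is kept as-is;
# when supplied on the command line it arrives as a string, which int() converts.
max_workers_per_node = raw if raw == float('inf') else int(raw)
print(max_workers_per_node)   # 8
```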
parsl/executors/taskvine/executor.py
CHANGED
@@ -228,7 +228,9 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         # factory logs go with manager logs regardless
         self.factory_config.scratch_dir = self.manager_config.vine_log_dir
         logger.debug(f"Function data directory: {self._function_data_dir}, log directory: {log_dir}")
-        logger.debug(
+        logger.debug(
+            f"TaskVine manager log directory: {self.manager_config.vine_log_dir}, "
+            f"factory log directory: {self.factory_config.scratch_dir}")

     def start(self):
         """Create submit process and collector thread to create, send, and
parsl/executors/workqueue/executor.py
CHANGED
@@ -61,8 +61,11 @@ logger = logging.getLogger(__name__)


 # Support structure to communicate parsl tasks to the work queue submit thread.
-ParslTaskToWq = namedtuple(
-
+ParslTaskToWq = namedtuple(
+    'ParslTaskToWq',
+    'id '
+    'category '
+    'cores memory disk gpus priority running_time_min env_pkg map_file function_file result_file input_files output_files')

 # Support structure to communicate final status of work queue tasks to parsl
 # if result_received is True:
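Note: the namedtuple field list above is a single string built from adjacent literals, which Python concatenates at compile time; the trailing spaces inside 'id ' and 'category ' are what keep the field names separated. A tiny check of that (the definition is copied from the diff, the usage is illustrative):

```python
from collections import namedtuple

ParslTaskToWq = namedtuple(
    'ParslTaskToWq',
    'id '
    'category '
    'cores memory disk gpus priority running_time_min env_pkg map_file function_file result_file input_files output_files')

print(ParslTaskToWq._fields[:4])   # ('id', 'category', 'cores', 'memory')
```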
parsl/executors/workqueue/parsl_coprocess.py
CHANGED
@@ -1,18 +1,29 @@
 #! /usr/bin/env python3

-import sys
-from parsl.app.errors import RemoteExceptionWrapper
-
 import socket
 import json
 import os
 import sys
-
-
+
+# If enabled, coprocess will print to stdout
+debug_mode = False
+
+# Send a message on a binary I/O stream by sending the message length and then the (string) message.
+def send_message(stream, data):
+    size = len(data)
+    size_msg = "{}\n".format(size)
+    stream.write(size_msg)
+    stream.write(data)
+
+# Receive a standard message from a binary I/O stream by reading length and then returning the (string) message
+def recv_message(stream):
+    line = stream.readline()
+    length = int(line)
+    return stream.read(length)
+
+# Decorator for remotely execution functions to package things as json.
 def remote_execute(func):
-    def remote_wrapper(event
-        if q:
-            event = json.loads(event)
+    def remote_wrapper(event):
         kwargs = event["fn_kwargs"]
         args = event["fn_args"]
         try:
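Note: send_message and recv_message above frame each message as a decimal length on its own line followed by exactly that many characters, so either end of a text stream can read one whole message without any other delimiter. A self-contained round trip over an in-memory stream (the two functions are copied from the diff; the io.StringIO usage and payload are only for illustration):

```python
import io
import json

def send_message(stream, data):
    size = len(data)
    size_msg = "{}\n".format(size)
    stream.write(size_msg)
    stream.write(data)

def recv_message(stream):
    line = stream.readline()
    length = int(line)
    return stream.read(length)

buf = io.StringIO()
send_message(buf, json.dumps({"name": "parsl_coprocess", "port": 12345}))
buf.seek(0)
print(json.loads(recv_message(buf)))   # {'name': 'parsl_coprocess', 'port': 12345}
```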
@@ -21,114 +32,115 @@ def remote_execute(func):
                 "StatusCode": 200
             }
         except Exception as e:
-                response = {
+            response = {
                 "Result": str(e),
-                    "StatusCode": 500
+                "StatusCode": 500
             }
-
-            return response
-            q.put(response)
+        return response
     return remote_wrapper
-
-
-def send_configuration(config):
-    config_string = json.dumps(config)
-    config_cmd = f"{len(config_string) + 1}\n{config_string}\n"
-    sys.stdout.write(config_cmd)
-    sys.stdout.flush()
+
+# Main loop of coprocess for executing network functions.
 def main():
+    # Listen on an arbitrary port to be reported to the worker.
     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     try:
-        # modify the port argument to be 0 to listen on an arbitrary port
         s.bind(('localhost', 0))
     except Exception as e:
         s.close()
-        print(e)
-        exit(1)
-
+        print(e, file=sys.stderr)
+        sys.exit(1)
+
+    # Inform the worker of name and port for later connection.
     config = {
-
-
-
-
+        "name": name(), # noqa: F821
+        "port": s.getsockname()[1],
+    }
+    send_message(sys.stdout, json.dumps(config))
+    sys.stdout.flush()
+
+    # Remember original working directory b/c we change for each invocation.
     abs_working_dir = os.getcwd()
+
+    # Create pipe for communication with child process
+    rpipe, wpipe = os.pipe()
+    rpipestream = os.fdopen(rpipe, "r")
+
     while True:
         s.listen()
         conn, addr = s.accept()
-
+        connstream = conn.makefile("rw", encoding="utf-8")
+
+        if debug_mode:
+            print('Network function: connection from {}'.format(addr), file=sys.stderr)
+
         while True:
-            #
-
-
-
-            if
-
-
-
-
-
-
+            # Read the invocation header from the worker
+            line = connstream.readline()
+
+            # If end of file, then break out and accept again
+            if not line:
+                break
+
+            # Parse the invocation header.
+            input_spec = line.split()
+            function_name = input_spec[0]
+            task_id = int(input_spec[1])
+            event_size = int(input_spec[2])
+
+            # then read the contents of the event itself
+            event_str = connstream.read(event_size)
+            event = json.loads(event_str)
+            exec_method = event.get("remote_task_exec_method", None)
+
             try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                elif
-
+                # First move to target directory (is undone in finally block)
+                os.chdir(os.path.join(abs_working_dir, f't.{task_id}'))
+
+                # Then invoke function by desired method, resulting in
+                # response containing the text representation of the result.
+
+                if exec_method == "direct":
+                    response = json.dumps(globals()[function_name](event))
+                else:
+                    p = os.fork()
+                    if p == 0:
+                        response = globals()[function_name](event)
+                        wpipestream = os.fdopen(wpipe, "w")
+                        send_message(wpipestream, json.dumps(response))
+                        wpipestream.flush()
+                        os._exit(0)
+                    elif p < 0:
+                        if debug_mode:
+                            print(f'Network function: unable to fork to execute {function_name}', file=sys.stderr)
+                        response = {
+                            "Result": "unable to fork",
+                            "StatusCode": 500
+                        }
+                        response = json.dumps(response)
                     else:
-
-
-
-
-
-
-
-                        response = {
-                            "Result": "unable to fork",
-                            "StatusCode": 500
-                        }
-                    else:
-                        chunk = os.read(read, 65536).decode("utf-8")
-                        all_chunks = [chunk]
-                        while (len(chunk) >= 65536):
-                            chunk = os.read(read, 65536).decode("utf-8")
-                            all_chunks.append(chunk)
-                        response = "".join(all_chunks).encode("utf-8")
-                        os.waitid(os.P_PID, p, os.WEXITED)
-                response_size = len(response)
-                size_msg = "{}\n".format(response_size)
-                # send the size of response
-                conn.sendall(size_msg.encode('utf-8'))
-                # send response
-                conn.sendall(response)
-                break
+                        # Get response string from child process.
+                        response = recv_message(rpipestream)
+                        # Wait for child process to complete
+                        os.waitpid(p, 0)
+
+                # At this point, response is set to a value one way or the other
+
             except Exception as e:
-
+                if debug_mode:
+                    print("Network function encountered exception ", str(e), file=sys.stderr)
                 response = {
                     'Result': f'network function encountered exception {e}',
                     'Status Code': 500
                 }
-                response = json.dumps(response)
-                response_size = len(response)
-                size_msg = "{}\n".format(response_size)
-                # send the size of response
-                conn.sendall(size_msg.encode('utf-8'))
-                # send response
-                conn.sendall(response)
+                response = json.dumps(response)
             finally:
+                # Restore the working directory, no matter how the function ended.
                 os.chdir(abs_working_dir)
+
+                # Send response string back to parent worker process.
+                send_message(connstream, response)
+                connstream.flush()
+
     return 0
 def name():
     return 'parsl_coprocess'
@@ -136,9 +148,9 @@ def name():
 def run_parsl_task(a, b, c):
     import parsl.executors.workqueue.exec_parsl_function as epf
     try:
-        map_file, function_file, result_file = (a, b, c)
+        (map_file, function_file, result_file) = (a, b, c)
         try:
-            namespace, function_code, result_name = epf.load_function(map_file, function_file)
+            (namespace, function_code, result_name) = epf.load_function(map_file, function_file)
         except Exception:
             raise
         try:
@@ -150,5 +162,5 @@ def run_parsl_task(a, b, c):
             epf.dump_result_to_file(result_file, result)
     return None
 if __name__ == "__main__":
-
+    main()

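Note: in the non-"direct" path above, the coprocess forks; the child runs the requested function, writes a framed response into a pipe, and exits, while the parent reads the response back with recv_message and reaps the child with waitpid. A stripped-down, POSIX-only sketch of that fork/pipe/waitpid flow (the computed result stands in for the invoked function and is not from the diff):

```python
import json
import os

def send_message(stream, data):
    stream.write("{}\n".format(len(data)))
    stream.write(data)

def recv_message(stream):
    length = int(stream.readline())
    return stream.read(length)

rpipe, wpipe = os.pipe()
rpipestream = os.fdopen(rpipe, "r")

p = os.fork()
if p == 0:
    # Child: compute a result, frame it onto the pipe, exit without cleanup.
    result = {"Result": 42, "StatusCode": 200}      # stand-in for the invoked function
    wpipestream = os.fdopen(wpipe, "w")
    send_message(wpipestream, json.dumps(result))
    wpipestream.flush()
    os._exit(0)
else:
    # Parent: read the framed response, then reap the child.
    response = recv_message(rpipestream)
    os.waitpid(p, 0)
    print(json.loads(response))   # {'Result': 42, 'StatusCode': 200}
```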
parsl/jobs/job_status_poller.py
CHANGED
@@ -72,11 +72,17 @@ class PollItem:
     def executor(self) -> BlockProviderExecutor:
         return self._executor

-    def scale_in(self, n,
-
+    def scale_in(self, n, max_idletime=None):
+
+        if max_idletime is None:
             block_ids = self._executor.scale_in(n)
         else:
-
+            # This is a HighThroughputExecutor-specific interface violation.
+            # This code hopes, through pan-codebase reasoning, that this
+            # scale_in method really does come from HighThroughputExecutor,
+            # and so does have an extra max_idletime parameter not present
+            # in the executor interface.
+            block_ids = self._executor.scale_in(n, max_idletime=max_idletime)
         if block_ids is not None:
             new_status = {}
             for block_id in block_ids:
parsl/jobs/strategy.py
CHANGED
@@ -245,7 +245,8 @@ class Strategy:
                             exec_status.scale_in(active_blocks - min_blocks)

                         else:
-                            logger.debug(
+                            logger.debug(
+                                f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s for executor {label}; not scaling in")

                 # Case 2
                 # More tasks than the available slots.
@@ -288,8 +289,8 @@ class Strategy:
                         excess_slots = math.ceil(active_slots - (active_tasks * parallelism))
                         excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
                         excess_blocks = min(excess_blocks, active_blocks - min_blocks)
-                        logger.debug(f"Requesting scaling in by {excess_blocks} blocks")
-                        exec_status.scale_in(excess_blocks,
+                        logger.debug(f"Requesting scaling in by {excess_blocks} blocks with idle time {self.max_idletime}s")
+                        exec_status.scale_in(excess_blocks, max_idletime=self.max_idletime)
                     else:
                         logger.error("This strategy does not support scaling in except for HighThroughputExecutor - taking no action")
                 else:
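Note: the second hunk above works out how many whole blocks are surplus before asking the executor to scale in, and now forwards max_idletime so only sufficiently idle blocks are candidates. A worked example of the arithmetic with made-up numbers (not taken from the diff):

```python
import math

# Hypothetical snapshot of one HighThroughputExecutor, for illustration only.
active_blocks = 4
min_blocks = 1
nodes_per_block = 2
tasks_per_node = 8                                                   # worker slots per node
active_slots = active_blocks * nodes_per_block * tasks_per_node      # 64
active_tasks = 10
parallelism = 1.0

excess_slots = math.ceil(active_slots - (active_tasks * parallelism))                 # 54
excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))   # 4
excess_blocks = min(excess_blocks, active_blocks - min_blocks)                        # 3
print(excess_blocks)   # scale in by 3 blocks, subject to the max_idletime filter
```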
parsl/monitoring/db_manager.py
CHANGED
@@ -103,7 +103,13 @@ class Database:
     def rollback(self) -> None:
         self.session.rollback()

-    def _generate_mappings(
+    def _generate_mappings(
+        self,
+        table: Table,
+        columns: Optional[List[str]] = None,
+        messages: List[MonitoringMessage] = [],
+    ) -> List[Dict[str, Any]]:
+
         mappings = []
         for msg in messages:
             m = {}
@@ -250,6 +256,12 @@ class Database:
             'psutil_process_disk_write', Float, nullable=True)
         psutil_process_status = Column(
             'psutil_process_status', Text, nullable=True)
+        psutil_cpu_num = Column(
+            'psutil_cpu_num', Text, nullable=True)
+        psutil_process_num_ctx_switches_voluntary = Column(
+            'psutil_process_num_ctx_switches_voluntary', Float, nullable=True)
+        psutil_process_num_ctx_switches_involuntary = Column(
+            'psutil_process_num_ctx_switches_involuntary', Float, nullable=True)
         __table_args__ = (
             PrimaryKeyConstraint('try_id', 'task_id', 'run_id', 'timestamp'),
         )
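Note: the three new resource columns line up with the parsl/monitoring/remote.py change listed above (+29 lines, not shown in this excerpt), which presumably gathers the values. They correspond to per-process figures psutil can report; cpu_num() is only available on some platforms (for example Linux), and num_ctx_switches() returns a named tuple of voluntary and involuntary counts. A plain-psutil sketch of where such values come from:

```python
import psutil

p = psutil.Process()                  # the monitored process
sample = {
    'psutil_cpu_num': p.cpu_num(),    # CPU the process last ran on (platform-dependent)
    'psutil_process_num_ctx_switches_voluntary': p.num_ctx_switches().voluntary,
    'psutil_process_num_ctx_switches_involuntary': p.num_ctx_switches().involuntary,
}
print(sample)
```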
@@ -518,7 +530,10 @@ class DatabaseManager:
                         reprocessable_first_resource_messages.append(msg)
                     else:
                         if task_try_id in deferred_resource_messages:
-                            logger.error(
+                            logger.error(
+                                "Task {} already has a deferred resource message. "
+                                "Discarding previous message.".format(msg['task_id'])
+                            )
                         deferred_resource_messages[task_try_id] = msg
                 elif msg['last_msg']:
                     # This assumes that the primary key has been added
@@ -544,7 +559,10 @@ class DatabaseManager:
                 if reprocessable_last_resource_messages:
                     self._insert(table=STATUS, messages=reprocessable_last_resource_messages)
             except Exception:
-                logger.exception(
+                logger.exception(
+                    "Exception in db loop: this might have been a malformed message, "
+                    "or some other error. monitoring data may have been lost"
+                )
                 exception_happened = True
         if exception_happened:
             raise RuntimeError("An exception happened sometime during database processing and should have been logged in database_manager.log")
@@ -571,8 +589,10 @@ class DatabaseManager:
                 self._dispatch_to_internal(x)
             elif queue_tag == 'resource':
                 assert isinstance(x, tuple), "_migrate_logs_to_internal was expecting a tuple, got {}".format(x)
-                assert x[0] == MessageType.RESOURCE_INFO,
-                    "_migrate_logs_to_internal can only migrate RESOURCE_INFO message from resource queue,
+                assert x[0] == MessageType.RESOURCE_INFO, (
+                    "_migrate_logs_to_internal can only migrate RESOURCE_INFO message from resource queue, "
+                    "got tag {}, message {}".format(x[0], x)
+                )
                 self._dispatch_to_internal(x)
             elif queue_tag == 'node':
                 assert len(x) == 2, "expected message tuple to have exactly two elements"
parsl/monitoring/monitoring.py
CHANGED
@@ -290,8 +290,12 @@ class MonitoringHub(RepresentationMixin):
             self._dfk_channel.close()
         if exception_msgs:
             for exception_msg in exception_msgs:
-                self.logger.error(
-
+                self.logger.error(
+                    "{} process delivered an exception: {}. Terminating all monitoring processes immediately.".format(
+                        exception_msg[0],
+                        exception_msg[1]
+                    )
+                )
         self.router_proc.terminate()
         self.dbm_proc.terminate()
         self.filesystem_proc.terminate()