parsl 2024.2.26__py3-none-any.whl → 2024.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. parsl/addresses.py +1 -1
  2. parsl/configs/ASPIRE1.py +1 -1
  3. parsl/configs/ad_hoc.py +1 -1
  4. parsl/configs/bridges.py +1 -1
  5. parsl/configs/cc_in2p3.py +1 -1
  6. parsl/configs/expanse.py +1 -1
  7. parsl/configs/frontera.py +1 -1
  8. parsl/configs/kubernetes.py +1 -1
  9. parsl/configs/midway.py +1 -1
  10. parsl/configs/osg.py +1 -1
  11. parsl/configs/stampede2.py +1 -1
  12. parsl/dataflow/dflow.py +11 -6
  13. parsl/dataflow/taskrecord.py +3 -1
  14. parsl/executors/high_throughput/executor.py +69 -37
  15. parsl/executors/high_throughput/interchange.py +78 -59
  16. parsl/executors/high_throughput/process_worker_pool.py +40 -28
  17. parsl/executors/taskvine/executor.py +3 -1
  18. parsl/executors/workqueue/executor.py +5 -2
  19. parsl/executors/workqueue/parsl_coprocess.py +107 -95
  20. parsl/jobs/job_status_poller.py +9 -3
  21. parsl/jobs/strategy.py +4 -3
  22. parsl/monitoring/db_manager.py +25 -5
  23. parsl/monitoring/monitoring.py +6 -2
  24. parsl/monitoring/remote.py +29 -0
  25. parsl/monitoring/visualization/models.py +7 -0
  26. parsl/providers/slurm/slurm.py +13 -2
  27. parsl/tests/configs/ad_hoc_cluster_htex.py +1 -1
  28. parsl/tests/configs/bluewaters.py +1 -1
  29. parsl/tests/configs/bridges.py +1 -1
  30. parsl/tests/configs/cc_in2p3.py +1 -1
  31. parsl/tests/configs/comet.py +1 -1
  32. parsl/tests/configs/frontera.py +1 -1
  33. parsl/tests/configs/midway.py +1 -1
  34. parsl/tests/configs/nscc_singapore.py +1 -1
  35. parsl/tests/configs/osg_htex.py +1 -1
  36. parsl/tests/configs/petrelkube.py +1 -1
  37. parsl/tests/configs/summit.py +1 -1
  38. parsl/tests/configs/theta.py +1 -1
  39. parsl/tests/configs/user_opts.py +3 -1
  40. parsl/tests/manual_tests/test_ad_hoc_htex.py +1 -1
  41. parsl/tests/scaling_tests/htex_local.py +1 -1
  42. parsl/tests/sites/test_affinity.py +1 -1
  43. parsl/tests/sites/test_concurrent.py +1 -1
  44. parsl/tests/sites/test_dynamic_executor.py +1 -1
  45. parsl/tests/sites/test_worker_info.py +1 -1
  46. parsl/tests/test_htex/test_basic.py +1 -1
  47. parsl/tests/test_htex/test_connected_blocks.py +1 -1
  48. parsl/tests/test_htex/test_cpu_affinity_explicit.py +1 -1
  49. parsl/tests/test_htex/test_disconnected_blocks.py +1 -1
  50. parsl/tests/test_htex/test_htex.py +13 -0
  51. parsl/tests/test_htex/test_manager_failure.py +1 -1
  52. parsl/tests/test_htex/test_missing_worker.py +1 -1
  53. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +1 -1
  54. parsl/tests/test_htex/test_worker_failure.py +1 -1
  55. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +1 -1
  56. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +1 -1
  57. parsl/tests/test_mpi_apps/test_resource_spec.py +1 -1
  58. parsl/tests/test_scaling/test_scale_down.py +2 -2
  59. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +159 -0
  60. parsl/usage_tracking/usage.py +5 -9
  61. parsl/version.py +1 -1
  62. parsl-2024.3.11.data/scripts/parsl_coprocess.py +166 -0
  63. {parsl-2024.2.26.data → parsl-2024.3.11.data}/scripts/process_worker_pool.py +40 -28
  64. {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/METADATA +2 -2
  65. {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/RECORD +70 -70
  66. {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/WHEEL +1 -1
  67. parsl/configs/bluewaters.py +0 -28
  68. parsl-2024.2.26.data/scripts/parsl_coprocess.py +0 -154
  69. {parsl-2024.2.26.data → parsl-2024.3.11.data}/scripts/exec_parsl_function.py +0 -0
  70. {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/LICENSE +0 -0
  71. {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/entry_points.txt +0 -0
  72. {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/top_level.txt +0 -0
parsl/monitoring/remote.py CHANGED
@@ -201,6 +201,8 @@ def monitor(pid: int,

  children_user_time = {} # type: Dict[int, float]
  children_system_time = {} # type: Dict[int, float]
+ children_num_ctx_switches_voluntary = {} # type: Dict[int, float]
+ children_num_ctx_switches_involuntary = {} # type: Dict[int, float]

  def accumulate_and_prepare() -> Dict[str, Any]:
  d = {"psutil_process_" + str(k): v for k, v in pm.as_dict().items() if k in simple}
@@ -218,6 +220,15 @@ def monitor(pid: int,
  logging.debug("got children")

  d["psutil_cpu_count"] = psutil.cpu_count()
+
+ # note that this will be the CPU number of the base process, not anything launched by it
+ d["psutil_cpu_num"] = pm.cpu_num()
+
+ pctxsw = pm.num_ctx_switches()
+
+ d["psutil_process_num_ctx_switches_voluntary"] = pctxsw.voluntary
+ d["psutil_process_num_ctx_switches_involuntary"] = pctxsw.involuntary
+
  d['psutil_process_memory_virtual'] = pm.memory_info().vms
  d['psutil_process_memory_resident'] = pm.memory_info().rss
  d['psutil_process_time_user'] = pm.cpu_times().user
@@ -238,6 +249,11 @@ def monitor(pid: int,
  child_system_time = child.cpu_times().system
  children_user_time[child.pid] = child_user_time
  children_system_time[child.pid] = child_system_time
+
+ pctxsw = child.num_ctx_switches()
+ children_num_ctx_switches_voluntary[child.pid] = pctxsw.voluntary
+ children_num_ctx_switches_involuntary[child.pid] = pctxsw.involuntary
+
  d['psutil_process_memory_virtual'] += child.memory_info().vms
  d['psutil_process_memory_resident'] += child.memory_info().rss
  try:
@@ -248,14 +264,27 @@ def monitor(pid: int,
  logging.exception("Exception reading IO counters for child {k}. Recorded IO usage may be incomplete".format(k=k), exc_info=True)
  d['psutil_process_disk_write'] += 0
  d['psutil_process_disk_read'] += 0
+
  total_children_user_time = 0.0
  for child_pid in children_user_time:
  total_children_user_time += children_user_time[child_pid]
+
  total_children_system_time = 0.0
  for child_pid in children_system_time:
  total_children_system_time += children_system_time[child_pid]
+
+ total_children_num_ctx_switches_voluntary = 0.0
+ for child_pid in children_num_ctx_switches_voluntary:
+ total_children_num_ctx_switches_voluntary += children_num_ctx_switches_voluntary[child_pid]
+
+ total_children_num_ctx_switches_involuntary = 0.0
+ for child_pid in children_num_ctx_switches_involuntary:
+ total_children_num_ctx_switches_involuntary += children_num_ctx_switches_involuntary[child_pid]
+
  d['psutil_process_time_user'] += total_children_user_time
  d['psutil_process_time_system'] += total_children_system_time
+ d['psutil_process_num_ctx_switches_voluntary'] += total_children_num_ctx_switches_voluntary
+ d['psutil_process_num_ctx_switches_involuntary'] += total_children_num_ctx_switches_involuntary
  logging.debug("sending message")
  return d
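The fields added above come directly from psutil. As a rough standalone sketch of where those values come from (illustrative only, not Parsl's monitor loop; Process.cpu_num() is not available on every platform, and the recursive child walk here is an assumption rather than a copy of the diff):

    import psutil

    pm = psutil.Process()  # the process being sampled

    d = {}
    d["psutil_cpu_count"] = psutil.cpu_count()
    d["psutil_cpu_num"] = pm.cpu_num()  # CPU the base process last ran on

    pctxsw = pm.num_ctx_switches()  # named tuple with voluntary/involuntary fields
    d["psutil_process_num_ctx_switches_voluntary"] = pctxsw.voluntary
    d["psutil_process_num_ctx_switches_involuntary"] = pctxsw.involuntary

    # Children are sampled individually and folded into the parent's totals,
    # in the same spirit as the accumulation in the hunks above.
    for child in pm.children(recursive=True):
        c = child.num_ctx_switches()
        d["psutil_process_num_ctx_switches_voluntary"] += c.voluntary
        d["psutil_process_num_ctx_switches_involuntary"] += c.involuntary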
parsl/monitoring/visualization/models.py CHANGED
@@ -102,5 +102,12 @@ class Resource(db.Model):
  'psutil_process_disk_write', db.Float, nullable=True)
  psutil_process_status = db.Column(
  'psutil_process_status', db.Text, nullable=True)
+ psutil_cpu_num = db.Column(
+ 'psutil_cpu_num', db.Text, nullable=True)
+ psutil_process_num_ctx_switches_voluntary = db.Column(
+ 'psutil_process_num_ctx_switches_voluntary', db.Float, nullable=True)
+ psutil_process_num_ctx_switches_involuntary = db.Column(
+ 'psutil_process_num_ctx_switches_involuntary', db.Float, nullable=True)
+
  __table_args__ = (
  db.PrimaryKeyConstraint('task_id', 'run_id', 'timestamp'),)
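These columns expose the new counters through the monitoring visualization's Resource model. A minimal sketch of reading them back with plain sqlite3 follows; the database path and the resource table name are assumptions based on a default monitoring setup, not something stated in this diff:

    import sqlite3

    # Path is illustrative; point this at wherever your monitoring database was written.
    conn = sqlite3.connect("runinfo/monitoring.db")
    rows = conn.execute(
        "SELECT task_id, psutil_cpu_num, "
        "psutil_process_num_ctx_switches_voluntary, "
        "psutil_process_num_ctx_switches_involuntary "
        "FROM resource ORDER BY timestamp"
    ).fetchall()
    for task_id, cpu_num, voluntary, involuntary in rows:
        print(task_id, cpu_num, voluntary, involuntary)
    conn.close()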
parsl/providers/slurm/slurm.py CHANGED
@@ -280,11 +280,22 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
  else:
  logger.error("Could not read job ID from submit command standard output.")
  logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip())
- raise SubmitException(job_name, "Could not read job ID from submit command standard output", stdout=stdout, stderr=stderr, retcode=retcode)
+ raise SubmitException(
+ job_name,
+ "Could not read job ID from submit command standard output",
+ stdout=stdout,
+ stderr=stderr,
+ retcode=retcode
+ )
  else:
  logger.error("Submit command failed")
  logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip())
- raise SubmitException(job_name, "Could not read job ID from submit command standard output", stdout=stdout, stderr=stderr, retcode=retcode)
+ raise SubmitException(
+ job_name, "Could not read job ID from submit command standard output",
+ stdout=stdout,
+ stderr=stderr,
+ retcode=retcode
+ )

  def cancel(self, job_ids):
  ''' Cancels the jobs specified by a list of job ids
parsl/tests/configs/ad_hoc_cluster_htex.py CHANGED
@@ -16,7 +16,7 @@ config = Config(
  executors=[
  HighThroughputExecutor(
  label='remote_htex',
- max_workers=2,
+ max_workers_per_node=2,
  worker_logdir_root=user_opts['adhoc']['script_dir'],
  encrypted=True,
  provider=AdHocProvider(
parsl/tests/configs/bluewaters.py CHANGED
@@ -13,7 +13,7 @@ def fresh_config():
  label="bw_htex",
  cores_per_worker=1,
  worker_debug=False,
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=TorqueProvider(
  queue='normal',
parsl/tests/configs/bridges.py CHANGED
@@ -13,7 +13,7 @@ def fresh_config():
  # This is the network interface on the login node to
  # which compute nodes can communicate
  # address=address_by_interface('bond0.144'),
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=SlurmProvider(
  user_opts['bridges']['partition'], # Partition / QOS
parsl/tests/configs/cc_in2p3.py CHANGED
@@ -11,7 +11,7 @@ def fresh_config():
  executors=[
  HighThroughputExecutor(
  label='cc_in2p3_htex',
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=GridEngineProvider(
  channel=LocalChannel(),
parsl/tests/configs/comet.py CHANGED
@@ -10,7 +10,7 @@ def fresh_config():
  executors=[
  HighThroughputExecutor(
  label='Comet_HTEX_multinode',
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=SlurmProvider(
  'debug',
parsl/tests/configs/frontera.py CHANGED
@@ -15,7 +15,7 @@ def fresh_config():
  executors=[
  HighThroughputExecutor(
  label="frontera_htex",
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=SlurmProvider(
  cmd_timeout=60, # Add extra time for slow scheduler responses
parsl/tests/configs/midway.py CHANGED
@@ -12,7 +12,7 @@ def fresh_config():
  HighThroughputExecutor(
  label='Midway_HTEX_multinode',
  worker_debug=False,
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=SlurmProvider(
  'broadwl', # Partition name, e.g 'broadwl'
parsl/tests/configs/nscc_singapore.py CHANGED
@@ -15,7 +15,7 @@ def fresh_config():
  heartbeat_period=15,
  heartbeat_threshold=120,
  worker_debug=False,
- max_workers=1,
+ max_workers_per_node=1,
  address=address_by_interface('ib0'),
  encrypted=True,
  provider=PBSProProvider(
parsl/tests/configs/osg_htex.py CHANGED
@@ -13,7 +13,7 @@ config = Config(
  executors=[
  HighThroughputExecutor(
  label='OSG_HTEX',
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=CondorProvider(
  nodes_per_block=1,
parsl/tests/configs/petrelkube.py CHANGED
@@ -18,7 +18,7 @@ def fresh_config():
  HighThroughputExecutor(
  label='kube-htex',
  cores_per_worker=1,
- max_workers=1,
+ max_workers_per_node=1,
  worker_logdir_root='.',

  # Address for the pod worker to connect back
parsl/tests/configs/summit.py CHANGED
@@ -20,7 +20,7 @@ def fresh_config():

  # address=address_by_interface('ib0'), # This assumes Parsl is running on login node
  worker_port_range=(50000, 55000),
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=LSFProvider(
  launcher=JsrunLauncher(),
parsl/tests/configs/theta.py CHANGED
@@ -11,7 +11,7 @@ def fresh_config():
  executors=[
  HighThroughputExecutor(
  label='theta_local_htex_multinode',
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=CobaltProvider(
  queue=user_opts['theta']['queue'],
parsl/tests/configs/user_opts.py CHANGED
@@ -52,7 +52,9 @@ user_opts = {
  # 'username': MIDWAY_USERNAME,
  # 'script_dir': '/scratch/midway2/{}/parsl_scripts'.format(MIDWAY_USERNAME),
  # 'scheduler_options': "",
- # 'worker_init': 'cd /scratch/midway2/{}/parsl_scripts; module load Anaconda3/5.1.0; source activate parsl_testing;'.format(MIDWAY_USERNAME),
+ # 'worker_init': 'cd /scratch/midway2/{}/parsl_scripts; '
+ # 'module load Anaconda3/5.1.0; source activate parsl_testing;'
+ # .format(MIDWAY_USERNAME),
  # },
  # 'osg': {
  # 'username': OSG_USERNAME,
parsl/tests/manual_tests/test_ad_hoc_htex.py CHANGED
@@ -13,7 +13,7 @@ config = Config(
  executors=[
  HighThroughputExecutor(
  label='AdHoc',
- max_workers=2,
+ max_workers_per_node=2,
  worker_logdir_root="/scratch/midway2/yadunand/parsl_scripts",
  encrypted=True,
  provider=AdHocProvider(
parsl/tests/scaling_tests/htex_local.py CHANGED
@@ -9,7 +9,7 @@ config = Config(
  HighThroughputExecutor(
  label="htex_local",
  cores_per_worker=1,
- max_workers=8,
+ max_workers_per_node=8,
  encrypted=True,
  provider=LocalProvider(
  channel=LocalChannel(),
parsl/tests/sites/test_affinity.py CHANGED
@@ -15,7 +15,7 @@ def local_config():
  HighThroughputExecutor(
  label="htex_Local",
  worker_debug=True,
- max_workers=2,
+ max_workers_per_node=2,
  cpu_affinity='block',
  available_accelerators=2,
  encrypted=True,
parsl/tests/sites/test_concurrent.py CHANGED
@@ -14,7 +14,7 @@ def make_config():
  executors=[
  HighThroughputExecutor(
  address="127.0.0.1",
- max_workers=2,
+ max_workers_per_node=2,
  heartbeat_period=2,
  heartbeat_threshold=4,
  encrypted=True,
parsl/tests/sites/test_dynamic_executor.py CHANGED
@@ -59,7 +59,7 @@ def test_dynamic_executor():
  HighThroughputExecutor(
  label='htex_local',
  cores_per_worker=1,
- max_workers=5,
+ max_workers_per_node=5,
  encrypted=True,
  provider=LocalProvider(
  init_blocks=1,
parsl/tests/sites/test_worker_info.py CHANGED
@@ -14,7 +14,7 @@ def local_config():
  HighThroughputExecutor(
  label="htex_Local",
  worker_debug=True,
- max_workers=4,
+ max_workers_per_node=4,
  encrypted=True,
  provider=LocalProvider(
  channel=LocalChannel(),
parsl/tests/test_htex/test_basic.py CHANGED
@@ -8,7 +8,7 @@ from parsl.tests.configs.htex_local import fresh_config
  def local_setup():
  config = fresh_config()
  config.executors[0].poll_period = 1
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1
  parsl.load(config)


parsl/tests/test_htex/test_connected_blocks.py CHANGED
@@ -14,7 +14,7 @@ def local_config():
  heartbeat_threshold=2,
  poll_period=100,
  address="127.0.0.1",
- max_workers=1,
+ max_workers_per_node=1,
  provider=LocalProvider(
  init_blocks=0,
  max_blocks=2,
parsl/tests/test_htex/test_cpu_affinity_explicit.py CHANGED
@@ -34,7 +34,7 @@ def test_cpu_affinity_explicit():

  config = fresh_config()
  config.executors[0].cpu_affinity = affinity
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1

  logger.debug(f"config: {config}")
  # TODO: is there a `with` style for this, to properly deal with exceptions?
parsl/tests/test_htex/test_disconnected_blocks.py CHANGED
@@ -17,7 +17,7 @@ def local_config():
  heartbeat_period=1,
  heartbeat_threshold=2,
  poll_period=100,
- max_workers=1,
+ max_workers_per_node=1,
  provider=LocalProvider(
  worker_init="conda deactivate; export PATH=''; which python; exit 0",
  init_blocks=0,
parsl/tests/test_htex/test_htex.py CHANGED
@@ -1,4 +1,5 @@
  import pathlib
+ import warnings
  from unittest import mock

  import pytest
@@ -107,3 +108,15 @@ def test_htex_shutdown(
  assert not mock_ix_proc.terminate.called
  assert not mock_ix_proc.join.called
  assert "has not started" in mock_logs[0][0][0]
+
+
+ @pytest.mark.local
+ def test_max_workers_per_node():
+ with pytest.warns(DeprecationWarning) as record:
+ htex = HighThroughputExecutor(max_workers_per_node=1, max_workers=2)
+
+ warning_msg = "max_workers is deprecated"
+ assert any(warning_msg in str(warning.message) for warning in record)
+
+ # Ensure max_workers_per_node takes precedence
+ assert htex.max_workers_per_node == htex.max_workers == 1
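Most of the one-line changes that follow stem from HighThroughputExecutor renaming max_workers to max_workers_per_node, with the old name kept as a deprecated alias (which the new test above exercises). A minimal sketch of a config written against the new name; the label, provider, and worker count here are illustrative, not taken from any file in this diff:

    from parsl.config import Config
    from parsl.executors import HighThroughputExecutor
    from parsl.providers import LocalProvider

    config = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                max_workers_per_node=2,  # formerly max_workers=2, which now warns
                provider=LocalProvider(),
            )
        ]
    )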
parsl/tests/test_htex/test_manager_failure.py CHANGED
@@ -13,7 +13,7 @@ from parsl.tests.configs.htex_local import fresh_config
  def load_config():
  config = fresh_config()
  config.executors[0].poll_period = 1
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1
  config.executors[0].heartbeat_period = 1

  parsl.load(config)
parsl/tests/test_htex/test_missing_worker.py CHANGED
@@ -8,7 +8,7 @@ from parsl.tests.configs.htex_local import fresh_config
  def local_setup():
  config = fresh_config()
  config.executors[0].poll_period = 1
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1
  config.executors[0].launch_cmd = "executable_that_hopefully_does_not_exist_1030509.py"
  parsl.load(config)

parsl/tests/test_htex/test_multiple_disconnected_blocks.py CHANGED
@@ -17,7 +17,7 @@ def local_config():
  heartbeat_period=1,
  heartbeat_threshold=2,
  poll_period=100,
- max_workers=1,
+ max_workers_per_node=1,
  provider=LocalProvider(
  worker_init="conda deactivate; export PATH=''; which python; exit 0",
  init_blocks=2,
parsl/tests/test_htex/test_worker_failure.py CHANGED
@@ -8,7 +8,7 @@ def local_config():
  from parsl.tests.configs.htex_local import fresh_config
  config = fresh_config()
  config.executors[0].poll_period = 1
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1
  config.executors[0].heartbeat_period = 1
  return config

parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py CHANGED
@@ -11,7 +11,7 @@ EXECUTOR_LABEL = "MPI_TEST"
  def local_setup():
  config = fresh_config()
  config.executors[0].label = EXECUTOR_LABEL
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1
  config.executors[0].enable_mpi_mode = False
  parsl.load(config)

parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py CHANGED
@@ -14,7 +14,7 @@ EXECUTOR_LABEL = "MPI_TEST"
  def local_setup():
  config = fresh_config()
  config.executors[0].label = EXECUTOR_LABEL
- config.executors[0].max_workers = 2
+ config.executors[0].max_workers_per_node = 2
  config.executors[0].enable_mpi_mode = True
  config.executors[0].mpi_launcher = "mpiexec"

parsl/tests/test_mpi_apps/test_resource_spec.py CHANGED
@@ -28,7 +28,7 @@ EXECUTOR_LABEL = "MPI_TEST"
  def local_setup():
  config = fresh_config()
  config.executors[0].label = EXECUTOR_LABEL
- config.executors[0].max_workers = 1
+ config.executors[0].max_workers_per_node = 1
  parsl.load(config)

parsl/tests/test_scaling/test_scale_down.py CHANGED
@@ -27,7 +27,7 @@ def local_config():
  poll_period=100,
  label="htex_local",
  address="127.0.0.1",
- max_workers=1,
+ max_workers_per_node=1,
  encrypted=True,
  provider=LocalProvider(
  channel=LocalChannel(),
@@ -39,7 +39,7 @@ def local_config():
  )
  ],
  max_idletime=0.5,
- strategy='htex_auto_scale',
+ strategy='simple',
  )

parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py ADDED
@@ -0,0 +1,159 @@
+ import pytest
+
+ import parsl
+
+ from parsl import File, python_app
+ from parsl.providers import LocalProvider
+ from parsl.channels import LocalChannel
+ from parsl.launchers import SingleNodeLauncher
+ from parsl.config import Config
+ from parsl.executors import HighThroughputExecutor
+
+ from threading import Event
+
+ _max_blocks = 5
+ _min_blocks = 0
+
+
+ def local_config():
+ return Config(
+ executors=[
+ HighThroughputExecutor(
+ heartbeat_period=1,
+ heartbeat_threshold=2,
+ poll_period=100,
+ label="htex_local",
+ address="127.0.0.1",
+ max_workers=1,
+ encrypted=True,
+ provider=LocalProvider(
+ channel=LocalChannel(),
+ init_blocks=0,
+ max_blocks=_max_blocks,
+ min_blocks=_min_blocks,
+ launcher=SingleNodeLauncher(),
+ ),
+ )
+ ],
+ max_idletime=0.5,
+ strategy='htex_auto_scale',
+ )
+
+
+ @python_app
+ def waiting_app(ident: int, outputs=(), inputs=()):
+ import pathlib
+ import time
+
+ # Approximate an Event by writing to files; the test logic will poll this file
+ with open(outputs[0], "a") as f:
+ f.write(f"Ready: {ident}\n")
+
+ # Similarly, use Event approximation (file check!) by polling.
+ may_finish_file = pathlib.Path(inputs[0])
+ while not may_finish_file.exists():
+ time.sleep(0.01)
+
+
+ # see issue #1885 for details of failures of this test.
+ # at the time of issue #1885 this test was failing frequently
+ # in CI.
+ @pytest.mark.local
+ def test_scale_out(tmpd_cwd, try_assert):
+ dfk = parsl.dfk()
+
+ # reconfigure scaling strategy to run faster than usual. This allows
+ # this test to complete faster - at time of writing 27s with default
+ # 5s strategy, vs XXXX with 0.5s strategy.
+
+ # check this attribute still exists, in the presence of ongoing
+ # development, so we have some belief that setting it will not be
+ # setting a now-ignored parameter.
+ assert hasattr(dfk.job_status_poller, 'interval')
+ dfk.job_status_poller.interval = 0.1
+
+ num_managers = len(dfk.executors['htex_local'].connected_managers())
+
+ assert num_managers == 0, "Expected 0 managers at start"
+ assert dfk.executors['htex_local'].outstanding == 0, "Expected 0 tasks at start"
+
+ ntasks = _max_blocks * 2
+ ready_path = tmpd_cwd / "workers_ready"
+ finish_path = tmpd_cwd / "stage1_workers_may_continue"
+ ready_path.touch()
+ inputs = [File(finish_path)]
+ outputs = [File(ready_path)]
+
+ futs = [waiting_app(i, outputs=outputs, inputs=inputs) for i in range(ntasks)]
+
+ try_assert(lambda: ready_path.read_text().count("\n") == _max_blocks, "Wait for _max_blocks tasks to be running", timeout_ms=15000)
+
+ # This should be true immediately, because the previous try_assert should
+ # wait until there are max_blocks tasks running, and his test should be
+ # configured to use 1 worker per block.
+ assert len(dfk.executors['htex_local'].connected_managers()) == _max_blocks
+
+ finish_path.touch() # Approximation of Event, via files
+ [x.result() for x in futs]
+
+ assert dfk.executors['htex_local'].outstanding == 0
+
+ # now we can launch one "long" task -
+ # and what should happen is that the connected_managers count "eventually" (?) converges to 1 and stays there.
+
+ finish_path = tmpd_cwd / "stage2_workers_may_continue"
+
+ fut = waiting_app(0, outputs=outputs, inputs=[File(finish_path)])
+
+ def check_one_block():
+ return len(dfk.executors['htex_local'].connected_managers()) == 1
+
+ try_assert(
+ check_one_block,
+ fail_msg="Expected 1 managers during a single long task",
+ )
+
+ # the task should not have finished by the time we end up with 1 manager
+ assert not fut.done()
+
+ # This section wait for the strategy to run again, with the above single
+ # task outstanding, and check that the strategy has not scaled up or
+ # down more on those subsequent iterations.
+
+ # It does this by hooking the callback of the job status poller, and
+ # waiting until it has run.
+
+ old_cb = dfk.job_status_poller.callback
+
+ strategy_iterated = Event()
+
+ def hook_cb(*args, **kwargs):
+ r = old_cb(*args, **kwargs)
+ strategy_iterated.set()
+ return r
+
+ dfk.job_status_poller.callback = hook_cb
+
+ # hack strategies to run more frequently. this allo
+ # dfk.job_status_poller.
+
+ try_assert(
+ strategy_iterated.is_set,
+ fail_msg="Expected strategy to have run within this period",
+ )
+
+ assert check_one_block()
+
+ finish_path.touch() # now we can end the single stage-2 task
+
+ fut.result()
+
+ # now we should expect min_blocks scale down
+
+ def check_min_blocks():
+ return len(dfk.executors['htex_local'].connected_managers()) == _min_blocks
+
+ try_assert(
+ check_min_blocks,
+ fail_msg=f"Expected {_min_blocks} managers when no tasks (min_blocks)",
+ )
parsl/usage_tracking/usage.py CHANGED
@@ -109,7 +109,6 @@ class UsageTracker:
  sys.version_info.micro)
  self.tracking_enabled = self.check_tracking_enabled()
  logger.debug("Tracking status: {}".format(self.tracking_enabled))
- self.initialized = False # Once first message is sent this will be True

  def check_tracking_enabled(self):
  """Check if tracking is enabled.
@@ -176,15 +175,12 @@ class UsageTracker:
  except Exception as e:
  logger.debug("Usage tracking failed: {}".format(e))

- def send_message(self) -> None:
- """Send message over UDP.
- """
- if not self.initialized:
- message = self.construct_start_message()
- self.initialized = True
- else:
- message = self.construct_end_message()
+ def send_start_message(self) -> None:
+ message = self.construct_start_message()
+ self.send_UDP_message(message)

+ def send_end_message(self) -> None:
+ message = self.construct_end_message()
  self.send_UDP_message(message)

  def close(self, timeout: float = 10.0) -> None:
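The usage-tracking change replaces the single send_message(), which relied on an initialized flag to decide between start and end messages, with explicit send_start_message() and send_end_message() methods. A toy stand-in class sketching the resulting calling pattern (not Parsl's actual wiring; the UDP send is replaced with a print):

    class Tracker:
        """Stand-in for UsageTracker, reduced to the start/end message split."""

        def construct_start_message(self) -> str:
            return "start"

        def construct_end_message(self) -> str:
            return "end"

        def send_UDP_message(self, message: str) -> None:
            print("would send over UDP:", message)

        def send_start_message(self) -> None:
            self.send_UDP_message(self.construct_start_message())

        def send_end_message(self) -> None:
            self.send_UDP_message(self.construct_end_message())


    tracker = Tracker()
    tracker.send_start_message()  # explicitly at workflow start
    tracker.send_end_message()    # explicitly at shutdown, no internal state toggle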
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
  Year.Month.Day[alpha/beta/..]
  Alphas will be numbered like this -> 2024.12.10a0
  """
- VERSION = '2024.02.26'
+ VERSION = '2024.03.11'