parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +26 -6
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +53 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +259 -223
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +307 -285
  57. parsl/executors/high_throughput/interchange.py +137 -168
  58. parsl/executors/high_throughput/manager_record.py +4 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +77 -75
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +38 -55
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +17 -13
  78. parsl/executors/taskvine/manager_config.py +7 -2
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +28 -112
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +0 -6
  91. parsl/log_utils.py +1 -2
  92. parsl/monitoring/db_manager.py +55 -93
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +85 -311
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -9
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +3 -9
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +0 -7
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +11 -10
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +5 -5
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +137 -4
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.18.dist-info/METADATA +0 -98
  360. parsl-2024.3.18.dist-info/RECORD +0 -449
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -3,50 +3,49 @@ Cooperative Computing Lab (CCL) at Notre Dame to provide a fault-tolerant,
3
3
  high-throughput system for delegating Parsl tasks to thousands of remote machines
4
4
  """
5
5
 
6
- import threading
7
- import multiprocessing
8
- import logging
9
- from concurrent.futures import Future
10
- from ctypes import c_bool
11
-
12
- import tempfile
13
6
  import hashlib
14
- import subprocess
7
+ import inspect
8
+ import itertools
9
+ import logging
10
+ import multiprocessing
15
11
  import os
16
- import socket
17
- import time
18
12
  import pickle
19
13
  import queue
20
- import inspect
21
14
  import shutil
22
- import itertools
15
+ import socket
16
+ import subprocess
17
+ import tempfile
18
+ import threading
19
+ import time
20
+ from collections import namedtuple
21
+ from concurrent.futures import Future
22
+ from ctypes import c_bool
23
+ from typing import Dict, List, Optional, Set, Union
24
+
25
+ import typeguard
23
26
 
24
- from parsl.serialize import pack_apply_message, deserialize
25
27
  import parsl.utils as putils
26
- from parsl.executors.errors import ExecutorError
27
28
  from parsl.data_provider.files import File
29
+ from parsl.data_provider.staging import Staging
28
30
  from parsl.errors import OptionalModuleMissing
31
+ from parsl.executors.errors import ExecutorError, InvalidResourceSpecification
29
32
  from parsl.executors.status_handling import BlockProviderExecutor
30
- from parsl.providers.base import ExecutionProvider
31
- from parsl.providers import LocalProvider, CondorProvider
32
33
  from parsl.executors.workqueue import exec_parsl_function
33
34
  from parsl.process_loggers import wrap_with_logs
35
+ from parsl.providers import CondorProvider, LocalProvider
36
+ from parsl.providers.base import ExecutionProvider
37
+ from parsl.serialize import deserialize, pack_apply_message
34
38
  from parsl.utils import setproctitle
35
39
 
36
- import typeguard
37
- from typing import Dict, List, Optional, Set, Union
38
- from parsl.data_provider.staging import Staging
39
-
40
- from .errors import WorkQueueTaskFailure
41
- from .errors import WorkQueueFailure
42
-
43
- from collections import namedtuple
40
+ from .errors import WorkQueueFailure, WorkQueueTaskFailure
44
41
 
45
42
  try:
46
43
  import work_queue as wq
47
- from work_queue import WorkQueue
48
- from work_queue import WORK_QUEUE_DEFAULT_PORT
49
- from work_queue import WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT
44
+ from work_queue import (
45
+ WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT,
46
+ WORK_QUEUE_DEFAULT_PORT,
47
+ WorkQueue,
48
+ )
50
49
  except ImportError:
51
50
  _work_queue_enabled = False
52
51
  WORK_QUEUE_DEFAULT_PORT = 0
@@ -216,6 +215,13 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
216
215
  This requires a version of Work Queue / cctools after commit
217
216
  874df524516441da531b694afc9d591e8b134b73 (release 7.5.0 is too early).
218
217
  Default is False.
218
+
219
+ scaling_cores_per_worker: int
220
+ When using Parsl scaling, this specifies the number of cores that a
221
+ worker is expected to have available for computation. Default 1. This
222
+ parameter can be ignored when using a fixed number of blocks, or when
223
+ using one task per worker (by omitting a ``cores`` resource
224
+ specifiation for each task).
219
225
  """
220
226
 
221
227
  radio_mode = "filesystem"
@@ -245,16 +251,17 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
245
251
  full_debug: bool = True,
246
252
  worker_executable: str = 'work_queue_worker',
247
253
  function_dir: Optional[str] = None,
248
- coprocess: bool = False):
254
+ coprocess: bool = False,
255
+ scaling_cores_per_worker: int = 1):
249
256
  BlockProviderExecutor.__init__(self, provider=provider,
250
257
  block_error_handler=True)
251
258
  if not _work_queue_enabled:
252
259
  raise OptionalModuleMissing(['work_queue'], "WorkQueueExecutor requires the work_queue module.")
253
260
 
261
+ self.scaling_cores_per_worker = scaling_cores_per_worker
254
262
  self.label = label
255
263
  self.task_queue = multiprocessing.Queue() # type: multiprocessing.Queue
256
264
  self.collector_queue = multiprocessing.Queue() # type: multiprocessing.Queue
257
- self.blocks = {} # type: Dict[str, str]
258
265
  self.address = address
259
266
  self.port = port
260
267
  self.executor_task_counter = -1
@@ -412,7 +419,7 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
412
419
  message = "Task resource specification only accepts these types of resources: {}".format(
413
420
  ', '.join(acceptable_fields))
414
421
  logger.error(message)
415
- raise ExecutorError(self, message)
422
+ raise InvalidResourceSpecification(keys, message)
416
423
 
417
424
  # this checks that either all of the required resource types are specified, or
418
425
  # that none of them are: the `required_resource_types` are not actually required,
@@ -423,9 +430,10 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
423
430
  logger.error("Running with `autolabel=False`. In this mode, "
424
431
  "task resource specification requires "
425
432
  "three resources to be specified simultaneously: cores, memory, and disk")
426
- raise ExecutorError(self, "Task resource specification requires "
427
- "three resources to be specified simultaneously: cores, memory, and disk. "
428
- "Try setting autolabel=True if you are unsure of the resource usage")
433
+ raise InvalidResourceSpecification(keys,
434
+ "Task resource specification requires "
435
+ "three resources to be specified simultaneously: cores, memory, and disk. "
436
+ "Try setting autolabel=True if you are unsure of the resource usage")
429
437
 
430
438
  for k in keys:
431
439
  if k == 'cores':
@@ -471,6 +479,8 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
471
479
  # Create a Future object and have it be mapped from the task ID in the tasks dictionary
472
480
  fu = Future()
473
481
  fu.parsl_executor_task_id = executor_task_id
482
+ assert isinstance(resource_specification, dict)
483
+ fu.resource_specification = resource_specification
474
484
  logger.debug("Getting tasks_lock to set WQ-level task entry")
475
485
  with self.tasks_lock:
476
486
  logger.debug("Got tasks_lock to set WQ-level task entry")
@@ -654,42 +664,31 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
654
664
  self.worker_command = self._construct_worker_command()
655
665
  self._patch_providers()
656
666
 
657
- if hasattr(self.provider, 'init_blocks'):
658
- try:
659
- self.scale_out(blocks=self.provider.init_blocks)
660
- except Exception as e:
661
- logger.error("Initial block scaling out failed: {}".format(e))
662
- raise e
663
-
664
667
  @property
665
668
  def outstanding(self) -> int:
666
- """Count the number of outstanding tasks. This is inefficiently
669
+ """Count the number of outstanding slots required. This is inefficiently
667
670
  implemented and probably could be replaced with a counter.
668
671
  """
672
+ logger.debug("Calculating outstanding task slot load")
669
673
  outstanding = 0
674
+ tasks = 0 # only for log message...
670
675
  with self.tasks_lock:
671
676
  for fut in self.tasks.values():
672
677
  if not fut.done():
673
- outstanding += 1
674
- logger.debug(f"Counted {outstanding} outstanding tasks")
678
+ # if a task does not specify a core count, Work Queue will allocate an entire
679
+ # worker node to that task. That's approximated here by saying that it uses
680
+ # scaling_cores_per_worker.
681
+ resource_spec = getattr(fut, 'resource_specification', {})
682
+ cores = resource_spec.get('cores', self.scaling_cores_per_worker)
683
+
684
+ outstanding += cores
685
+ tasks += 1
686
+ logger.debug(f"Counted {tasks} outstanding tasks with {outstanding} outstanding slots")
675
687
  return outstanding
676
688
 
677
689
  @property
678
690
  def workers_per_node(self) -> Union[int, float]:
679
- return 1
680
-
681
- def scale_in(self, count):
682
- """Scale in method.
683
- """
684
- # Obtain list of blocks to kill
685
- to_kill = list(self.blocks.keys())[:count]
686
- kill_ids = [self.blocks[block] for block in to_kill]
687
-
688
- # Cancel the blocks provisioned
689
- if self.provider:
690
- self.provider.cancel(kill_ids)
691
- else:
692
- logger.error("No execution provider available to scale")
691
+ return self.scaling_cores_per_worker
693
692
 
694
693
  def shutdown(self, *args, **kwargs):
695
694
  """Shutdown the executor. Sets flag to cancel the submit process and
@@ -698,17 +697,19 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
698
697
  logger.debug("Work Queue shutdown started")
699
698
  self.should_stop.value = True
700
699
 
701
- # Remove the workers that are still going
702
- kill_ids = [self.blocks[block] for block in self.blocks.keys()]
703
- if self.provider:
704
- logger.debug("Cancelling blocks")
705
- self.provider.cancel(kill_ids)
706
-
707
700
  logger.debug("Joining on submit process")
708
701
  self.submit_process.join()
702
+ self.submit_process.close()
703
+
709
704
  logger.debug("Joining on collector thread")
710
705
  self.collector_thread.join()
711
706
 
707
+ logger.debug("Closing multiprocessing queues")
708
+ self.task_queue.close()
709
+ self.task_queue.join_thread()
710
+ self.collector_queue.close()
711
+ self.collector_queue.join_thread()
712
+
712
713
  logger.debug("Work Queue shutdown completed")
713
714
 
714
715
  @wrap_with_logs
@@ -1,8 +1,8 @@
1
1
  #! /usr/bin/env python3
2
2
 
3
- import socket
4
3
  import json
5
4
  import os
5
+ import socket
6
6
  import sys
7
7
 
8
8
  # If enabled, coprocess will print to stdout
@@ -3,8 +3,8 @@ from __future__ import annotations
3
3
  from typing import Dict, Tuple
4
4
 
5
5
  import parsl.executors.status_handling as status_handling
6
- from parsl.jobs.states import JobStatus, JobState
7
6
  from parsl.jobs.errors import TooManyJobFailuresError
7
+ from parsl.jobs.states import JobState, JobStatus
8
8
 
9
9
 
10
10
  def noop_error_handler(executor: status_handling.BlockProviderExecutor, status: Dict[str, JobStatus], threshold: int = 3) -> None:
@@ -20,7 +20,7 @@ def simple_error_handler(executor: status_handling.BlockProviderExecutor, status
20
20
  executor.set_bad_state_and_fail_all(_get_error(status))
21
21
 
22
22
 
23
- def windowed_error_handler(executor: status_handling.BlockProviderExecutor, status: Dict[str, JobStatus], threshold: int = 3):
23
+ def windowed_error_handler(executor: status_handling.BlockProviderExecutor, status: Dict[str, JobStatus], threshold: int = 3) -> None:
24
24
  sorted_status = [(key, status[key]) for key in sorted(status, key=lambda x: int(x))]
25
25
  current_window = dict(sorted_status[-threshold:])
26
26
  total, failed = _count_jobs(current_window)
@@ -1,137 +1,53 @@
1
1
  import logging
2
- import parsl
3
- import time
4
- import zmq
5
- from typing import Dict, List, Sequence, Optional, Union
2
+ from typing import List, Optional, Sequence, Union
6
3
 
7
- from parsl.jobs.states import JobStatus, JobState
8
- from parsl.jobs.strategy import Strategy
9
4
  from parsl.executors.status_handling import BlockProviderExecutor
10
- from parsl.monitoring.message_type import MessageType
11
-
12
-
5
+ from parsl.jobs.strategy import Strategy
13
6
  from parsl.utils import Timer
14
7
 
15
-
16
8
  logger = logging.getLogger(__name__)
17
9
 
18
10
 
19
- class PollItem:
20
- def __init__(self, executor: BlockProviderExecutor, dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None):
21
- self._executor = executor
22
- self._dfk = dfk
23
- self._interval = executor.status_polling_interval
24
- self._last_poll_time = 0.0
25
- self._status = {} # type: Dict[str, JobStatus]
26
-
27
- # Create a ZMQ channel to send poll status to monitoring
28
- self.monitoring_enabled = False
29
- if self._dfk and self._dfk.monitoring is not None:
30
- self.monitoring_enabled = True
31
- hub_address = self._dfk.hub_address
32
- hub_port = self._dfk.hub_interchange_port
33
- context = zmq.Context()
34
- self.hub_channel = context.socket(zmq.DEALER)
35
- self.hub_channel.set_hwm(0)
36
- self.hub_channel.connect("tcp://{}:{}".format(hub_address, hub_port))
37
- logger.info("Monitoring enabled on job status poller")
38
-
39
- def _should_poll(self, now: float) -> bool:
40
- return now >= self._last_poll_time + self._interval
41
-
42
- def poll(self, now: float) -> None:
43
- if self._should_poll(now):
44
- previous_status = self._status
45
- self._status = self._executor.status()
46
- self._last_poll_time = now
47
- delta_status = {}
48
- for block_id in self._status:
49
- if block_id not in previous_status \
50
- or previous_status[block_id].state != self._status[block_id].state:
51
- delta_status[block_id] = self._status[block_id]
52
-
53
- if delta_status:
54
- self.send_monitoring_info(delta_status)
55
-
56
- def send_monitoring_info(self, status: Dict) -> None:
57
- # Send monitoring info for HTEX when monitoring enabled
58
- if self.monitoring_enabled:
59
- msg = self._executor.create_monitoring_info(status)
60
- logger.debug("Sending message {} to hub from job status poller".format(msg))
61
- self.hub_channel.send_pyobj((MessageType.BLOCK_INFO, msg))
62
-
63
- @property
64
- def status(self) -> Dict[str, JobStatus]:
65
- """Return the status of all jobs/blocks of the executor of this poller.
66
-
67
- :return: a dictionary mapping block ids (in string) to job status
68
- """
69
- return self._status
70
-
71
- @property
72
- def executor(self) -> BlockProviderExecutor:
73
- return self._executor
74
-
75
- def scale_in(self, n, max_idletime=None):
76
-
77
- if max_idletime is None:
78
- block_ids = self._executor.scale_in(n)
79
- else:
80
- # This is a HighThroughputExecutor-specific interface violation.
81
- # This code hopes, through pan-codebase reasoning, that this
82
- # scale_in method really does come from HighThroughputExecutor,
83
- # and so does have an extra max_idletime parameter not present
84
- # in the executor interface.
85
- block_ids = self._executor.scale_in(n, max_idletime=max_idletime)
86
- if block_ids is not None:
87
- new_status = {}
88
- for block_id in block_ids:
89
- new_status[block_id] = JobStatus(JobState.CANCELLED)
90
- del self._status[block_id]
91
- self.send_monitoring_info(new_status)
92
- return block_ids
93
-
94
- def scale_out(self, n):
95
- block_ids = self._executor.scale_out(n)
96
- if block_ids is not None:
97
- new_status = {}
98
- for block_id in block_ids:
99
- new_status[block_id] = JobStatus(JobState.PENDING)
100
- self.send_monitoring_info(new_status)
101
- self._status.update(new_status)
102
- return block_ids
103
-
104
- def __repr__(self) -> str:
105
- return self._status.__repr__()
106
-
107
-
108
11
  class JobStatusPoller(Timer):
109
12
  def __init__(self, *, strategy: Optional[str], max_idletime: float,
110
- strategy_period: Union[float, int],
111
- dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None) -> None:
112
- self._poll_items = [] # type: List[PollItem]
113
- self.dfk = dfk
13
+ strategy_period: Union[float, int]) -> None:
14
+ self._executors = [] # type: List[BlockProviderExecutor]
114
15
  self._strategy = Strategy(strategy=strategy,
115
16
  max_idletime=max_idletime)
116
17
  super().__init__(self.poll, interval=strategy_period, name="JobStatusPoller")
117
18
 
118
19
  def poll(self) -> None:
119
20
  self._update_state()
120
- self._run_error_handlers(self._poll_items)
121
- self._strategy.strategize(self._poll_items)
21
+ self._run_error_handlers(self._executors)
22
+ self._strategy.strategize(self._executors)
122
23
 
123
- def _run_error_handlers(self, status: List[PollItem]) -> None:
124
- for es in status:
125
- es.executor.handle_errors(es.status)
24
+ def _run_error_handlers(self, executors: List[BlockProviderExecutor]) -> None:
25
+ for e in executors:
26
+ e.handle_errors(e.status_facade)
126
27
 
127
28
  def _update_state(self) -> None:
128
- now = time.time()
129
- for item in self._poll_items:
130
- item.poll(now)
29
+ for item in self._executors:
30
+ item.poll_facade()
131
31
 
132
32
  def add_executors(self, executors: Sequence[BlockProviderExecutor]) -> None:
133
33
  for executor in executors:
134
34
  if executor.status_polling_interval > 0:
135
35
  logger.debug("Adding executor {}".format(executor.label))
136
- self._poll_items.append(PollItem(executor, self.dfk))
36
+ self._executors.append(executor)
137
37
  self._strategy.add_executors(executors)
38
+
39
+ def close(self, timeout: Optional[float] = None) -> None:
40
+ super().close(timeout)
41
+ for executor in self._executors:
42
+ if not executor.bad_state_is_set:
43
+ logger.info(f"Scaling in executor {executor.label}")
44
+
45
+ # this code needs to be at least as many blocks as need
46
+ # cancelling, but it is safe to be more, as the scaling
47
+ # code will cope with being asked to cancel more blocks
48
+ # than exist.
49
+ block_count = len(executor.status_facade)
50
+ executor.scale_in_facade(block_count)
51
+
52
+ else: # and bad_state_is_set
53
+ logger.warning(f"Not scaling in executor {executor.label} because it is in bad state")
parsl/jobs/states.py CHANGED
@@ -1,6 +1,6 @@
1
+ import logging
1
2
  import os
2
3
  from enum import IntEnum
3
- import logging
4
4
  from typing import Optional
5
5
 
6
6
  logger = logging.getLogger(__name__)
@@ -46,12 +46,17 @@ class JobState(IntEnum):
46
46
  bad worker environment or network connectivity issues.
47
47
  """
48
48
 
49
+ SCALED_IN = 9
50
+ """This job has been deliberately scaled in. Scaling code should not be concerned
51
+ that the job never ran (for example for error handling purposes).
52
+ """
53
+
49
54
  def __str__(self) -> str:
50
55
  return f"{self.__class__.__name__}.{self.name}"
51
56
 
52
57
 
53
58
  TERMINAL_STATES = [JobState.CANCELLED, JobState.COMPLETED, JobState.FAILED,
54
- JobState.TIMEOUT, JobState.MISSING]
59
+ JobState.TIMEOUT, JobState.MISSING, JobState.SCALED_IN]
55
60
 
56
61
 
57
62
  class JobStatus:
parsl/jobs/strategy.py CHANGED
@@ -1,19 +1,17 @@
1
1
  from __future__ import annotations
2
+
2
3
  import logging
3
- import time
4
4
  import math
5
+ import time
5
6
  import warnings
6
7
  from typing import Dict, List, Optional, Sequence, TypedDict
7
8
 
8
- import parsl.jobs.job_status_poller as jsp
9
-
10
9
  from parsl.executors import HighThroughputExecutor
11
10
  from parsl.executors.base import ParslExecutor
12
11
  from parsl.executors.status_handling import BlockProviderExecutor
13
12
  from parsl.jobs.states import JobState
14
13
  from parsl.process_loggers import wrap_with_logs
15
14
 
16
-
17
15
  logger = logging.getLogger(__name__)
18
16
 
19
17
 
@@ -26,6 +24,10 @@ class ExecutorState(TypedDict):
26
24
  If the executor is not idle, then None.
27
25
  """
28
26
 
27
+ first: bool
28
+ """True if this executor has not yet had a strategy poll.
29
+ """
30
+
29
31
 
30
32
  class Strategy:
31
33
  """Scaling strategy.
@@ -129,8 +131,8 @@ class Strategy:
129
131
  self.executors = {}
130
132
  self.max_idletime = max_idletime
131
133
 
132
- self.strategies = {None: self._strategy_noop,
133
- 'none': self._strategy_noop,
134
+ self.strategies = {None: self._strategy_init_only,
135
+ 'none': self._strategy_init_only,
134
136
  'simple': self._strategy_simple,
135
137
  'htex_auto_scale': self._strategy_htex_auto_scale}
136
138
 
@@ -144,17 +146,23 @@ class Strategy:
144
146
 
145
147
  def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
146
148
  for executor in executors:
147
- self.executors[executor.label] = {'idle_since': None}
149
+ self.executors[executor.label] = {'idle_since': None, 'first': True}
148
150
 
149
- def _strategy_noop(self, status: List[jsp.PollItem]) -> None:
150
- """Do nothing.
151
+ def _strategy_init_only(self, executors: List[BlockProviderExecutor]) -> None:
152
+ """Scale up to init_blocks at the start, then nothing more.
151
153
  """
152
- logger.debug("strategy_noop: doing nothing")
154
+ for executor in executors:
155
+ if self.executors[executor.label]['first']:
156
+ logger.debug(f"strategy_init_only: scaling out {executor.provider.init_blocks} initial blocks for {executor.label}")
157
+ executor.scale_out_facade(executor.provider.init_blocks)
158
+ self.executors[executor.label]['first'] = False
159
+ else:
160
+ logger.debug("strategy_init_only: doing nothing")
153
161
 
154
- def _strategy_simple(self, status_list: List[jsp.PollItem]) -> None:
155
- self._general_strategy(status_list, strategy_type='simple')
162
+ def _strategy_simple(self, executors: List[BlockProviderExecutor]) -> None:
163
+ self._general_strategy(executors, strategy_type='simple')
156
164
 
157
- def _strategy_htex_auto_scale(self, status_list: List[jsp.PollItem]) -> None:
165
+ def _strategy_htex_auto_scale(self, executors: List[BlockProviderExecutor]) -> None:
158
166
  """HTEX specific auto scaling strategy
159
167
 
160
168
  This strategy works only for HTEX. This strategy will scale out by
@@ -169,24 +177,25 @@ class Strategy:
169
177
  expected to scale in effectively only when # of workers, or tasks executing
170
178
  per block is close to 1.
171
179
  """
172
- self._general_strategy(status_list, strategy_type='htex')
180
+ self._general_strategy(executors, strategy_type='htex')
173
181
 
174
182
  @wrap_with_logs
175
- def _general_strategy(self, status_list, *, strategy_type):
176
- logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(status_list)} executors")
183
+ def _general_strategy(self, executors: List[BlockProviderExecutor], *, strategy_type: str) -> None:
184
+ logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(executors)} executors")
177
185
 
178
- for exec_status in status_list:
179
- executor = exec_status.executor
186
+ for executor in executors:
180
187
  label = executor.label
181
- if not isinstance(executor, BlockProviderExecutor):
182
- logger.debug(f"Not strategizing for executor {label} because scaling not enabled")
183
- continue
184
188
  logger.debug(f"Strategizing for executor {label}")
185
189
 
190
+ if self.executors[label]['first']:
191
+ logger.debug(f"Scaling out {executor.provider.init_blocks} initial blocks for {label}")
192
+ executor.scale_out_facade(executor.provider.init_blocks)
193
+ self.executors[label]['first'] = False
194
+
186
195
  # Tasks that are either pending completion
187
196
  active_tasks = executor.outstanding
188
197
 
189
- status = exec_status.status
198
+ status = executor.status_facade
190
199
 
191
200
  # FIXME we need to handle case where provider does not define these
192
201
  # FIXME probably more of this logic should be moved to the provider
@@ -230,23 +239,26 @@ class Strategy:
230
239
  else:
231
240
  # We want to make sure that max_idletime is reached
232
241
  # before killing off resources
233
- logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks}) than minimum blocks ({min_blocks})")
242
+ logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks})"
243
+ f" than minimum blocks ({min_blocks})")
234
244
 
235
245
  if not self.executors[executor.label]['idle_since']:
236
246
  logger.debug(f"Starting idle timer for executor. If idle time exceeds {self.max_idletime}s, blocks will be scaled in")
237
247
  self.executors[executor.label]['idle_since'] = time.time()
238
-
239
248
  idle_since = self.executors[executor.label]['idle_since']
249
+ assert idle_since is not None, "The `if` statement above this assert should have forced idle time to be not-None"
250
+
240
251
  idle_duration = time.time() - idle_since
241
252
  if idle_duration > self.max_idletime:
242
253
  # We have resources idle for the max duration,
243
254
  # we have to scale_in now.
244
255
  logger.debug(f"Idle time has reached {self.max_idletime}s for executor {label}; scaling in")
245
- exec_status.scale_in(active_blocks - min_blocks)
256
+ executor.scale_in_facade(active_blocks - min_blocks)
246
257
 
247
258
  else:
248
259
  logger.debug(
249
- f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s for executor {label}; not scaling in")
260
+ f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s"
261
+ f" for executor {label}; not scaling in")
250
262
 
251
263
  # Case 2
252
264
  # More tasks than the available slots.
@@ -265,7 +277,7 @@ class Strategy:
265
277
  excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
266
278
  excess_blocks = min(excess_blocks, max_blocks - active_blocks)
267
279
  logger.debug(f"Requesting {excess_blocks} more blocks")
268
- exec_status.scale_out(excess_blocks)
280
+ executor.scale_out_facade(excess_blocks)
269
281
 
270
282
  elif active_slots == 0 and active_tasks > 0:
271
283
  logger.debug("Strategy case 4a: No active slots but some active tasks - could scale out by a single block")
@@ -274,7 +286,7 @@ class Strategy:
274
286
  if active_blocks < max_blocks:
275
287
  logger.debug("Requesting single block")
276
288
 
277
- exec_status.scale_out(1)
289
+ executor.scale_out_facade(1)
278
290
  else:
279
291
  logger.debug("Not requesting single block, because at maxblocks already")
280
292
 
@@ -286,11 +298,11 @@ class Strategy:
286
298
  # Scale in for htex
287
299
  if isinstance(executor, HighThroughputExecutor):
288
300
  if active_blocks > min_blocks:
289
- excess_slots = math.ceil(active_slots - (active_tasks * parallelism))
290
- excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
301
+ excess_slots = math.floor(active_slots - (active_tasks * parallelism))
302
+ excess_blocks = math.floor(float(excess_slots) / (tasks_per_node * nodes_per_block))
291
303
  excess_blocks = min(excess_blocks, active_blocks - min_blocks)
292
304
  logger.debug(f"Requesting scaling in by {excess_blocks} blocks with idle time {self.max_idletime}s")
293
- exec_status.scale_in(excess_blocks, max_idletime=self.max_idletime)
305
+ executor.scale_in_facade(excess_blocks, max_idletime=self.max_idletime)
294
306
  else:
295
307
  logger.error("This strategy does not support scaling in except for HighThroughputExecutor - taking no action")
296
308
  else:
@@ -1,6 +1,15 @@
1
- from parsl.launchers.launchers import SimpleLauncher, SingleNodeLauncher, \
2
- SrunLauncher, AprunLauncher, SrunMPILauncher, WrappedLauncher, \
3
- GnuParallelLauncher, MpiExecLauncher, MpiRunLauncher, JsrunLauncher
1
+ from parsl.launchers.launchers import (
2
+ AprunLauncher,
3
+ GnuParallelLauncher,
4
+ JsrunLauncher,
5
+ MpiExecLauncher,
6
+ MpiRunLauncher,
7
+ SimpleLauncher,
8
+ SingleNodeLauncher,
9
+ SrunLauncher,
10
+ SrunMPILauncher,
11
+ WrappedLauncher,
12
+ )
4
13
 
5
14
  __all__ = ['SimpleLauncher',
6
15
  'WrappedLauncher',
parsl/launchers/errors.py CHANGED
@@ -1,5 +1,5 @@
1
- from parsl.providers.errors import ExecutionProviderException
2
1
  from parsl.launchers.base import Launcher
2
+ from parsl.providers.errors import ExecutionProviderException
3
3
 
4
4
 
5
5
  class BadLauncher(ExecutionProviderException, TypeError):