parsl 2024.3.11__py3-none-any.whl → 2025.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +9 -10
- parsl/addresses.py +29 -7
- parsl/app/app.py +7 -8
- parsl/app/bash.py +15 -8
- parsl/app/errors.py +10 -13
- parsl/app/futures.py +8 -10
- parsl/app/python.py +2 -1
- parsl/benchmark/perf.py +2 -1
- parsl/concurrent/__init__.py +2 -2
- parsl/config.py +57 -10
- parsl/configs/ASPIRE1.py +6 -5
- parsl/configs/Azure.py +9 -8
- parsl/configs/bridges.py +6 -4
- parsl/configs/cc_in2p3.py +3 -3
- parsl/configs/ec2.py +3 -1
- parsl/configs/expanse.py +4 -3
- parsl/configs/frontera.py +3 -4
- parsl/configs/htex_local.py +3 -4
- parsl/configs/illinoiscluster.py +3 -1
- parsl/configs/improv.py +34 -0
- parsl/configs/kubernetes.py +4 -3
- parsl/configs/local_threads.py +5 -1
- parsl/configs/midway.py +5 -3
- parsl/configs/osg.py +4 -2
- parsl/configs/polaris.py +4 -2
- parsl/configs/stampede2.py +6 -5
- parsl/configs/summit.py +3 -3
- parsl/configs/toss3_llnl.py +4 -3
- parsl/configs/vineex_local.py +6 -4
- parsl/configs/wqex_local.py +5 -3
- parsl/curvezmq.py +4 -0
- parsl/data_provider/data_manager.py +4 -3
- parsl/data_provider/file_noop.py +1 -2
- parsl/data_provider/files.py +3 -3
- parsl/data_provider/ftp.py +1 -3
- parsl/data_provider/globus.py +7 -6
- parsl/data_provider/http.py +2 -2
- parsl/data_provider/rsync.py +1 -1
- parsl/data_provider/staging.py +2 -2
- parsl/data_provider/zip.py +135 -0
- parsl/dataflow/dependency_resolvers.py +115 -0
- parsl/dataflow/dflow.py +262 -224
- parsl/dataflow/errors.py +3 -5
- parsl/dataflow/futures.py +27 -14
- parsl/dataflow/memoization.py +5 -5
- parsl/dataflow/rundirs.py +5 -6
- parsl/dataflow/taskrecord.py +4 -5
- parsl/executors/__init__.py +4 -2
- parsl/executors/base.py +45 -15
- parsl/executors/errors.py +13 -0
- parsl/executors/execute_task.py +37 -0
- parsl/executors/flux/execute_parsl_task.py +3 -3
- parsl/executors/flux/executor.py +18 -19
- parsl/executors/flux/flux_instance_manager.py +26 -27
- parsl/executors/high_throughput/errors.py +43 -3
- parsl/executors/high_throughput/executor.py +316 -282
- parsl/executors/high_throughput/interchange.py +158 -167
- parsl/executors/high_throughput/manager_record.py +5 -0
- parsl/executors/high_throughput/manager_selector.py +55 -0
- parsl/executors/high_throughput/monitoring_info.py +2 -1
- parsl/executors/high_throughput/mpi_executor.py +113 -0
- parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
- parsl/executors/high_throughput/mpi_resource_management.py +6 -17
- parsl/executors/high_throughput/probe.py +9 -7
- parsl/executors/high_throughput/process_worker_pool.py +115 -77
- parsl/executors/high_throughput/zmq_pipes.py +81 -23
- parsl/executors/radical/executor.py +130 -79
- parsl/executors/radical/rpex_resources.py +17 -15
- parsl/executors/radical/rpex_worker.py +4 -3
- parsl/executors/status_handling.py +157 -51
- parsl/executors/taskvine/__init__.py +1 -1
- parsl/executors/taskvine/errors.py +1 -1
- parsl/executors/taskvine/exec_parsl_function.py +2 -2
- parsl/executors/taskvine/executor.py +41 -57
- parsl/executors/taskvine/factory.py +1 -1
- parsl/executors/taskvine/factory_config.py +1 -1
- parsl/executors/taskvine/manager.py +18 -13
- parsl/executors/taskvine/manager_config.py +9 -5
- parsl/executors/threads.py +6 -6
- parsl/executors/workqueue/errors.py +1 -1
- parsl/executors/workqueue/exec_parsl_function.py +6 -5
- parsl/executors/workqueue/executor.py +64 -63
- parsl/executors/workqueue/parsl_coprocess.py +1 -1
- parsl/jobs/error_handlers.py +2 -2
- parsl/jobs/job_status_poller.py +30 -113
- parsl/jobs/states.py +7 -2
- parsl/jobs/strategy.py +43 -31
- parsl/launchers/__init__.py +12 -3
- parsl/launchers/errors.py +1 -1
- parsl/launchers/launchers.py +6 -12
- parsl/log_utils.py +9 -6
- parsl/monitoring/db_manager.py +59 -95
- parsl/monitoring/errors.py +6 -0
- parsl/monitoring/monitoring.py +87 -356
- parsl/monitoring/queries/pandas.py +1 -2
- parsl/monitoring/radios/base.py +13 -0
- parsl/monitoring/radios/filesystem.py +52 -0
- parsl/monitoring/radios/htex.py +57 -0
- parsl/monitoring/radios/multiprocessing.py +17 -0
- parsl/monitoring/radios/udp.py +56 -0
- parsl/monitoring/radios/zmq.py +17 -0
- parsl/monitoring/remote.py +33 -37
- parsl/monitoring/router.py +212 -0
- parsl/monitoring/types.py +5 -6
- parsl/monitoring/visualization/app.py +4 -2
- parsl/monitoring/visualization/models.py +0 -1
- parsl/monitoring/visualization/plots/default/workflow_plots.py +11 -4
- parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
- parsl/monitoring/visualization/utils.py +0 -1
- parsl/monitoring/visualization/views.py +16 -8
- parsl/multiprocessing.py +0 -1
- parsl/process_loggers.py +1 -2
- parsl/providers/__init__.py +8 -17
- parsl/providers/aws/aws.py +2 -3
- parsl/providers/azure/azure.py +4 -5
- parsl/providers/base.py +2 -18
- parsl/providers/cluster_provider.py +4 -12
- parsl/providers/condor/condor.py +7 -17
- parsl/providers/errors.py +2 -2
- parsl/providers/googlecloud/googlecloud.py +2 -1
- parsl/providers/grid_engine/grid_engine.py +5 -14
- parsl/providers/kubernetes/kube.py +80 -40
- parsl/providers/local/local.py +13 -26
- parsl/providers/lsf/lsf.py +5 -23
- parsl/providers/pbspro/pbspro.py +5 -17
- parsl/providers/slurm/slurm.py +81 -39
- parsl/providers/torque/torque.py +3 -14
- parsl/serialize/__init__.py +8 -3
- parsl/serialize/base.py +1 -2
- parsl/serialize/concretes.py +5 -4
- parsl/serialize/facade.py +3 -3
- parsl/serialize/proxystore.py +3 -2
- parsl/tests/__init__.py +1 -1
- parsl/tests/configs/azure_single_node.py +4 -5
- parsl/tests/configs/bridges.py +3 -2
- parsl/tests/configs/cc_in2p3.py +1 -3
- parsl/tests/configs/comet.py +2 -1
- parsl/tests/configs/ec2_single_node.py +1 -2
- parsl/tests/configs/ec2_spot.py +1 -2
- parsl/tests/configs/flux_local.py +11 -0
- parsl/tests/configs/frontera.py +2 -3
- parsl/tests/configs/htex_local.py +3 -5
- parsl/tests/configs/htex_local_alternate.py +11 -15
- parsl/tests/configs/htex_local_intask_staging.py +5 -9
- parsl/tests/configs/htex_local_rsync_staging.py +4 -8
- parsl/tests/configs/local_radical.py +1 -3
- parsl/tests/configs/local_radical_mpi.py +2 -2
- parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
- parsl/tests/configs/local_threads_monitoring.py +0 -1
- parsl/tests/configs/midway.py +2 -2
- parsl/tests/configs/nscc_singapore.py +3 -3
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +3 -2
- parsl/tests/configs/slurm_local.py +24 -0
- parsl/tests/configs/summit.py +1 -0
- parsl/tests/configs/taskvine_ex.py +4 -7
- parsl/tests/configs/user_opts.py +2 -8
- parsl/tests/configs/workqueue_ex.py +4 -6
- parsl/tests/conftest.py +27 -13
- parsl/tests/integration/test_stress/test_python_simple.py +3 -4
- parsl/tests/integration/test_stress/test_python_threads.py +3 -5
- parsl/tests/manual_tests/htex_local.py +4 -6
- parsl/tests/manual_tests/test_basic.py +1 -0
- parsl/tests/manual_tests/test_log_filter.py +3 -1
- parsl/tests/manual_tests/test_memory_limits.py +6 -8
- parsl/tests/manual_tests/test_regression_220.py +2 -1
- parsl/tests/manual_tests/test_udp_simple.py +4 -4
- parsl/tests/manual_tests/test_worker_count.py +3 -2
- parsl/tests/scaling_tests/htex_local.py +2 -4
- parsl/tests/scaling_tests/test_scale.py +0 -9
- parsl/tests/scaling_tests/vineex_condor.py +1 -2
- parsl/tests/scaling_tests/vineex_local.py +1 -2
- parsl/tests/site_tests/site_config_selector.py +1 -6
- parsl/tests/site_tests/test_provider.py +4 -2
- parsl/tests/site_tests/test_site.py +2 -0
- parsl/tests/sites/test_affinity.py +7 -7
- parsl/tests/sites/test_dynamic_executor.py +3 -4
- parsl/tests/sites/test_ec2.py +3 -2
- parsl/tests/sites/test_worker_info.py +4 -5
- parsl/tests/test_aalst_patterns.py +0 -1
- parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
- parsl/tests/test_bash_apps/test_basic.py +10 -4
- parsl/tests/test_bash_apps/test_error_codes.py +5 -7
- parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
- parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
- parsl/tests/test_bash_apps/test_memoize.py +2 -8
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
- parsl/tests/test_bash_apps/test_multiline.py +1 -1
- parsl/tests/test_bash_apps/test_pipeline.py +1 -1
- parsl/tests/test_bash_apps/test_std_uri.py +123 -0
- parsl/tests/test_bash_apps/test_stdout.py +33 -8
- parsl/tests/test_callables.py +2 -2
- parsl/tests/test_checkpointing/test_periodic.py +21 -39
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
- parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
- parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
- parsl/tests/test_checkpointing/test_regression_239.py +1 -1
- parsl/tests/test_checkpointing/test_task_exit.py +2 -3
- parsl/tests/test_docs/test_from_slides.py +5 -2
- parsl/tests/test_docs/test_kwargs.py +4 -1
- parsl/tests/test_docs/test_tutorial_1.py +1 -2
- parsl/tests/test_docs/test_workflow1.py +2 -2
- parsl/tests/test_docs/test_workflow2.py +0 -1
- parsl/tests/test_error_handling/test_rand_fail.py +2 -2
- parsl/tests/test_error_handling/test_resource_spec.py +10 -12
- parsl/tests/test_error_handling/test_retries.py +6 -16
- parsl/tests/test_error_handling/test_retry_handler.py +1 -0
- parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
- parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
- parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
- parsl/tests/test_execute_task.py +29 -0
- parsl/tests/test_flux.py +1 -1
- parsl/tests/test_htex/test_basic.py +2 -3
- parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
- parsl/tests/test_htex/test_command_client_timeout.py +66 -0
- parsl/tests/test_htex/test_connected_blocks.py +3 -2
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
- parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
- parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
- parsl/tests/test_htex/test_drain.py +79 -0
- parsl/tests/test_htex/test_htex.py +51 -25
- parsl/tests/test_htex/test_manager_failure.py +0 -1
- parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
- parsl/tests/test_htex/test_managers_command.py +36 -0
- parsl/tests/test_htex/test_missing_worker.py +2 -12
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
- parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
- parsl/tests/test_htex/test_zmq_binding.py +29 -8
- parsl/tests/test_monitoring/test_app_names.py +86 -0
- parsl/tests/test_monitoring/test_basic.py +73 -25
- parsl/tests/test_monitoring/test_db_locks.py +6 -4
- parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
- parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
- parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
- parsl/tests/test_monitoring/test_stdouterr.py +134 -0
- parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
- parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
- parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
- parsl/tests/test_providers/test_local_provider.py +3 -132
- parsl/tests/test_providers/test_pbspro_template.py +2 -3
- parsl/tests/test_providers/test_slurm_template.py +2 -3
- parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
- parsl/tests/test_python_apps/test_context_manager.py +128 -0
- parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
- parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
- parsl/tests/test_python_apps/test_fail.py +0 -25
- parsl/tests/test_python_apps/test_futures.py +2 -1
- parsl/tests/test_python_apps/test_inputs_default.py +22 -0
- parsl/tests/test_python_apps/test_join.py +0 -1
- parsl/tests/test_python_apps/test_lifted.py +11 -7
- parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
- parsl/tests/test_python_apps/test_outputs.py +1 -1
- parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -2
- parsl/tests/test_regression/test_1480.py +2 -1
- parsl/tests/test_regression/test_1653.py +2 -1
- parsl/tests/test_regression/test_226.py +1 -0
- parsl/tests/test_regression/test_2652.py +1 -0
- parsl/tests/test_regression/test_69a.py +0 -1
- parsl/tests/test_regression/test_854.py +4 -2
- parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
- parsl/tests/test_regression/test_98.py +0 -1
- parsl/tests/test_scaling/test_block_error_handler.py +9 -4
- parsl/tests/test_scaling/test_regression_1621.py +11 -15
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
- parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
- parsl/tests/test_scaling/test_scale_down.py +2 -5
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +6 -18
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
- parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
- parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
- parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
- parsl/tests/test_serialization/test_basic.py +2 -1
- parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
- parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
- parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
- parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
- parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
- parsl/tests/test_staging/staging_provider.py +2 -2
- parsl/tests/test_staging/test_1316.py +3 -4
- parsl/tests/test_staging/test_docs_1.py +2 -1
- parsl/tests/test_staging/test_docs_2.py +2 -1
- parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
- parsl/tests/{test_data → test_staging}/test_file.py +6 -6
- parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
- parsl/tests/test_staging/test_staging_ftp.py +1 -0
- parsl/tests/test_staging/test_staging_https.py +5 -2
- parsl/tests/test_staging/test_staging_stdout.py +64 -0
- parsl/tests/test_staging/test_zip_in.py +39 -0
- parsl/tests/test_staging/test_zip_out.py +110 -0
- parsl/tests/test_staging/test_zip_to_zip.py +41 -0
- parsl/tests/test_summary.py +2 -2
- parsl/tests/test_thread_parallelism.py +0 -1
- parsl/tests/test_threads/test_configs.py +1 -2
- parsl/tests/test_threads/test_lazy_errors.py +2 -2
- parsl/tests/test_utils/test_execute_wait.py +35 -0
- parsl/tests/test_utils/test_sanitize_dns.py +76 -0
- parsl/tests/unit/test_address.py +20 -0
- parsl/tests/unit/test_file.py +99 -0
- parsl/tests/unit/test_usage_tracking.py +66 -0
- parsl/usage_tracking/api.py +65 -0
- parsl/usage_tracking/levels.py +6 -0
- parsl/usage_tracking/usage.py +104 -62
- parsl/utils.py +139 -6
- parsl/version.py +1 -1
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
- parsl-2025.1.13.data/scripts/interchange.py +649 -0
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +115 -77
- parsl-2025.1.13.dist-info/METADATA +96 -0
- parsl-2025.1.13.dist-info/RECORD +462 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
- parsl/channels/__init__.py +0 -7
- parsl/channels/base.py +0 -141
- parsl/channels/errors.py +0 -113
- parsl/channels/local/local.py +0 -164
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
- parsl/channels/ssh/ssh.py +0 -276
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -74
- parsl/configs/ad_hoc.py +0 -35
- parsl/executors/radical/rpex_master.py +0 -42
- parsl/monitoring/radios.py +0 -175
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -248
- parsl/providers/cobalt/__init__.py +0 -0
- parsl/providers/cobalt/cobalt.py +0 -236
- parsl/providers/cobalt/template.py +0 -17
- parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
- parsl/tests/configs/cooley_htex.py +0 -37
- parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/configs/swan_htex.py +0 -43
- parsl/tests/configs/theta.py +0 -37
- parsl/tests/integration/test_channels/__init__.py +0 -0
- parsl/tests/integration/test_channels/test_channels.py +0 -17
- parsl/tests/integration/test_channels/test_local_channel.py +0 -42
- parsl/tests/integration/test_channels/test_scp_1.py +0 -45
- parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
- parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
- parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
- parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
- parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
- parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
- parsl/tests/sites/test_local_adhoc.py +0 -61
- parsl/tests/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/test_large_output.py +0 -22
- parsl/tests/test_data/__init__.py +0 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
- parsl-2024.3.11.dist-info/METADATA +0 -98
- parsl-2024.3.11.dist-info/RECORD +0 -447
- parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
- parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
- parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,30 @@
|
|
1
1
|
"""RadicalPilotExecutor builds on the RADICAL-Pilot/Parsl
|
2
2
|
"""
|
3
|
+
import inspect
|
4
|
+
import logging
|
3
5
|
import os
|
6
|
+
import queue
|
4
7
|
import sys
|
8
|
+
import threading as mt
|
5
9
|
import time
|
6
|
-
import
|
7
|
-
import
|
8
|
-
import
|
9
|
-
import
|
10
|
+
from concurrent.futures import Future
|
11
|
+
from functools import partial
|
12
|
+
from pathlib import PosixPath
|
13
|
+
from typing import Dict, Optional
|
14
|
+
|
10
15
|
import requests
|
11
16
|
import typeguard
|
12
|
-
import threading as mt
|
13
|
-
|
14
|
-
from functools import partial
|
15
|
-
from typing import Optional, Dict
|
16
|
-
from pathlib import Path, PosixPath
|
17
|
-
from concurrent.futures import Future
|
18
17
|
|
18
|
+
import parsl
|
19
|
+
from parsl.app.errors import BashExitFailure, RemoteExceptionWrapper
|
19
20
|
from parsl.app.python import timeout
|
20
|
-
from .rpex_resources import ResourceConfig
|
21
21
|
from parsl.data_provider.files import File
|
22
|
-
from parsl.utils import RepresentationMixin
|
23
|
-
from parsl.app.errors import BashExitFailure
|
24
22
|
from parsl.executors.base import ParslExecutor
|
25
|
-
from parsl.app.errors import RemoteExceptionWrapper
|
26
23
|
from parsl.serialize import deserialize, pack_res_spec_apply_message
|
27
|
-
from parsl.serialize.errors import
|
24
|
+
from parsl.serialize.errors import DeserializationError, SerializationError
|
25
|
+
from parsl.utils import RepresentationMixin
|
26
|
+
|
27
|
+
from .rpex_resources import CLIENT, MPI, ResourceConfig
|
28
28
|
|
29
29
|
try:
|
30
30
|
import radical.pilot as rp
|
@@ -59,7 +59,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
59
59
|
``rp.PilotManager`` and ``rp.TaskManager``.
|
60
60
|
2. "translate": Unwrap, identify, and parse Parsl ``apps`` into ``rp.TaskDescription``.
|
61
61
|
3. "submit": Submit Parsl apps to ``rp.TaskManager``.
|
62
|
-
4. "
|
62
|
+
4. "shutdown": Shut down the RADICAL-Pilot runtime and all associated components.
|
63
63
|
|
64
64
|
Here is a diagram
|
65
65
|
|
@@ -133,24 +133,32 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
133
133
|
self.resource = resource
|
134
134
|
self._uid = RPEX.lower()
|
135
135
|
self.bulk_mode = bulk_mode
|
136
|
+
self._terminate = mt.Event()
|
136
137
|
self.working_dir = working_dir
|
137
138
|
self.pilot_kwargs = rpex_pilot_kwargs
|
138
139
|
self.future_tasks: Dict[str, Future] = {}
|
139
140
|
|
140
141
|
if rpex_cfg:
|
141
|
-
self.rpex_cfg = rpex_cfg
|
142
|
+
self.rpex_cfg = rpex_cfg.get_config()
|
142
143
|
elif not rpex_cfg and 'local' in resource:
|
143
|
-
self.rpex_cfg = ResourceConfig()
|
144
|
+
self.rpex_cfg = ResourceConfig().get_config()
|
144
145
|
else:
|
145
|
-
raise ValueError('Resource config
|
146
|
-
'specified for a non-local
|
146
|
+
raise ValueError('Resource config must be '
|
147
|
+
'specified for a non-local resources')
|
147
148
|
|
148
149
|
def task_state_cb(self, task, state):
|
149
150
|
"""
|
150
151
|
Update the state of Parsl Future apps
|
151
152
|
Based on RP task state callbacks.
|
152
153
|
"""
|
153
|
-
|
154
|
+
# check the Master/Worker state
|
155
|
+
if task.mode in [rp.RAPTOR_MASTER, rp.RAPTOR_WORKER]:
|
156
|
+
if state == rp.FAILED:
|
157
|
+
exception = RuntimeError(f'{task.uid} failed with internal error: {task.stderr}')
|
158
|
+
self._fail_all_tasks(exception)
|
159
|
+
|
160
|
+
# check all other tasks state
|
161
|
+
else:
|
154
162
|
parsl_task = self.future_tasks[task.uid]
|
155
163
|
|
156
164
|
if state == rp.DONE:
|
@@ -186,6 +194,23 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
186
194
|
else:
|
187
195
|
parsl_task.set_exception('Task failed for an unknown reason')
|
188
196
|
|
197
|
+
def _fail_all_tasks(self, exception):
|
198
|
+
"""
|
199
|
+
Fail all outstanding tasks with the given exception.
|
200
|
+
|
201
|
+
This method iterates through all outstanding tasks in the
|
202
|
+
`future_tasks` dictionary, which have not yet completed,
|
203
|
+
and sets the provided exception as their result, indicating
|
204
|
+
a failure.
|
205
|
+
|
206
|
+
Parameters:
|
207
|
+
- exception: The exception to be set as the result for all
|
208
|
+
outstanding tasks.
|
209
|
+
"""
|
210
|
+
for fut_task in self.future_tasks.values():
|
211
|
+
if not fut_task.done():
|
212
|
+
fut_task.set_exception(exception)
|
213
|
+
|
189
214
|
def start(self):
|
190
215
|
"""Create the Pilot component and pass it.
|
191
216
|
"""
|
@@ -202,63 +227,62 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
202
227
|
'resource': self.resource}
|
203
228
|
|
204
229
|
if not self.resource or 'local' in self.resource:
|
205
|
-
|
206
|
-
|
207
|
-
# to include the agent sandbox with the ci artifacts.
|
208
|
-
if os.environ.get("LOCAL_SANDBOX"):
|
209
|
-
pd_init['sandbox'] = self.run_dir
|
210
|
-
os.environ["RADICAL_LOG_LVL"] = "DEBUG"
|
211
|
-
|
212
|
-
logger.info("RPEX will be running in the local mode")
|
230
|
+
os.environ["RADICAL_LOG_LVL"] = "DEBUG"
|
231
|
+
logger.info("RPEX will be running in local mode")
|
213
232
|
|
214
233
|
pd = rp.PilotDescription(pd_init)
|
215
234
|
pd.verify()
|
216
235
|
|
217
|
-
|
218
|
-
|
236
|
+
# start RP's main components TMGR, PMGR and Pilot
|
237
|
+
self.tmgr = rp.TaskManager(session=self.session)
|
238
|
+
self.pmgr = rp.PilotManager(session=self.session)
|
239
|
+
self.pilot = self.pmgr.submit_pilots(pd)
|
219
240
|
|
220
|
-
self.
|
221
|
-
|
241
|
+
if not self.pilot.description.get('cores') or not self.pilot.description.get('nodes'):
|
242
|
+
logger.warning('no "cores/nodes" per pilot were set, using default resources')
|
243
|
+
|
244
|
+
self.tmgr.add_pilots(self.pilot)
|
245
|
+
self.tmgr.register_callback(self.task_state_cb)
|
222
246
|
|
223
|
-
tds = list()
|
224
|
-
master_path = '{0}/rpex_master.py'.format(PWD)
|
225
247
|
worker_path = '{0}/rpex_worker.py'.format(PWD)
|
226
248
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
249
|
+
self.masters = []
|
250
|
+
|
251
|
+
logger.info(f'Starting {self.rpex_cfg.n_masters} masters and {self.rpex_cfg.n_workers} workers for each master')
|
252
|
+
|
253
|
+
# create N masters
|
254
|
+
for _ in range(self.rpex_cfg.n_masters):
|
255
|
+
md = rp.TaskDescription(self.rpex_cfg.master_descr)
|
256
|
+
md.uid = ru.generate_id('rpex.master.%(item_counter)06d', ru.ID_CUSTOM,
|
231
257
|
ns=self.session.uid)
|
232
|
-
td.ranks = 1
|
233
|
-
td.cores_per_rank = 1
|
234
|
-
td.arguments = [self.rpex_cfg, i]
|
235
|
-
td.input_staging = self._stage_files([File(master_path),
|
236
|
-
File(worker_path),
|
237
|
-
File(self.rpex_cfg)], mode='in')
|
238
|
-
tds.append(td)
|
239
258
|
|
240
|
-
|
241
|
-
|
259
|
+
# submit the master to the TMGR
|
260
|
+
master = self.tmgr.submit_raptors(md)[0]
|
261
|
+
self.masters.append(master)
|
262
|
+
|
263
|
+
workers = []
|
264
|
+
# create N workers for each master and submit them to the TMGR
|
265
|
+
for _ in range(self.rpex_cfg.n_workers):
|
266
|
+
wd = rp.TaskDescription(self.rpex_cfg.worker_descr)
|
267
|
+
wd.uid = ru.generate_id('rpex.worker.%(item_counter)06d', ru.ID_CUSTOM,
|
268
|
+
ns=self.session.uid)
|
269
|
+
wd.raptor_id = master.uid
|
270
|
+
wd.input_staging = self._stage_files([File(worker_path)], mode='in')
|
271
|
+
workers.append(wd)
|
242
272
|
|
243
|
-
|
244
|
-
pilot = self.pmgr.submit_pilots(pd)
|
245
|
-
if not pilot.description.get('cores'):
|
246
|
-
logger.warning('no "cores" per pilot was set, using default resources {0}'.format(pilot.resources))
|
273
|
+
self.tmgr.submit_workers(workers)
|
247
274
|
|
248
|
-
self.
|
275
|
+
self.select_master = self._cyclic_master_selector()
|
249
276
|
|
250
277
|
# prepare or use the current env for the agent/pilot side environment
|
251
|
-
if
|
252
|
-
logger.info("creating {0} environment for the executor".format(
|
253
|
-
pilot.prepare_env(env_name=
|
254
|
-
|
278
|
+
if self.rpex_cfg.pilot_env_mode != CLIENT:
|
279
|
+
logger.info("creating {0} environment for the executor".format(self.rpex_cfg.pilot_env.name))
|
280
|
+
self.pilot.prepare_env(env_name=self.rpex_cfg.pilot_env.name,
|
281
|
+
env_spec=self.rpex_cfg.pilot_env.as_dict())
|
255
282
|
else:
|
256
283
|
client_env = sys.prefix
|
257
284
|
logger.info("reusing ({0}) environment for the executor".format(client_env))
|
258
285
|
|
259
|
-
self.tmgr.add_pilots(pilot)
|
260
|
-
self.tmgr.register_callback(self.task_state_cb)
|
261
|
-
|
262
286
|
# create a bulking thread to run the actual task submission
|
263
287
|
# to RP in bulks
|
264
288
|
if self.bulk_mode:
|
@@ -272,8 +296,21 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
272
296
|
self._bulk_thread.daemon = True
|
273
297
|
self._bulk_thread.start()
|
274
298
|
|
299
|
+
logger.info('bulk mode is on, submitting tasks in bulks')
|
300
|
+
|
275
301
|
return True
|
276
302
|
|
303
|
+
def _cyclic_master_selector(self):
|
304
|
+
"""
|
305
|
+
Balance task submission across N masters and N workers
|
306
|
+
"""
|
307
|
+
current_master = 0
|
308
|
+
masters_uids = [m.uid for m in self.masters]
|
309
|
+
|
310
|
+
while True:
|
311
|
+
yield masters_uids[current_master]
|
312
|
+
current_master = (current_master + 1) % len(self.masters)
|
313
|
+
|
277
314
|
def unwrap(self, func, args):
|
278
315
|
"""
|
279
316
|
Unwrap a Parsl app and its args for further processing.
|
@@ -364,22 +401,25 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
364
401
|
|
365
402
|
# This is the default mode where the bash_app will be executed as
|
366
403
|
# a single core process by RP. For cores > 1 the user must use
|
367
|
-
# above or use MPI functions if their
|
404
|
+
# task.mode=rp.TASK_EXECUTABLE (above) or use MPI functions if their
|
405
|
+
# code is Python.
|
368
406
|
else:
|
369
407
|
task.mode = rp.TASK_PROC
|
370
|
-
task.raptor_id =
|
408
|
+
task.raptor_id = next(self.select_master)
|
371
409
|
task.executable = self._pack_and_apply_message(func, args, kwargs)
|
372
410
|
|
373
411
|
elif PYTHON in task_type or not task_type:
|
374
412
|
task.mode = rp.TASK_FUNCTION
|
375
|
-
task.raptor_id =
|
413
|
+
task.raptor_id = next(self.select_master)
|
376
414
|
if kwargs.get('walltime'):
|
377
415
|
func = timeout(func, kwargs['walltime'])
|
378
416
|
|
379
|
-
#
|
380
|
-
if
|
417
|
+
# Check how to serialize the function object
|
418
|
+
if MPI in self.rpex_cfg.worker_type.lower():
|
419
|
+
task.use_mpi = True
|
381
420
|
task.function = rp.PythonTask(func, *args, **kwargs)
|
382
421
|
else:
|
422
|
+
task.use_mpi = False
|
383
423
|
task.function = self._pack_and_apply_message(func, args, kwargs)
|
384
424
|
|
385
425
|
task.input_staging = self._stage_files(kwargs.get("inputs", []),
|
@@ -394,7 +434,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
394
434
|
try:
|
395
435
|
task.verify()
|
396
436
|
except ru.typeddict.TDKeyError as e:
|
397
|
-
raise Exception(f'{e}. Please check
|
437
|
+
raise Exception(f'{e}. Please check: https://radicalpilot.readthedocs.io/en/stable/ documentation')
|
398
438
|
|
399
439
|
return task
|
400
440
|
|
@@ -413,7 +453,11 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
413
453
|
|
414
454
|
def _unpack_and_set_parsl_exception(self, parsl_task, exception):
|
415
455
|
try:
|
416
|
-
|
456
|
+
try:
|
457
|
+
s = rp.utils.deserialize_bson(exception)
|
458
|
+
except Exception:
|
459
|
+
s = exception
|
460
|
+
|
417
461
|
if isinstance(s, RemoteExceptionWrapper):
|
418
462
|
try:
|
419
463
|
s.reraise()
|
@@ -421,6 +465,8 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
421
465
|
parsl_task.set_exception(e)
|
422
466
|
elif isinstance(s, Exception):
|
423
467
|
parsl_task.set_exception(s)
|
468
|
+
elif isinstance(s, str):
|
469
|
+
parsl_task.set_exception(eval(s))
|
424
470
|
else:
|
425
471
|
raise ValueError("Unknown exception-like type received: {}".format(type(s)))
|
426
472
|
except Exception as e:
|
@@ -440,16 +486,10 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
440
486
|
elif isinstance(k_val, PosixPath):
|
441
487
|
k_val = k_val.__str__()
|
442
488
|
|
443
|
-
#
|
444
|
-
#
|
445
|
-
# we just set the path to the cwd
|
446
|
-
if '/' not in k_val:
|
447
|
-
k_val = CWD + '/' + k_val
|
448
|
-
|
449
|
-
# finally set the stderr/out to
|
450
|
-
# the desired name by the user
|
489
|
+
# set the stderr/out to the desired
|
490
|
+
# name by the user
|
451
491
|
setattr(task, k, k_val)
|
452
|
-
task.sandbox =
|
492
|
+
task.sandbox = CWD
|
453
493
|
|
454
494
|
def _stage_files(self, files, mode):
|
455
495
|
"""
|
@@ -477,7 +517,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
477
517
|
# this indicates that the user
|
478
518
|
# did not provide a specific
|
479
519
|
# output file and RP will stage out
|
480
|
-
# the task.
|
520
|
+
# the task.stdout from pilot://task_folder
|
481
521
|
# to the CWD or file.url
|
482
522
|
if '/' not in file.url:
|
483
523
|
f = {'source': file.filename,
|
@@ -493,7 +533,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
493
533
|
|
494
534
|
bulk = list()
|
495
535
|
|
496
|
-
while
|
536
|
+
while not self._terminate.is_set():
|
497
537
|
|
498
538
|
now = time.time() # time of last submission
|
499
539
|
|
@@ -513,6 +553,9 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
513
553
|
if len(bulk) >= self._max_bulk_size:
|
514
554
|
break
|
515
555
|
|
556
|
+
if self._terminate.is_set():
|
557
|
+
break
|
558
|
+
|
516
559
|
if bulk:
|
517
560
|
logger.debug('submit bulk: %d', len(bulk))
|
518
561
|
self.tmgr.submit_tasks(bulk)
|
@@ -548,7 +591,15 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
|
|
548
591
|
|
549
592
|
def shutdown(self, hub=True, targets='all', block=False):
|
550
593
|
"""Shutdown the executor, including all RADICAL-Pilot components."""
|
551
|
-
logger.info("RadicalPilotExecutor
|
594
|
+
logger.info("RadicalPilotExecutor is terminating...")
|
595
|
+
|
596
|
+
self._terminate.set()
|
597
|
+
|
598
|
+
# only join the bulk thread when in bulk submission mode
|
599
|
+
if self.bulk_mode:
|
600
|
+
self._bulk_thread.join()
|
601
|
+
|
552
602
|
self.session.close(download=True)
|
603
|
+
logger.info("RadicalPilotExecutor is terminated.")
|
553
604
|
|
554
605
|
return True
|
@@ -1,17 +1,13 @@
|
|
1
|
-
import sys
|
2
1
|
import json
|
3
|
-
|
2
|
+
import sys
|
4
3
|
from typing import List
|
5
4
|
|
6
|
-
_setup_paths: List[str]
|
5
|
+
_setup_paths: List[str] = []
|
7
6
|
try:
|
8
7
|
import radical.pilot as rp
|
9
8
|
import radical.utils as ru
|
10
9
|
except ImportError:
|
11
|
-
|
12
|
-
else:
|
13
|
-
_setup_paths = [rp.sdist_path,
|
14
|
-
ru.sdist_path]
|
10
|
+
pass
|
15
11
|
|
16
12
|
|
17
13
|
MPI = "mpi"
|
@@ -77,7 +73,7 @@ class ResourceConfig:
|
|
77
73
|
|
78
74
|
pilot_env_setup : list
|
79
75
|
List of setup commands/packages for the pilot environment.
|
80
|
-
Default
|
76
|
+
Default is an empty list.
|
81
77
|
|
82
78
|
python_v : str
|
83
79
|
The Python version to be used in the pilot environment.
|
@@ -108,7 +104,7 @@ class ResourceConfig:
|
|
108
104
|
python_v: str = f'{sys.version_info[0]}.{sys.version_info[1]}'
|
109
105
|
worker_type: str = DEFAULT_WORKER
|
110
106
|
|
111
|
-
def
|
107
|
+
def get_config(cls, path=None):
|
112
108
|
|
113
109
|
# Default ENV mode for RP is to reuse
|
114
110
|
# the client side. If this is not the case,
|
@@ -126,6 +122,7 @@ class ResourceConfig:
|
|
126
122
|
cfg = {
|
127
123
|
'n_masters': cls.masters,
|
128
124
|
'n_workers': cls.workers,
|
125
|
+
'worker_type': cls.worker_type,
|
129
126
|
'gpus_per_node': cls.worker_gpus_per_node,
|
130
127
|
'cores_per_node': cls.worker_cores_per_node,
|
131
128
|
'cores_per_master': cls.cores_per_master,
|
@@ -143,9 +140,10 @@ class ResourceConfig:
|
|
143
140
|
'pilot_env_mode': cls.pilot_env_mode,
|
144
141
|
|
145
142
|
'master_descr': {
|
143
|
+
"ranks": 1,
|
144
|
+
"cores_per_rank": 1,
|
146
145
|
"mode": rp.RAPTOR_MASTER,
|
147
146
|
"named_env": cls.pilot_env_name,
|
148
|
-
"executable": "python3 rpex_master.py",
|
149
147
|
},
|
150
148
|
|
151
149
|
'worker_descr': {
|
@@ -154,12 +152,16 @@ class ResourceConfig:
|
|
154
152
|
"raptor_file": "./rpex_worker.py",
|
155
153
|
"raptor_class": cls.worker_type if
|
156
154
|
cls.worker_type.lower() != MPI else MPI_WORKER,
|
155
|
+
"ranks": cls.nodes_per_worker * cls.worker_cores_per_node,
|
156
|
+
"gpus_per_rank": cls.nodes_per_worker * cls.worker_gpus_per_node,
|
157
157
|
}}
|
158
158
|
|
159
|
-
# Convert the class instance to a
|
160
|
-
config_path = 'rpex.cfg'
|
159
|
+
# Convert the class instance to a JSON file or a Config dict.
|
161
160
|
if path:
|
161
|
+
config_path = 'rpex.cfg'
|
162
162
|
config_path = path + '/' + config_path
|
163
|
-
|
164
|
-
|
165
|
-
|
163
|
+
with open(config_path, 'w') as f:
|
164
|
+
json.dump(cfg, f, indent=4)
|
165
|
+
else:
|
166
|
+
config_obj = ru.Config(from_dict=cfg)
|
167
|
+
return config_obj
|
@@ -1,10 +1,11 @@
|
|
1
1
|
import sys
|
2
|
+
|
2
3
|
import radical.pilot as rp
|
3
4
|
|
4
5
|
import parsl.app.errors as pe
|
5
6
|
from parsl.app.bash import remote_side_bash_executor
|
6
|
-
from parsl.
|
7
|
-
from parsl.
|
7
|
+
from parsl.executors.execute_task import execute_task
|
8
|
+
from parsl.serialize import serialize, unpack_res_spec_apply_message
|
8
9
|
|
9
10
|
|
10
11
|
class ParslWorker:
|
@@ -32,7 +33,7 @@ class ParslWorker:
|
|
32
33
|
|
33
34
|
try:
|
34
35
|
buffer = rp.utils.deserialize_bson(task['description']['executable'])
|
35
|
-
func, args, kwargs, _resource_spec = unpack_res_spec_apply_message(buffer
|
36
|
+
func, args, kwargs, _resource_spec = unpack_res_spec_apply_message(buffer)
|
36
37
|
ret = remote_side_bash_executor(func, *args, **kwargs)
|
37
38
|
exc = (None, None)
|
38
39
|
val = None
|