parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +9 -10
- parsl/addresses.py +26 -6
- parsl/app/app.py +7 -8
- parsl/app/bash.py +15 -8
- parsl/app/errors.py +10 -13
- parsl/app/futures.py +8 -10
- parsl/app/python.py +2 -1
- parsl/benchmark/perf.py +2 -1
- parsl/concurrent/__init__.py +2 -2
- parsl/config.py +53 -10
- parsl/configs/ASPIRE1.py +6 -5
- parsl/configs/Azure.py +9 -8
- parsl/configs/bridges.py +6 -4
- parsl/configs/cc_in2p3.py +3 -3
- parsl/configs/ec2.py +3 -1
- parsl/configs/expanse.py +4 -3
- parsl/configs/frontera.py +3 -4
- parsl/configs/htex_local.py +3 -4
- parsl/configs/illinoiscluster.py +3 -1
- parsl/configs/improv.py +34 -0
- parsl/configs/kubernetes.py +4 -3
- parsl/configs/local_threads.py +5 -1
- parsl/configs/midway.py +5 -3
- parsl/configs/osg.py +4 -2
- parsl/configs/polaris.py +4 -2
- parsl/configs/stampede2.py +6 -5
- parsl/configs/summit.py +3 -3
- parsl/configs/toss3_llnl.py +4 -3
- parsl/configs/vineex_local.py +6 -4
- parsl/configs/wqex_local.py +5 -3
- parsl/curvezmq.py +4 -0
- parsl/data_provider/data_manager.py +4 -3
- parsl/data_provider/file_noop.py +1 -2
- parsl/data_provider/files.py +3 -3
- parsl/data_provider/ftp.py +1 -3
- parsl/data_provider/globus.py +7 -6
- parsl/data_provider/http.py +2 -2
- parsl/data_provider/rsync.py +1 -1
- parsl/data_provider/staging.py +2 -2
- parsl/data_provider/zip.py +135 -0
- parsl/dataflow/dependency_resolvers.py +115 -0
- parsl/dataflow/dflow.py +259 -223
- parsl/dataflow/errors.py +3 -5
- parsl/dataflow/futures.py +27 -14
- parsl/dataflow/memoization.py +5 -5
- parsl/dataflow/rundirs.py +5 -6
- parsl/dataflow/taskrecord.py +4 -5
- parsl/executors/__init__.py +4 -2
- parsl/executors/base.py +45 -15
- parsl/executors/errors.py +13 -0
- parsl/executors/execute_task.py +37 -0
- parsl/executors/flux/execute_parsl_task.py +3 -3
- parsl/executors/flux/executor.py +18 -19
- parsl/executors/flux/flux_instance_manager.py +26 -27
- parsl/executors/high_throughput/errors.py +43 -3
- parsl/executors/high_throughput/executor.py +307 -285
- parsl/executors/high_throughput/interchange.py +137 -168
- parsl/executors/high_throughput/manager_record.py +4 -0
- parsl/executors/high_throughput/manager_selector.py +55 -0
- parsl/executors/high_throughput/monitoring_info.py +2 -1
- parsl/executors/high_throughput/mpi_executor.py +113 -0
- parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
- parsl/executors/high_throughput/mpi_resource_management.py +6 -17
- parsl/executors/high_throughput/probe.py +9 -7
- parsl/executors/high_throughput/process_worker_pool.py +77 -75
- parsl/executors/high_throughput/zmq_pipes.py +81 -23
- parsl/executors/radical/executor.py +130 -79
- parsl/executors/radical/rpex_resources.py +17 -15
- parsl/executors/radical/rpex_worker.py +4 -3
- parsl/executors/status_handling.py +157 -51
- parsl/executors/taskvine/__init__.py +1 -1
- parsl/executors/taskvine/errors.py +1 -1
- parsl/executors/taskvine/exec_parsl_function.py +2 -2
- parsl/executors/taskvine/executor.py +38 -55
- parsl/executors/taskvine/factory.py +1 -1
- parsl/executors/taskvine/factory_config.py +1 -1
- parsl/executors/taskvine/manager.py +17 -13
- parsl/executors/taskvine/manager_config.py +7 -2
- parsl/executors/threads.py +6 -6
- parsl/executors/workqueue/errors.py +1 -1
- parsl/executors/workqueue/exec_parsl_function.py +6 -5
- parsl/executors/workqueue/executor.py +64 -63
- parsl/executors/workqueue/parsl_coprocess.py +1 -1
- parsl/jobs/error_handlers.py +2 -2
- parsl/jobs/job_status_poller.py +28 -112
- parsl/jobs/states.py +7 -2
- parsl/jobs/strategy.py +43 -31
- parsl/launchers/__init__.py +12 -3
- parsl/launchers/errors.py +1 -1
- parsl/launchers/launchers.py +0 -6
- parsl/log_utils.py +1 -2
- parsl/monitoring/db_manager.py +55 -93
- parsl/monitoring/errors.py +6 -0
- parsl/monitoring/monitoring.py +85 -311
- parsl/monitoring/queries/pandas.py +1 -2
- parsl/monitoring/radios/base.py +13 -0
- parsl/monitoring/radios/filesystem.py +52 -0
- parsl/monitoring/radios/htex.py +57 -0
- parsl/monitoring/radios/multiprocessing.py +17 -0
- parsl/monitoring/radios/udp.py +56 -0
- parsl/monitoring/radios/zmq.py +17 -0
- parsl/monitoring/remote.py +33 -37
- parsl/monitoring/router.py +212 -0
- parsl/monitoring/types.py +5 -6
- parsl/monitoring/visualization/app.py +4 -2
- parsl/monitoring/visualization/models.py +0 -1
- parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
- parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
- parsl/monitoring/visualization/utils.py +0 -1
- parsl/monitoring/visualization/views.py +16 -9
- parsl/multiprocessing.py +0 -1
- parsl/process_loggers.py +1 -2
- parsl/providers/__init__.py +8 -17
- parsl/providers/aws/aws.py +2 -3
- parsl/providers/azure/azure.py +4 -5
- parsl/providers/base.py +2 -18
- parsl/providers/cluster_provider.py +3 -9
- parsl/providers/condor/condor.py +7 -17
- parsl/providers/errors.py +2 -2
- parsl/providers/googlecloud/googlecloud.py +2 -1
- parsl/providers/grid_engine/grid_engine.py +5 -14
- parsl/providers/kubernetes/kube.py +80 -40
- parsl/providers/local/local.py +13 -26
- parsl/providers/lsf/lsf.py +5 -23
- parsl/providers/pbspro/pbspro.py +5 -17
- parsl/providers/slurm/slurm.py +81 -39
- parsl/providers/torque/torque.py +3 -14
- parsl/serialize/__init__.py +8 -3
- parsl/serialize/base.py +1 -2
- parsl/serialize/concretes.py +5 -4
- parsl/serialize/facade.py +3 -3
- parsl/serialize/proxystore.py +3 -2
- parsl/tests/__init__.py +1 -1
- parsl/tests/configs/azure_single_node.py +4 -5
- parsl/tests/configs/bridges.py +3 -2
- parsl/tests/configs/cc_in2p3.py +1 -3
- parsl/tests/configs/comet.py +2 -1
- parsl/tests/configs/ec2_single_node.py +1 -2
- parsl/tests/configs/ec2_spot.py +1 -2
- parsl/tests/configs/flux_local.py +11 -0
- parsl/tests/configs/frontera.py +2 -3
- parsl/tests/configs/htex_local.py +3 -5
- parsl/tests/configs/htex_local_alternate.py +11 -15
- parsl/tests/configs/htex_local_intask_staging.py +5 -9
- parsl/tests/configs/htex_local_rsync_staging.py +4 -8
- parsl/tests/configs/local_radical.py +1 -3
- parsl/tests/configs/local_radical_mpi.py +2 -2
- parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
- parsl/tests/configs/local_threads_monitoring.py +0 -1
- parsl/tests/configs/midway.py +2 -2
- parsl/tests/configs/nscc_singapore.py +3 -3
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +3 -2
- parsl/tests/configs/slurm_local.py +24 -0
- parsl/tests/configs/summit.py +1 -0
- parsl/tests/configs/taskvine_ex.py +4 -7
- parsl/tests/configs/user_opts.py +0 -7
- parsl/tests/configs/workqueue_ex.py +4 -6
- parsl/tests/conftest.py +27 -13
- parsl/tests/integration/test_stress/test_python_simple.py +3 -4
- parsl/tests/integration/test_stress/test_python_threads.py +3 -5
- parsl/tests/manual_tests/htex_local.py +4 -6
- parsl/tests/manual_tests/test_basic.py +1 -0
- parsl/tests/manual_tests/test_log_filter.py +3 -1
- parsl/tests/manual_tests/test_memory_limits.py +6 -8
- parsl/tests/manual_tests/test_regression_220.py +2 -1
- parsl/tests/manual_tests/test_udp_simple.py +4 -4
- parsl/tests/manual_tests/test_worker_count.py +3 -2
- parsl/tests/scaling_tests/htex_local.py +2 -4
- parsl/tests/scaling_tests/test_scale.py +0 -9
- parsl/tests/scaling_tests/vineex_condor.py +1 -2
- parsl/tests/scaling_tests/vineex_local.py +1 -2
- parsl/tests/site_tests/site_config_selector.py +1 -6
- parsl/tests/site_tests/test_provider.py +4 -2
- parsl/tests/site_tests/test_site.py +2 -0
- parsl/tests/sites/test_affinity.py +7 -7
- parsl/tests/sites/test_dynamic_executor.py +3 -4
- parsl/tests/sites/test_ec2.py +3 -2
- parsl/tests/sites/test_worker_info.py +4 -5
- parsl/tests/test_aalst_patterns.py +0 -1
- parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
- parsl/tests/test_bash_apps/test_basic.py +10 -4
- parsl/tests/test_bash_apps/test_error_codes.py +5 -7
- parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
- parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
- parsl/tests/test_bash_apps/test_memoize.py +2 -8
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
- parsl/tests/test_bash_apps/test_multiline.py +1 -1
- parsl/tests/test_bash_apps/test_pipeline.py +1 -1
- parsl/tests/test_bash_apps/test_std_uri.py +123 -0
- parsl/tests/test_bash_apps/test_stdout.py +33 -8
- parsl/tests/test_callables.py +2 -2
- parsl/tests/test_checkpointing/test_periodic.py +21 -39
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
- parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
- parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
- parsl/tests/test_checkpointing/test_regression_239.py +1 -1
- parsl/tests/test_checkpointing/test_task_exit.py +2 -3
- parsl/tests/test_docs/test_from_slides.py +5 -2
- parsl/tests/test_docs/test_kwargs.py +4 -1
- parsl/tests/test_docs/test_tutorial_1.py +1 -2
- parsl/tests/test_docs/test_workflow1.py +2 -2
- parsl/tests/test_docs/test_workflow2.py +0 -1
- parsl/tests/test_error_handling/test_rand_fail.py +2 -2
- parsl/tests/test_error_handling/test_resource_spec.py +10 -12
- parsl/tests/test_error_handling/test_retries.py +6 -16
- parsl/tests/test_error_handling/test_retry_handler.py +1 -0
- parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
- parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
- parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
- parsl/tests/test_execute_task.py +29 -0
- parsl/tests/test_flux.py +1 -1
- parsl/tests/test_htex/test_basic.py +2 -3
- parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
- parsl/tests/test_htex/test_command_client_timeout.py +66 -0
- parsl/tests/test_htex/test_connected_blocks.py +3 -2
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
- parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
- parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
- parsl/tests/test_htex/test_drain.py +11 -10
- parsl/tests/test_htex/test_htex.py +51 -25
- parsl/tests/test_htex/test_manager_failure.py +0 -1
- parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
- parsl/tests/test_htex/test_managers_command.py +36 -0
- parsl/tests/test_htex/test_missing_worker.py +2 -12
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
- parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
- parsl/tests/test_htex/test_zmq_binding.py +29 -8
- parsl/tests/test_monitoring/test_app_names.py +5 -5
- parsl/tests/test_monitoring/test_basic.py +73 -25
- parsl/tests/test_monitoring/test_db_locks.py +6 -4
- parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
- parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
- parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
- parsl/tests/test_monitoring/test_stdouterr.py +134 -0
- parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
- parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
- parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
- parsl/tests/test_providers/test_local_provider.py +3 -132
- parsl/tests/test_providers/test_pbspro_template.py +2 -3
- parsl/tests/test_providers/test_slurm_template.py +2 -3
- parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
- parsl/tests/test_python_apps/test_context_manager.py +128 -0
- parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
- parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
- parsl/tests/test_python_apps/test_fail.py +0 -25
- parsl/tests/test_python_apps/test_futures.py +2 -1
- parsl/tests/test_python_apps/test_inputs_default.py +22 -0
- parsl/tests/test_python_apps/test_join.py +0 -1
- parsl/tests/test_python_apps/test_lifted.py +11 -7
- parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
- parsl/tests/test_python_apps/test_outputs.py +1 -1
- parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -2
- parsl/tests/test_regression/test_1480.py +2 -1
- parsl/tests/test_regression/test_1653.py +2 -1
- parsl/tests/test_regression/test_226.py +1 -0
- parsl/tests/test_regression/test_2652.py +1 -0
- parsl/tests/test_regression/test_69a.py +0 -1
- parsl/tests/test_regression/test_854.py +4 -2
- parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
- parsl/tests/test_regression/test_98.py +0 -1
- parsl/tests/test_scaling/test_block_error_handler.py +9 -4
- parsl/tests/test_scaling/test_regression_1621.py +11 -15
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
- parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
- parsl/tests/test_scaling/test_scale_down.py +2 -5
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
- parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
- parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
- parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
- parsl/tests/test_serialization/test_basic.py +2 -1
- parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
- parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
- parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
- parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
- parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
- parsl/tests/test_staging/staging_provider.py +2 -2
- parsl/tests/test_staging/test_1316.py +3 -4
- parsl/tests/test_staging/test_docs_1.py +2 -1
- parsl/tests/test_staging/test_docs_2.py +2 -1
- parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
- parsl/tests/{test_data → test_staging}/test_file.py +6 -6
- parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
- parsl/tests/test_staging/test_staging_ftp.py +1 -0
- parsl/tests/test_staging/test_staging_https.py +5 -2
- parsl/tests/test_staging/test_staging_stdout.py +64 -0
- parsl/tests/test_staging/test_zip_in.py +39 -0
- parsl/tests/test_staging/test_zip_out.py +110 -0
- parsl/tests/test_staging/test_zip_to_zip.py +41 -0
- parsl/tests/test_summary.py +2 -2
- parsl/tests/test_thread_parallelism.py +0 -1
- parsl/tests/test_threads/test_configs.py +1 -2
- parsl/tests/test_threads/test_lazy_errors.py +2 -2
- parsl/tests/test_utils/test_execute_wait.py +35 -0
- parsl/tests/test_utils/test_sanitize_dns.py +76 -0
- parsl/tests/unit/test_address.py +20 -0
- parsl/tests/unit/test_file.py +99 -0
- parsl/tests/unit/test_usage_tracking.py +66 -0
- parsl/usage_tracking/api.py +65 -0
- parsl/usage_tracking/levels.py +6 -0
- parsl/usage_tracking/usage.py +104 -62
- parsl/utils.py +137 -4
- parsl/version.py +1 -1
- {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
- parsl-2025.1.13.data/scripts/interchange.py +649 -0
- {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
- parsl-2025.1.13.dist-info/METADATA +96 -0
- parsl-2025.1.13.dist-info/RECORD +462 -0
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
- parsl/channels/__init__.py +0 -7
- parsl/channels/base.py +0 -141
- parsl/channels/errors.py +0 -113
- parsl/channels/local/local.py +0 -164
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
- parsl/channels/ssh/ssh.py +0 -276
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -74
- parsl/configs/ad_hoc.py +0 -35
- parsl/executors/radical/rpex_master.py +0 -42
- parsl/monitoring/radios.py +0 -175
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -248
- parsl/providers/cobalt/__init__.py +0 -0
- parsl/providers/cobalt/cobalt.py +0 -236
- parsl/providers/cobalt/template.py +0 -17
- parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
- parsl/tests/configs/cooley_htex.py +0 -37
- parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/configs/swan_htex.py +0 -43
- parsl/tests/configs/theta.py +0 -37
- parsl/tests/integration/test_channels/__init__.py +0 -0
- parsl/tests/integration/test_channels/test_channels.py +0 -17
- parsl/tests/integration/test_channels/test_local_channel.py +0 -42
- parsl/tests/integration/test_channels/test_scp_1.py +0 -45
- parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
- parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
- parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
- parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
- parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
- parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
- parsl/tests/sites/test_local_adhoc.py +0 -61
- parsl/tests/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/test_large_output.py +0 -22
- parsl/tests/test_data/__init__.py +0 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
- parsl-2024.3.18.dist-info/METADATA +0 -98
- parsl-2024.3.18.dist-info/RECORD +0 -449
- parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
- parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
- parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
- {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
parsl/dataflow/errors.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
-
from parsl.errors import ParslError
|
2
1
|
from typing import Optional, Sequence, Tuple
|
3
2
|
|
3
|
+
from parsl.errors import ParslError
|
4
|
+
|
4
5
|
|
5
6
|
class DataFlowException(ParslError):
|
6
7
|
"""Base class for all exceptions.
|
@@ -24,11 +25,8 @@ class BadCheckpoint(DataFlowException):
|
|
24
25
|
def __init__(self, reason: str) -> None:
|
25
26
|
self.reason = reason
|
26
27
|
|
27
|
-
def __repr__(self) -> str:
|
28
|
-
return self.reason
|
29
|
-
|
30
28
|
def __str__(self) -> str:
|
31
|
-
return self.
|
29
|
+
return self.reason
|
32
30
|
|
33
31
|
|
34
32
|
class DependencyError(DataFlowException):
|
parsl/dataflow/futures.py
CHANGED
@@ -1,19 +1,11 @@
|
|
1
|
-
"""This module implements the AppFutures.
|
2
|
-
|
3
|
-
We have two basic types of futures:
|
4
|
-
1. DataFutures which represent data objects
|
5
|
-
2. AppFutures which represent the futures on App/Leaf tasks.
|
6
|
-
|
7
|
-
"""
|
8
1
|
from __future__ import annotations
|
9
2
|
|
10
|
-
from concurrent.futures import Future
|
11
3
|
import logging
|
12
4
|
import threading
|
13
|
-
from
|
5
|
+
from concurrent.futures import Future
|
6
|
+
from typing import Any, Optional, Sequence, Union
|
14
7
|
|
15
8
|
import parsl.app.app as app
|
16
|
-
|
17
9
|
from parsl.app.futures import DataFuture
|
18
10
|
from parsl.dataflow.taskrecord import TaskRecord
|
19
11
|
|
@@ -77,13 +69,34 @@ class AppFuture(Future):
|
|
77
69
|
self._outputs = []
|
78
70
|
self.task_record = task_record
|
79
71
|
|
72
|
+
self._stdout_future: Optional[DataFuture] = None
|
73
|
+
self._stderr_future: Optional[DataFuture] = None
|
74
|
+
|
80
75
|
@property
|
81
|
-
def stdout(self) ->
|
82
|
-
|
76
|
+
def stdout(self) -> Union[None, str, DataFuture]:
|
77
|
+
"""Return app stdout. If stdout was specified as a string, then this
|
78
|
+
property will return that string. If stdout was specified as a File,
|
79
|
+
then this property will return a DataFuture representing that file
|
80
|
+
stageout.
|
81
|
+
TODO: this can be a tuple too I think?"""
|
82
|
+
if self._stdout_future:
|
83
|
+
return self._stdout_future
|
84
|
+
else:
|
85
|
+
# this covers the str and None cases
|
86
|
+
return self.task_record['kwargs'].get('stdout')
|
83
87
|
|
84
88
|
@property
|
85
|
-
def stderr(self) ->
|
86
|
-
|
89
|
+
def stderr(self) -> Union[None, str, DataFuture]:
|
90
|
+
"""Return app stderr. If stdout was specified as a string, then this
|
91
|
+
property will return that string. If stdout was specified as a File,
|
92
|
+
then this property will return a DataFuture representing that file
|
93
|
+
stageout.
|
94
|
+
TODO: this can be a tuple too I think?"""
|
95
|
+
if self._stderr_future:
|
96
|
+
return self._stderr_future
|
97
|
+
else:
|
98
|
+
# this covers the str and None cases
|
99
|
+
return self.task_record['kwargs'].get('stderr')
|
87
100
|
|
88
101
|
@property
|
89
102
|
def tid(self) -> int:
|
parsl/dataflow/memoization.py
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
from __future__ import annotations
|
2
|
+
|
2
3
|
import hashlib
|
3
|
-
from functools import lru_cache, singledispatch
|
4
4
|
import logging
|
5
5
|
import pickle
|
6
|
-
from
|
6
|
+
from functools import lru_cache, singledispatch
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
7
8
|
|
8
|
-
from
|
9
|
+
from parsl.dataflow.taskrecord import TaskRecord
|
9
10
|
|
10
11
|
if TYPE_CHECKING:
|
11
12
|
from parsl import DataFlowKernel # import loop at runtime - needed for typechecking - TODO turn into "if typing:"
|
12
13
|
|
13
|
-
from concurrent.futures import Future
|
14
|
-
|
15
14
|
import types
|
15
|
+
from concurrent.futures import Future
|
16
16
|
|
17
17
|
logger = logging.getLogger(__name__)
|
18
18
|
|
parsl/dataflow/rundirs.py
CHANGED
@@ -1,22 +1,21 @@
|
|
1
|
+
import logging
|
1
2
|
import os
|
2
3
|
from glob import glob
|
3
|
-
import logging
|
4
4
|
|
5
5
|
logger = logging.getLogger(__name__)
|
6
6
|
|
7
7
|
|
8
8
|
def make_rundir(path: str) -> str:
|
9
|
-
"""
|
9
|
+
"""Create a numbered run directory under the specified path.
|
10
10
|
|
11
|
-
|
12
|
-
./runinfo <- Home of all run directories
|
11
|
+
./runinfo <- specified path
|
13
12
|
|----000
|
14
13
|
|----001 <- Directories for each run
|
15
14
|
| ....
|
16
15
|
|----NNN
|
17
16
|
|
18
|
-
|
19
|
-
- path (str): String path to
|
17
|
+
Args:
|
18
|
+
- path (str): String path to root of all rundirs
|
20
19
|
"""
|
21
20
|
try:
|
22
21
|
if not os.path.exists(path):
|
parsl/dataflow/taskrecord.py
CHANGED
@@ -1,19 +1,18 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import threading
|
4
3
|
import datetime
|
5
|
-
|
4
|
+
import threading
|
6
5
|
from concurrent.futures import Future
|
7
6
|
|
8
|
-
|
9
7
|
# only for type checking:
|
10
|
-
from typing import Any, Callable, Dict,
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union
|
9
|
+
|
10
|
+
from typing_extensions import TypedDict
|
11
11
|
|
12
12
|
if TYPE_CHECKING:
|
13
13
|
from parsl.dataflow.futures import AppFuture
|
14
14
|
|
15
15
|
import parsl.dataflow.dflow as dflow
|
16
|
-
|
17
16
|
from parsl.dataflow.states import States
|
18
17
|
|
19
18
|
|
parsl/executors/__init__.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
from parsl.executors.flux.executor import FluxExecutor
|
2
|
+
from parsl.executors.high_throughput.executor import HighThroughputExecutor
|
3
|
+
from parsl.executors.high_throughput.mpi_executor import MPIExecutor
|
1
4
|
from parsl.executors.threads import ThreadPoolExecutor
|
2
5
|
from parsl.executors.workqueue.executor import WorkQueueExecutor
|
3
|
-
from parsl.executors.high_throughput.executor import HighThroughputExecutor
|
4
|
-
from parsl.executors.flux.executor import FluxExecutor
|
5
6
|
|
6
7
|
__all__ = ['ThreadPoolExecutor',
|
7
8
|
'HighThroughputExecutor',
|
9
|
+
'MPIExecutor',
|
8
10
|
'WorkQueueExecutor',
|
9
11
|
'FluxExecutor']
|
parsl/executors/base.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
import os
|
1
2
|
from abc import ABCMeta, abstractmethod
|
2
3
|
from concurrent.futures import Future
|
3
|
-
from typing import Any, Callable, Dict, Optional
|
4
|
+
from typing import Any, Callable, Dict, Optional
|
5
|
+
|
4
6
|
from typing_extensions import Literal, Self
|
5
7
|
|
6
|
-
from parsl.
|
8
|
+
from parsl.monitoring.radios.base import MonitoringRadioSender
|
7
9
|
|
8
10
|
|
9
11
|
class ParslExecutor(metaclass=ABCMeta):
|
@@ -45,6 +47,21 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
45
47
|
label: str = "undefined"
|
46
48
|
radio_mode: str = "udp"
|
47
49
|
|
50
|
+
def __init__(
|
51
|
+
self,
|
52
|
+
*,
|
53
|
+
hub_address: Optional[str] = None,
|
54
|
+
hub_zmq_port: Optional[int] = None,
|
55
|
+
submit_monitoring_radio: Optional[MonitoringRadioSender] = None,
|
56
|
+
run_dir: str = ".",
|
57
|
+
run_id: Optional[str] = None,
|
58
|
+
):
|
59
|
+
self.hub_address = hub_address
|
60
|
+
self.hub_zmq_port = hub_zmq_port
|
61
|
+
self.submit_monitoring_radio = submit_monitoring_radio
|
62
|
+
self.run_dir = os.path.abspath(run_dir)
|
63
|
+
self.run_id = run_id
|
64
|
+
|
48
65
|
def __enter__(self) -> Self:
|
49
66
|
return self
|
50
67
|
|
@@ -53,7 +70,7 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
53
70
|
return False
|
54
71
|
|
55
72
|
@abstractmethod
|
56
|
-
def start(self) ->
|
73
|
+
def start(self) -> None:
|
57
74
|
"""Start the executor.
|
58
75
|
|
59
76
|
Any spin-up operations (for example: starting thread pools) should be performed here.
|
@@ -79,13 +96,6 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
79
96
|
"""
|
80
97
|
pass
|
81
98
|
|
82
|
-
def create_monitoring_info(self, status: Dict[str, JobStatus]) -> List[object]:
|
83
|
-
"""Create a monitoring message for each block based on the poll status.
|
84
|
-
|
85
|
-
:return: a list of dictionaries mapping to the info of each block
|
86
|
-
"""
|
87
|
-
return []
|
88
|
-
|
89
99
|
def monitor_resources(self) -> bool:
|
90
100
|
"""Should resource monitoring happen for tasks on running on this executor?
|
91
101
|
|
@@ -106,6 +116,16 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
106
116
|
def run_dir(self, value: str) -> None:
|
107
117
|
self._run_dir = value
|
108
118
|
|
119
|
+
@property
|
120
|
+
def run_id(self) -> Optional[str]:
|
121
|
+
"""UUID for the enclosing DFK.
|
122
|
+
"""
|
123
|
+
return self._run_id
|
124
|
+
|
125
|
+
@run_id.setter
|
126
|
+
def run_id(self, value: Optional[str]) -> None:
|
127
|
+
self._run_id = value
|
128
|
+
|
109
129
|
@property
|
110
130
|
def hub_address(self) -> Optional[str]:
|
111
131
|
"""Address to the Hub for monitoring.
|
@@ -117,11 +137,21 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
117
137
|
self._hub_address = value
|
118
138
|
|
119
139
|
@property
|
120
|
-
def
|
140
|
+
def hub_zmq_port(self) -> Optional[int]:
|
121
141
|
"""Port to the Hub for monitoring.
|
122
142
|
"""
|
123
|
-
return self.
|
143
|
+
return self._hub_zmq_port
|
144
|
+
|
145
|
+
@hub_zmq_port.setter
|
146
|
+
def hub_zmq_port(self, value: Optional[int]) -> None:
|
147
|
+
self._hub_zmq_port = value
|
148
|
+
|
149
|
+
@property
|
150
|
+
def submit_monitoring_radio(self) -> Optional[MonitoringRadioSender]:
|
151
|
+
"""Local radio for sending monitoring messages
|
152
|
+
"""
|
153
|
+
return self._submit_monitoring_radio
|
124
154
|
|
125
|
-
@
|
126
|
-
def
|
127
|
-
self.
|
155
|
+
@submit_monitoring_radio.setter
|
156
|
+
def submit_monitoring_radio(self, value: Optional[MonitoringRadioSender]) -> None:
|
157
|
+
self._submit_monitoring_radio = value
|
parsl/executors/errors.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
"""Exceptions raise by Executors."""
|
2
|
+
from typing import Set
|
3
|
+
|
2
4
|
from parsl.errors import ParslError
|
3
5
|
from parsl.executors.base import ParslExecutor
|
4
6
|
|
@@ -44,6 +46,17 @@ class UnsupportedFeatureError(ExecutorError):
|
|
44
46
|
self.current_executor)
|
45
47
|
|
46
48
|
|
49
|
+
class InvalidResourceSpecification(ExecutorError):
|
50
|
+
"""Error raised when Invalid input is supplied via resource Specification"""
|
51
|
+
|
52
|
+
def __init__(self, invalid_keys: Set[str], message: str = ''):
|
53
|
+
self.invalid_keys = invalid_keys
|
54
|
+
self.message = message
|
55
|
+
|
56
|
+
def __str__(self):
|
57
|
+
return f"Invalid Resource Specification Supplied: {self.invalid_keys}. {self.message}"
|
58
|
+
|
59
|
+
|
47
60
|
class ScalingFailed(ExecutorError):
|
48
61
|
"""Scaling failed due to error in Execution provider."""
|
49
62
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
from parsl.serialize import unpack_res_spec_apply_message
|
4
|
+
|
5
|
+
|
6
|
+
def execute_task(bufs: bytes):
|
7
|
+
"""Deserialize the buffer and execute the task.
|
8
|
+
Returns the result or throws exception.
|
9
|
+
"""
|
10
|
+
f, args, kwargs, resource_spec = unpack_res_spec_apply_message(bufs)
|
11
|
+
|
12
|
+
for varname in resource_spec:
|
13
|
+
envname = "PARSL_" + str(varname).upper()
|
14
|
+
os.environ[envname] = str(resource_spec[varname])
|
15
|
+
|
16
|
+
# We might need to look into callability of the function from itself
|
17
|
+
# since we change it's name in the new namespace
|
18
|
+
prefix = "parsl_"
|
19
|
+
fname = prefix + "f"
|
20
|
+
argname = prefix + "args"
|
21
|
+
kwargname = prefix + "kwargs"
|
22
|
+
resultname = prefix + "result"
|
23
|
+
|
24
|
+
code = "{0} = {1}(*{2}, **{3})".format(resultname, fname,
|
25
|
+
argname, kwargname)
|
26
|
+
|
27
|
+
user_ns = locals()
|
28
|
+
user_ns.update({
|
29
|
+
'__builtins__': __builtins__,
|
30
|
+
fname: f,
|
31
|
+
argname: args,
|
32
|
+
kwargname: kwargs,
|
33
|
+
resultname: resultname
|
34
|
+
})
|
35
|
+
|
36
|
+
exec(code, user_ns, user_ns)
|
37
|
+
return user_ns.get(resultname)
|
@@ -1,12 +1,12 @@
|
|
1
1
|
"""Script for executing tasks inside of Flux jobs."""
|
2
2
|
|
3
3
|
import argparse
|
4
|
-
import os
|
5
4
|
import logging
|
5
|
+
import os
|
6
6
|
|
7
|
-
from parsl.executors.
|
8
|
-
from parsl.serialize import serialize
|
7
|
+
from parsl.executors.execute_task import execute_task
|
9
8
|
from parsl.executors.flux import TaskResult
|
9
|
+
from parsl.serialize import serialize
|
10
10
|
|
11
11
|
|
12
12
|
def main():
|
parsl/executors/flux/executor.py
CHANGED
@@ -1,33 +1,32 @@
|
|
1
1
|
"""Defines the FluxExecutor class."""
|
2
2
|
|
3
|
+
import collections
|
3
4
|
import concurrent.futures as cf
|
4
5
|
import functools
|
6
|
+
import itertools
|
5
7
|
import os
|
8
|
+
import queue
|
9
|
+
import shutil
|
6
10
|
import sys
|
7
|
-
import uuid
|
8
11
|
import threading
|
9
|
-
import
|
10
|
-
import shutil
|
11
|
-
import queue
|
12
|
-
from socket import gethostname
|
13
|
-
import collections
|
14
|
-
from collections.abc import Mapping, Callable
|
15
|
-
from typing import Optional, Any, Dict
|
12
|
+
import uuid
|
16
13
|
import weakref
|
14
|
+
from collections.abc import Callable, Mapping
|
15
|
+
from socket import gethostname
|
16
|
+
from typing import Any, Dict, Optional
|
17
17
|
|
18
18
|
import zmq
|
19
19
|
|
20
|
-
from parsl.
|
20
|
+
from parsl.app.errors import AppException
|
21
21
|
from parsl.executors.base import ParslExecutor
|
22
|
+
from parsl.executors.errors import ScalingFailed
|
22
23
|
from parsl.executors.flux.execute_parsl_task import __file__ as _WORKER_PATH
|
23
24
|
from parsl.executors.flux.flux_instance_manager import __file__ as _MANAGER_PATH
|
24
|
-
from parsl.executors.errors import ScalingFailed
|
25
25
|
from parsl.providers import LocalProvider
|
26
26
|
from parsl.providers.base import ExecutionProvider
|
27
27
|
from parsl.serialize import deserialize, pack_res_spec_apply_message
|
28
28
|
from parsl.serialize.errors import SerializationError
|
29
|
-
from parsl.
|
30
|
-
|
29
|
+
from parsl.utils import RepresentationMixin
|
31
30
|
|
32
31
|
_WORKER_PATH = os.path.realpath(_WORKER_PATH)
|
33
32
|
_MANAGER_PATH = os.path.realpath(_MANAGER_PATH)
|
@@ -201,7 +200,6 @@ class FluxExecutor(ParslExecutor, RepresentationMixin):
|
|
201
200
|
raise EnvironmentError("Cannot find Flux installation in PATH")
|
202
201
|
self.flux_path = os.path.abspath(flux_path)
|
203
202
|
self._task_id_counter = itertools.count()
|
204
|
-
self._socket = zmq.Context().socket(zmq.REP)
|
205
203
|
# Assumes a launch command cannot be None or empty
|
206
204
|
self.launch_cmd = launch_cmd or self.DEFAULT_LAUNCH_CMD
|
207
205
|
self._submission_queue: queue.Queue = queue.Queue()
|
@@ -214,7 +212,6 @@ class FluxExecutor(ParslExecutor, RepresentationMixin):
|
|
214
212
|
args=(
|
215
213
|
self._submission_queue,
|
216
214
|
self._stop_event,
|
217
|
-
self._socket,
|
218
215
|
self.working_dir,
|
219
216
|
self.flux_executor_kwargs,
|
220
217
|
self.provider,
|
@@ -307,11 +304,13 @@ def _submit_wrapper(
|
|
307
304
|
|
308
305
|
If an exception is thrown, error out all submitted tasks.
|
309
306
|
"""
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
307
|
+
with zmq.Context() as ctx:
|
308
|
+
with ctx.socket(zmq.REP) as socket:
|
309
|
+
try:
|
310
|
+
_submit_flux_jobs(submission_queue, stop_event, socket, *args, **kwargs)
|
311
|
+
except Exception as exc:
|
312
|
+
_error_out_jobs(submission_queue, stop_event, exc)
|
313
|
+
raise
|
315
314
|
|
316
315
|
|
317
316
|
def _error_out_jobs(
|
@@ -1,10 +1,10 @@
|
|
1
1
|
"""Script meant to be the initial program of a Flux instance."""
|
2
2
|
|
3
3
|
import argparse
|
4
|
+
import logging
|
4
5
|
import os
|
5
6
|
from os.path import dirname
|
6
|
-
import
|
7
|
-
from socket import gethostname, gethostbyname
|
7
|
+
from socket import gethostbyname, gethostname
|
8
8
|
|
9
9
|
import zmq
|
10
10
|
|
@@ -16,8 +16,8 @@ def main():
|
|
16
16
|
encapsulating Flux instance.
|
17
17
|
"""
|
18
18
|
# flux imports only available when launched under Flux instance
|
19
|
-
import flux.job
|
20
19
|
import flux
|
20
|
+
import flux.job
|
21
21
|
|
22
22
|
logging.basicConfig(
|
23
23
|
level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(message)s"
|
@@ -27,30 +27,29 @@ def main():
|
|
27
27
|
parser.add_argument("hostname", help="hostname of the parent executor's socket")
|
28
28
|
parser.add_argument("port", help="Port of the parent executor's socket")
|
29
29
|
args = parser.parse_args()
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
logging.debug("Flux jobs drained, exiting.")
|
30
|
+
with zmq.Context() as context, context.socket(zmq.REQ) as socket:
|
31
|
+
socket.connect(
|
32
|
+
args.protocol + "://" + gethostbyname(args.hostname) + ":" + args.port
|
33
|
+
)
|
34
|
+
# send the path to the ``flux.job`` package
|
35
|
+
socket.send(dirname(dirname(os.path.realpath(flux.__file__))).encode())
|
36
|
+
logging.debug("Flux package path sent.")
|
37
|
+
# collect the encapsulating Flux instance's URI
|
38
|
+
local_uri = flux.Flux().attr_get("local-uri")
|
39
|
+
hostname = gethostname()
|
40
|
+
if args.hostname == hostname:
|
41
|
+
flux_uri = local_uri
|
42
|
+
else:
|
43
|
+
flux_uri = "ssh://" + gethostname() + local_uri.replace("local://", "")
|
44
|
+
logging.debug("Flux URI is %s", flux_uri)
|
45
|
+
response = socket.recv() # get acknowledgment
|
46
|
+
logging.debug("Received acknowledgment %s", response)
|
47
|
+
socket.send(flux_uri.encode()) # send URI
|
48
|
+
logging.debug("URI sent. Blocking for response...")
|
49
|
+
response = socket.recv() # wait for shutdown message
|
50
|
+
logging.debug("Response %s received, draining flux jobs...", response)
|
51
|
+
flux.Flux().rpc("job-manager.drain").get()
|
52
|
+
logging.debug("Flux jobs drained, exiting.")
|
54
53
|
|
55
54
|
|
56
55
|
if __name__ == "__main__":
|
@@ -1,3 +1,36 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
|
4
|
+
class ManagerLost(Exception):
|
5
|
+
"""
|
6
|
+
Task lost due to manager loss. Manager is considered lost when multiple heartbeats
|
7
|
+
have been missed.
|
8
|
+
"""
|
9
|
+
def __init__(self, manager_id: bytes, hostname: str) -> None:
|
10
|
+
self.manager_id = manager_id
|
11
|
+
self.tstamp = time.time()
|
12
|
+
self.hostname = hostname
|
13
|
+
|
14
|
+
def __str__(self) -> str:
|
15
|
+
return (
|
16
|
+
f"Task failure due to loss of manager {self.manager_id.decode()} on"
|
17
|
+
f" host {self.hostname}"
|
18
|
+
)
|
19
|
+
|
20
|
+
|
21
|
+
class VersionMismatch(Exception):
|
22
|
+
"""Manager and Interchange versions do not match"""
|
23
|
+
def __init__(self, interchange_version: str, manager_version: str):
|
24
|
+
self.interchange_version = interchange_version
|
25
|
+
self.manager_version = manager_version
|
26
|
+
|
27
|
+
def __str__(self) -> str:
|
28
|
+
return (
|
29
|
+
f"Manager version info {self.manager_version} does not match interchange"
|
30
|
+
f" version info {self.interchange_version}, causing a critical failure"
|
31
|
+
)
|
32
|
+
|
33
|
+
|
1
34
|
class WorkerLost(Exception):
|
2
35
|
"""Exception raised when a worker is lost
|
3
36
|
"""
|
@@ -5,8 +38,15 @@ class WorkerLost(Exception):
|
|
5
38
|
self.worker_id = worker_id
|
6
39
|
self.hostname = hostname
|
7
40
|
|
8
|
-
def
|
41
|
+
def __str__(self):
|
9
42
|
return "Task failure due to loss of worker {} on host {}".format(self.worker_id, self.hostname)
|
10
43
|
|
11
|
-
|
12
|
-
|
44
|
+
|
45
|
+
class CommandClientTimeoutError(Exception):
|
46
|
+
"""Raised when the command client times out waiting for a response.
|
47
|
+
"""
|
48
|
+
|
49
|
+
|
50
|
+
class CommandClientBadError(Exception):
|
51
|
+
"""Raised when the command client is bad from an earlier timeout.
|
52
|
+
"""
|