parsl 2024.3.11__py3-none-any.whl → 2025.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +9 -10
- parsl/addresses.py +29 -7
- parsl/app/app.py +7 -8
- parsl/app/bash.py +15 -8
- parsl/app/errors.py +10 -13
- parsl/app/futures.py +8 -10
- parsl/app/python.py +2 -1
- parsl/benchmark/perf.py +2 -1
- parsl/concurrent/__init__.py +2 -2
- parsl/config.py +57 -10
- parsl/configs/ASPIRE1.py +6 -5
- parsl/configs/Azure.py +9 -8
- parsl/configs/bridges.py +6 -4
- parsl/configs/cc_in2p3.py +3 -3
- parsl/configs/ec2.py +3 -1
- parsl/configs/expanse.py +4 -3
- parsl/configs/frontera.py +3 -4
- parsl/configs/htex_local.py +3 -4
- parsl/configs/illinoiscluster.py +3 -1
- parsl/configs/improv.py +34 -0
- parsl/configs/kubernetes.py +4 -3
- parsl/configs/local_threads.py +5 -1
- parsl/configs/midway.py +5 -3
- parsl/configs/osg.py +4 -2
- parsl/configs/polaris.py +4 -2
- parsl/configs/stampede2.py +6 -5
- parsl/configs/summit.py +3 -3
- parsl/configs/toss3_llnl.py +4 -3
- parsl/configs/vineex_local.py +6 -4
- parsl/configs/wqex_local.py +5 -3
- parsl/curvezmq.py +4 -0
- parsl/data_provider/data_manager.py +4 -3
- parsl/data_provider/file_noop.py +1 -2
- parsl/data_provider/files.py +3 -3
- parsl/data_provider/ftp.py +1 -3
- parsl/data_provider/globus.py +7 -6
- parsl/data_provider/http.py +2 -2
- parsl/data_provider/rsync.py +1 -1
- parsl/data_provider/staging.py +2 -2
- parsl/data_provider/zip.py +135 -0
- parsl/dataflow/dependency_resolvers.py +115 -0
- parsl/dataflow/dflow.py +262 -224
- parsl/dataflow/errors.py +3 -5
- parsl/dataflow/futures.py +27 -14
- parsl/dataflow/memoization.py +5 -5
- parsl/dataflow/rundirs.py +5 -6
- parsl/dataflow/taskrecord.py +4 -5
- parsl/executors/__init__.py +4 -2
- parsl/executors/base.py +45 -15
- parsl/executors/errors.py +13 -0
- parsl/executors/execute_task.py +37 -0
- parsl/executors/flux/execute_parsl_task.py +3 -3
- parsl/executors/flux/executor.py +18 -19
- parsl/executors/flux/flux_instance_manager.py +26 -27
- parsl/executors/high_throughput/errors.py +43 -3
- parsl/executors/high_throughput/executor.py +316 -282
- parsl/executors/high_throughput/interchange.py +158 -167
- parsl/executors/high_throughput/manager_record.py +5 -0
- parsl/executors/high_throughput/manager_selector.py +55 -0
- parsl/executors/high_throughput/monitoring_info.py +2 -1
- parsl/executors/high_throughput/mpi_executor.py +113 -0
- parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
- parsl/executors/high_throughput/mpi_resource_management.py +6 -17
- parsl/executors/high_throughput/probe.py +9 -7
- parsl/executors/high_throughput/process_worker_pool.py +115 -77
- parsl/executors/high_throughput/zmq_pipes.py +81 -23
- parsl/executors/radical/executor.py +130 -79
- parsl/executors/radical/rpex_resources.py +17 -15
- parsl/executors/radical/rpex_worker.py +4 -3
- parsl/executors/status_handling.py +157 -51
- parsl/executors/taskvine/__init__.py +1 -1
- parsl/executors/taskvine/errors.py +1 -1
- parsl/executors/taskvine/exec_parsl_function.py +2 -2
- parsl/executors/taskvine/executor.py +41 -57
- parsl/executors/taskvine/factory.py +1 -1
- parsl/executors/taskvine/factory_config.py +1 -1
- parsl/executors/taskvine/manager.py +18 -13
- parsl/executors/taskvine/manager_config.py +9 -5
- parsl/executors/threads.py +6 -6
- parsl/executors/workqueue/errors.py +1 -1
- parsl/executors/workqueue/exec_parsl_function.py +6 -5
- parsl/executors/workqueue/executor.py +64 -63
- parsl/executors/workqueue/parsl_coprocess.py +1 -1
- parsl/jobs/error_handlers.py +2 -2
- parsl/jobs/job_status_poller.py +30 -113
- parsl/jobs/states.py +7 -2
- parsl/jobs/strategy.py +43 -31
- parsl/launchers/__init__.py +12 -3
- parsl/launchers/errors.py +1 -1
- parsl/launchers/launchers.py +6 -12
- parsl/log_utils.py +9 -6
- parsl/monitoring/db_manager.py +59 -95
- parsl/monitoring/errors.py +6 -0
- parsl/monitoring/monitoring.py +87 -356
- parsl/monitoring/queries/pandas.py +1 -2
- parsl/monitoring/radios/base.py +13 -0
- parsl/monitoring/radios/filesystem.py +52 -0
- parsl/monitoring/radios/htex.py +57 -0
- parsl/monitoring/radios/multiprocessing.py +17 -0
- parsl/monitoring/radios/udp.py +56 -0
- parsl/monitoring/radios/zmq.py +17 -0
- parsl/monitoring/remote.py +33 -37
- parsl/monitoring/router.py +212 -0
- parsl/monitoring/types.py +5 -6
- parsl/monitoring/visualization/app.py +4 -2
- parsl/monitoring/visualization/models.py +0 -1
- parsl/monitoring/visualization/plots/default/workflow_plots.py +11 -4
- parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
- parsl/monitoring/visualization/utils.py +0 -1
- parsl/monitoring/visualization/views.py +16 -8
- parsl/multiprocessing.py +0 -1
- parsl/process_loggers.py +1 -2
- parsl/providers/__init__.py +8 -17
- parsl/providers/aws/aws.py +2 -3
- parsl/providers/azure/azure.py +4 -5
- parsl/providers/base.py +2 -18
- parsl/providers/cluster_provider.py +4 -12
- parsl/providers/condor/condor.py +7 -17
- parsl/providers/errors.py +2 -2
- parsl/providers/googlecloud/googlecloud.py +2 -1
- parsl/providers/grid_engine/grid_engine.py +5 -14
- parsl/providers/kubernetes/kube.py +80 -40
- parsl/providers/local/local.py +13 -26
- parsl/providers/lsf/lsf.py +5 -23
- parsl/providers/pbspro/pbspro.py +5 -17
- parsl/providers/slurm/slurm.py +81 -39
- parsl/providers/torque/torque.py +3 -14
- parsl/serialize/__init__.py +8 -3
- parsl/serialize/base.py +1 -2
- parsl/serialize/concretes.py +5 -4
- parsl/serialize/facade.py +3 -3
- parsl/serialize/proxystore.py +3 -2
- parsl/tests/__init__.py +1 -1
- parsl/tests/configs/azure_single_node.py +4 -5
- parsl/tests/configs/bridges.py +3 -2
- parsl/tests/configs/cc_in2p3.py +1 -3
- parsl/tests/configs/comet.py +2 -1
- parsl/tests/configs/ec2_single_node.py +1 -2
- parsl/tests/configs/ec2_spot.py +1 -2
- parsl/tests/configs/flux_local.py +11 -0
- parsl/tests/configs/frontera.py +2 -3
- parsl/tests/configs/htex_local.py +3 -5
- parsl/tests/configs/htex_local_alternate.py +11 -15
- parsl/tests/configs/htex_local_intask_staging.py +5 -9
- parsl/tests/configs/htex_local_rsync_staging.py +4 -8
- parsl/tests/configs/local_radical.py +1 -3
- parsl/tests/configs/local_radical_mpi.py +2 -2
- parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
- parsl/tests/configs/local_threads_monitoring.py +0 -1
- parsl/tests/configs/midway.py +2 -2
- parsl/tests/configs/nscc_singapore.py +3 -3
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +3 -2
- parsl/tests/configs/slurm_local.py +24 -0
- parsl/tests/configs/summit.py +1 -0
- parsl/tests/configs/taskvine_ex.py +4 -7
- parsl/tests/configs/user_opts.py +2 -8
- parsl/tests/configs/workqueue_ex.py +4 -6
- parsl/tests/conftest.py +27 -13
- parsl/tests/integration/test_stress/test_python_simple.py +3 -4
- parsl/tests/integration/test_stress/test_python_threads.py +3 -5
- parsl/tests/manual_tests/htex_local.py +4 -6
- parsl/tests/manual_tests/test_basic.py +1 -0
- parsl/tests/manual_tests/test_log_filter.py +3 -1
- parsl/tests/manual_tests/test_memory_limits.py +6 -8
- parsl/tests/manual_tests/test_regression_220.py +2 -1
- parsl/tests/manual_tests/test_udp_simple.py +4 -4
- parsl/tests/manual_tests/test_worker_count.py +3 -2
- parsl/tests/scaling_tests/htex_local.py +2 -4
- parsl/tests/scaling_tests/test_scale.py +0 -9
- parsl/tests/scaling_tests/vineex_condor.py +1 -2
- parsl/tests/scaling_tests/vineex_local.py +1 -2
- parsl/tests/site_tests/site_config_selector.py +1 -6
- parsl/tests/site_tests/test_provider.py +4 -2
- parsl/tests/site_tests/test_site.py +2 -0
- parsl/tests/sites/test_affinity.py +7 -7
- parsl/tests/sites/test_dynamic_executor.py +3 -4
- parsl/tests/sites/test_ec2.py +3 -2
- parsl/tests/sites/test_worker_info.py +4 -5
- parsl/tests/test_aalst_patterns.py +0 -1
- parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
- parsl/tests/test_bash_apps/test_basic.py +10 -4
- parsl/tests/test_bash_apps/test_error_codes.py +5 -7
- parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
- parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
- parsl/tests/test_bash_apps/test_memoize.py +2 -8
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
- parsl/tests/test_bash_apps/test_multiline.py +1 -1
- parsl/tests/test_bash_apps/test_pipeline.py +1 -1
- parsl/tests/test_bash_apps/test_std_uri.py +123 -0
- parsl/tests/test_bash_apps/test_stdout.py +33 -8
- parsl/tests/test_callables.py +2 -2
- parsl/tests/test_checkpointing/test_periodic.py +21 -39
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
- parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
- parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
- parsl/tests/test_checkpointing/test_regression_239.py +1 -1
- parsl/tests/test_checkpointing/test_task_exit.py +2 -3
- parsl/tests/test_docs/test_from_slides.py +5 -2
- parsl/tests/test_docs/test_kwargs.py +4 -1
- parsl/tests/test_docs/test_tutorial_1.py +1 -2
- parsl/tests/test_docs/test_workflow1.py +2 -2
- parsl/tests/test_docs/test_workflow2.py +0 -1
- parsl/tests/test_error_handling/test_rand_fail.py +2 -2
- parsl/tests/test_error_handling/test_resource_spec.py +10 -12
- parsl/tests/test_error_handling/test_retries.py +6 -16
- parsl/tests/test_error_handling/test_retry_handler.py +1 -0
- parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
- parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
- parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
- parsl/tests/test_execute_task.py +29 -0
- parsl/tests/test_flux.py +1 -1
- parsl/tests/test_htex/test_basic.py +2 -3
- parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
- parsl/tests/test_htex/test_command_client_timeout.py +66 -0
- parsl/tests/test_htex/test_connected_blocks.py +3 -2
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
- parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
- parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
- parsl/tests/test_htex/test_drain.py +79 -0
- parsl/tests/test_htex/test_htex.py +51 -25
- parsl/tests/test_htex/test_manager_failure.py +0 -1
- parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
- parsl/tests/test_htex/test_managers_command.py +36 -0
- parsl/tests/test_htex/test_missing_worker.py +2 -12
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
- parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
- parsl/tests/test_htex/test_zmq_binding.py +29 -8
- parsl/tests/test_monitoring/test_app_names.py +86 -0
- parsl/tests/test_monitoring/test_basic.py +73 -25
- parsl/tests/test_monitoring/test_db_locks.py +6 -4
- parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
- parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
- parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
- parsl/tests/test_monitoring/test_stdouterr.py +134 -0
- parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
- parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
- parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
- parsl/tests/test_providers/test_local_provider.py +3 -132
- parsl/tests/test_providers/test_pbspro_template.py +2 -3
- parsl/tests/test_providers/test_slurm_template.py +2 -3
- parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
- parsl/tests/test_python_apps/test_context_manager.py +128 -0
- parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
- parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
- parsl/tests/test_python_apps/test_fail.py +0 -25
- parsl/tests/test_python_apps/test_futures.py +2 -1
- parsl/tests/test_python_apps/test_inputs_default.py +22 -0
- parsl/tests/test_python_apps/test_join.py +0 -1
- parsl/tests/test_python_apps/test_lifted.py +11 -7
- parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
- parsl/tests/test_python_apps/test_outputs.py +1 -1
- parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -2
- parsl/tests/test_regression/test_1480.py +2 -1
- parsl/tests/test_regression/test_1653.py +2 -1
- parsl/tests/test_regression/test_226.py +1 -0
- parsl/tests/test_regression/test_2652.py +1 -0
- parsl/tests/test_regression/test_69a.py +0 -1
- parsl/tests/test_regression/test_854.py +4 -2
- parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
- parsl/tests/test_regression/test_98.py +0 -1
- parsl/tests/test_scaling/test_block_error_handler.py +9 -4
- parsl/tests/test_scaling/test_regression_1621.py +11 -15
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
- parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
- parsl/tests/test_scaling/test_scale_down.py +2 -5
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +6 -18
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
- parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
- parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
- parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
- parsl/tests/test_serialization/test_basic.py +2 -1
- parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
- parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
- parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
- parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
- parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
- parsl/tests/test_staging/staging_provider.py +2 -2
- parsl/tests/test_staging/test_1316.py +3 -4
- parsl/tests/test_staging/test_docs_1.py +2 -1
- parsl/tests/test_staging/test_docs_2.py +2 -1
- parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
- parsl/tests/{test_data → test_staging}/test_file.py +6 -6
- parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
- parsl/tests/test_staging/test_staging_ftp.py +1 -0
- parsl/tests/test_staging/test_staging_https.py +5 -2
- parsl/tests/test_staging/test_staging_stdout.py +64 -0
- parsl/tests/test_staging/test_zip_in.py +39 -0
- parsl/tests/test_staging/test_zip_out.py +110 -0
- parsl/tests/test_staging/test_zip_to_zip.py +41 -0
- parsl/tests/test_summary.py +2 -2
- parsl/tests/test_thread_parallelism.py +0 -1
- parsl/tests/test_threads/test_configs.py +1 -2
- parsl/tests/test_threads/test_lazy_errors.py +2 -2
- parsl/tests/test_utils/test_execute_wait.py +35 -0
- parsl/tests/test_utils/test_sanitize_dns.py +76 -0
- parsl/tests/unit/test_address.py +20 -0
- parsl/tests/unit/test_file.py +99 -0
- parsl/tests/unit/test_usage_tracking.py +66 -0
- parsl/usage_tracking/api.py +65 -0
- parsl/usage_tracking/levels.py +6 -0
- parsl/usage_tracking/usage.py +104 -62
- parsl/utils.py +139 -6
- parsl/version.py +1 -1
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
- parsl-2025.1.13.data/scripts/interchange.py +649 -0
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +115 -77
- parsl-2025.1.13.dist-info/METADATA +96 -0
- parsl-2025.1.13.dist-info/RECORD +462 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
- parsl/channels/__init__.py +0 -7
- parsl/channels/base.py +0 -141
- parsl/channels/errors.py +0 -113
- parsl/channels/local/local.py +0 -164
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
- parsl/channels/ssh/ssh.py +0 -276
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -74
- parsl/configs/ad_hoc.py +0 -35
- parsl/executors/radical/rpex_master.py +0 -42
- parsl/monitoring/radios.py +0 -175
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -248
- parsl/providers/cobalt/__init__.py +0 -0
- parsl/providers/cobalt/cobalt.py +0 -236
- parsl/providers/cobalt/template.py +0 -17
- parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
- parsl/tests/configs/cooley_htex.py +0 -37
- parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/configs/swan_htex.py +0 -43
- parsl/tests/configs/theta.py +0 -37
- parsl/tests/integration/test_channels/__init__.py +0 -0
- parsl/tests/integration/test_channels/test_channels.py +0 -17
- parsl/tests/integration/test_channels/test_local_channel.py +0 -42
- parsl/tests/integration/test_channels/test_scp_1.py +0 -45
- parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
- parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
- parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
- parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
- parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
- parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
- parsl/tests/sites/test_local_adhoc.py +0 -61
- parsl/tests/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/test_large_output.py +0 -22
- parsl/tests/test_data/__init__.py +0 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
- parsl-2024.3.11.dist-info/METADATA +0 -98
- parsl-2024.3.11.dist-info/RECORD +0 -447
- parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
- parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
- parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -1,41 +1,47 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
import argparse
|
4
|
+
import json
|
4
5
|
import logging
|
6
|
+
import math
|
7
|
+
import multiprocessing
|
5
8
|
import os
|
6
|
-
import
|
9
|
+
import pickle
|
7
10
|
import platform
|
11
|
+
import queue
|
12
|
+
import subprocess
|
13
|
+
import sys
|
8
14
|
import threading
|
9
|
-
import pickle
|
10
15
|
import time
|
11
|
-
import queue
|
12
16
|
import uuid
|
13
|
-
from typing import Sequence, Optional, Dict, List
|
14
|
-
|
15
|
-
import zmq
|
16
|
-
import math
|
17
|
-
import json
|
18
|
-
import psutil
|
19
|
-
import multiprocessing
|
20
17
|
from multiprocessing.managers import DictProxy
|
21
18
|
from multiprocessing.sharedctypes import Synchronized
|
19
|
+
from typing import Dict, List, Optional, Sequence
|
20
|
+
|
21
|
+
import psutil
|
22
|
+
import zmq
|
22
23
|
|
23
24
|
from parsl import curvezmq
|
24
|
-
from parsl.
|
25
|
-
from parsl.version import VERSION as PARSL_VERSION
|
25
|
+
from parsl.addresses import tcp_url
|
26
26
|
from parsl.app.errors import RemoteExceptionWrapper
|
27
|
+
from parsl.executors.execute_task import execute_task
|
27
28
|
from parsl.executors.high_throughput.errors import WorkerLost
|
28
|
-
from parsl.executors.high_throughput.
|
29
|
-
|
30
|
-
|
29
|
+
from parsl.executors.high_throughput.mpi_prefix_composer import (
|
30
|
+
VALID_LAUNCHERS,
|
31
|
+
compose_all,
|
32
|
+
)
|
31
33
|
from parsl.executors.high_throughput.mpi_resource_management import (
|
34
|
+
MPITaskScheduler,
|
32
35
|
TaskScheduler,
|
33
|
-
MPITaskScheduler
|
34
36
|
)
|
35
|
-
|
36
|
-
from parsl.
|
37
|
+
from parsl.executors.high_throughput.probe import probe_addresses
|
38
|
+
from parsl.multiprocessing import SpawnContext
|
39
|
+
from parsl.process_loggers import wrap_with_logs
|
40
|
+
from parsl.serialize import serialize
|
41
|
+
from parsl.version import VERSION as PARSL_VERSION
|
37
42
|
|
38
43
|
HEARTBEAT_CODE = (2 ** 32) - 1
|
44
|
+
DRAINED_CODE = (2 ** 32) - 2
|
39
45
|
|
40
46
|
|
41
47
|
class Manager:
|
@@ -73,7 +79,8 @@ class Manager:
|
|
73
79
|
enable_mpi_mode: bool = False,
|
74
80
|
mpi_launcher: str = "mpiexec",
|
75
81
|
available_accelerators: Sequence[str],
|
76
|
-
cert_dir: Optional[str]
|
82
|
+
cert_dir: Optional[str],
|
83
|
+
drain_period: Optional[int]):
|
77
84
|
"""
|
78
85
|
Parameters
|
79
86
|
----------
|
@@ -138,6 +145,9 @@ class Manager:
|
|
138
145
|
|
139
146
|
cert_dir : str | None
|
140
147
|
Path to the certificate directory.
|
148
|
+
|
149
|
+
drain_period: int | None
|
150
|
+
Number of seconds after which this pool will request a drain. If None, the pool never drains. TODO: could be a nicer timespec involving m,s,h qualifiers for user friendliness?
|
141
151
|
"""
|
142
152
|
|
143
153
|
logger.info("Manager initializing")
|
@@ -150,8 +160,8 @@ class Manager:
|
|
150
160
|
raise Exception("No viable address found")
|
151
161
|
else:
|
152
162
|
logger.info("Connection to Interchange successful on {}".format(ix_address))
|
153
|
-
task_q_url =
|
154
|
-
result_q_url =
|
163
|
+
task_q_url = tcp_url(ix_address, task_port)
|
164
|
+
result_q_url = tcp_url(ix_address, result_port)
|
155
165
|
logger.info("Task url : {}".format(task_q_url))
|
156
166
|
logger.info("Result url : {}".format(result_q_url))
|
157
167
|
except Exception:
|
@@ -176,6 +186,7 @@ class Manager:
|
|
176
186
|
|
177
187
|
self.uid = uid
|
178
188
|
self.block_id = block_id
|
189
|
+
self.start_time = time.time()
|
179
190
|
|
180
191
|
self.enable_mpi_mode = enable_mpi_mode
|
181
192
|
self.mpi_launcher = mpi_launcher
|
@@ -227,6 +238,14 @@ class Manager:
|
|
227
238
|
self.heartbeat_period = heartbeat_period
|
228
239
|
self.heartbeat_threshold = heartbeat_threshold
|
229
240
|
self.poll_period = poll_period
|
241
|
+
|
242
|
+
self.drain_time: float
|
243
|
+
if drain_period:
|
244
|
+
self.drain_time = self._start_time + drain_period
|
245
|
+
logger.info(f"Will request drain at {self.drain_time}")
|
246
|
+
else:
|
247
|
+
self.drain_time = float('inf')
|
248
|
+
|
230
249
|
self.cpu_affinity = cpu_affinity
|
231
250
|
|
232
251
|
# Define accelerator available, adjust worker count accordingly
|
@@ -247,6 +266,7 @@ class Manager:
|
|
247
266
|
'worker_count': self.worker_count,
|
248
267
|
'uid': self.uid,
|
249
268
|
'block_id': self.block_id,
|
269
|
+
'start_time': self.start_time,
|
250
270
|
'prefetch_capacity': self.prefetch_capacity,
|
251
271
|
'max_capacity': self.worker_count + self.prefetch_capacity,
|
252
272
|
'os': platform.system(),
|
@@ -262,10 +282,19 @@ class Manager:
|
|
262
282
|
""" Send heartbeat to the incoming task queue
|
263
283
|
"""
|
264
284
|
msg = {'type': 'heartbeat'}
|
285
|
+
# don't need to dumps and encode this every time - could do as a global on import?
|
265
286
|
b_msg = json.dumps(msg).encode('utf-8')
|
266
287
|
self.task_incoming.send(b_msg)
|
267
288
|
logger.debug("Sent heartbeat")
|
268
289
|
|
290
|
+
def drain_to_incoming(self):
|
291
|
+
""" Send drain request to the incoming task queue
|
292
|
+
"""
|
293
|
+
msg = {'type': 'drain'}
|
294
|
+
b_msg = json.dumps(msg).encode('utf-8')
|
295
|
+
self.task_incoming.send(b_msg)
|
296
|
+
logger.debug("Sent drain")
|
297
|
+
|
269
298
|
@wrap_with_logs
|
270
299
|
def pull_tasks(self, kill_event):
|
271
300
|
""" Pull tasks from the incoming tasks zmq pipe onto the internal
|
@@ -298,6 +327,7 @@ class Manager:
|
|
298
327
|
# time here are correctly copy-pasted from the relevant if
|
299
328
|
# statements.
|
300
329
|
next_interesting_event_time = min(last_beat + self.heartbeat_period,
|
330
|
+
self.drain_time,
|
301
331
|
last_interchange_contact + self.heartbeat_threshold)
|
302
332
|
try:
|
303
333
|
pending_task_count = self.pending_task_queue.qsize()
|
@@ -312,6 +342,17 @@ class Manager:
|
|
312
342
|
self.heartbeat_to_incoming()
|
313
343
|
last_beat = time.time()
|
314
344
|
|
345
|
+
if time.time() > self.drain_time:
|
346
|
+
logger.info("Requesting drain")
|
347
|
+
self.drain_to_incoming()
|
348
|
+
# This will start the pool draining...
|
349
|
+
# Drained exit behaviour does not happen here. It will be
|
350
|
+
# driven by the interchange sending a DRAINED_CODE message.
|
351
|
+
|
352
|
+
# now set drain time to the far future so we don't send a drain
|
353
|
+
# message every iteration.
|
354
|
+
self.drain_time = float('inf')
|
355
|
+
|
315
356
|
poll_duration_s = max(0, next_interesting_event_time - time.time())
|
316
357
|
socks = dict(poller.poll(timeout=poll_duration_s * 1000))
|
317
358
|
|
@@ -322,10 +363,14 @@ class Manager:
|
|
322
363
|
|
323
364
|
if tasks == HEARTBEAT_CODE:
|
324
365
|
logger.debug("Got heartbeat from interchange")
|
325
|
-
|
366
|
+
elif tasks == DRAINED_CODE:
|
367
|
+
logger.info("Got fully drained message from interchange - setting kill flag")
|
368
|
+
kill_event.set()
|
326
369
|
else:
|
327
370
|
task_recv_counter += len(tasks)
|
328
|
-
logger.debug("Got executor tasks: {}, cumulative count of tasks: {}".format(
|
371
|
+
logger.debug("Got executor tasks: {}, cumulative count of tasks: {}".format(
|
372
|
+
[t['task_id'] for t in tasks], task_recv_counter
|
373
|
+
))
|
329
374
|
|
330
375
|
for task in tasks:
|
331
376
|
self.task_scheduler.put_task(task)
|
@@ -490,9 +535,8 @@ class Manager:
|
|
490
535
|
self._worker_watchdog_thread.start()
|
491
536
|
self._monitoring_handler_thread.start()
|
492
537
|
|
493
|
-
logger.info("
|
538
|
+
logger.info("Manager threads started")
|
494
539
|
|
495
|
-
# TODO : Add mechanism in this loop to stop the worker pool
|
496
540
|
# This might need a multiprocessing event to signal back.
|
497
541
|
self._kill_event.wait()
|
498
542
|
logger.critical("Received kill event, terminating worker processes")
|
@@ -548,45 +592,13 @@ def update_resource_spec_env_vars(mpi_launcher: str, resource_spec: Dict, node_i
|
|
548
592
|
os.environ[key] = prefix_table[key]
|
549
593
|
|
550
594
|
|
551
|
-
def
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
f, args, kwargs, resource_spec = unpack_res_spec_apply_message(bufs, user_ns, copy=False)
|
560
|
-
|
561
|
-
for varname in resource_spec:
|
562
|
-
envname = "PARSL_" + str(varname).upper()
|
563
|
-
os.environ[envname] = str(resource_spec[varname])
|
564
|
-
|
565
|
-
if resource_spec.get("MPI_NODELIST"):
|
566
|
-
worker_id = os.environ['PARSL_WORKER_RANK']
|
567
|
-
nodes_for_task = resource_spec["MPI_NODELIST"].split(',')
|
568
|
-
logger.info(f"Launching task on provisioned nodes: {nodes_for_task}")
|
569
|
-
assert mpi_launcher
|
570
|
-
update_resource_spec_env_vars(mpi_launcher,
|
571
|
-
resource_spec=resource_spec,
|
572
|
-
node_info=nodes_for_task)
|
573
|
-
# We might need to look into callability of the function from itself
|
574
|
-
since we change its name in the new namespace
|
575
|
-
prefix = "parsl_"
|
576
|
-
fname = prefix + "f"
|
577
|
-
argname = prefix + "args"
|
578
|
-
kwargname = prefix + "kwargs"
|
579
|
-
resultname = prefix + "result"
|
580
|
-
|
581
|
-
user_ns.update({fname: f,
|
582
|
-
argname: args,
|
583
|
-
kwargname: kwargs,
|
584
|
-
resultname: resultname})
|
585
|
-
|
586
|
-
code = "{0} = {1}(*{2}, **{3})".format(resultname, fname,
|
587
|
-
argname, kwargname)
|
588
|
-
exec(code, user_ns, user_ns)
|
589
|
-
return user_ns.get(resultname)
|
595
|
+
def _init_mpi_env(mpi_launcher: str, resource_spec: Dict):
|
596
|
+
node_list = resource_spec.get("MPI_NODELIST")
|
597
|
+
if node_list is None:
|
598
|
+
return
|
599
|
+
nodes_for_task = node_list.split(',')
|
600
|
+
logger.info(f"Launching task on provisioned nodes: {nodes_for_task}")
|
601
|
+
update_resource_spec_env_vars(mpi_launcher=mpi_launcher, resource_spec=resource_spec, node_info=nodes_for_task)
|
590
602
|
|
591
603
|
|
592
604
|
@wrap_with_logs(target="worker_log")
|
@@ -608,14 +620,6 @@ def worker(
|
|
608
620
|
debug: bool,
|
609
621
|
mpi_launcher: str,
|
610
622
|
):
|
611
|
-
"""
|
612
|
-
|
613
|
-
Put request token into queue
|
614
|
-
Get task from task_queue
|
615
|
-
Pop request from queue
|
616
|
-
Put result into result_queue
|
617
|
-
"""
|
618
|
-
|
619
623
|
# override the global logger inherited from the __main__ process (which
|
620
624
|
# usually logs to manager.log) with one specific to this worker.
|
621
625
|
global logger
|
@@ -640,7 +644,8 @@ def worker(
|
|
640
644
|
# If desired, set process affinity
|
641
645
|
if cpu_affinity != "none":
|
642
646
|
# Count the number of cores per worker
|
643
|
-
|
647
|
+
# OSX does not implement os.sched_getaffinity
|
648
|
+
avail_cores = sorted(os.sched_getaffinity(0)) # type: ignore[attr-defined, unused-ignore]
|
644
649
|
cores_per_worker = len(avail_cores) // pool_size
|
645
650
|
assert cores_per_worker > 0, "Affinity does not work if there are more workers than cores"
|
646
651
|
|
@@ -680,12 +685,39 @@ def worker(
|
|
680
685
|
os.environ["KMP_AFFINITY"] = f"explicit,proclist=[{proc_list}]" # For Intel OpenMP
|
681
686
|
|
682
687
|
# Set the affinity for this worker
|
683
|
-
os.sched_setaffinity
|
688
|
+
# OSX does not implement os.sched_setaffinity so type checking
|
689
|
+
# is ignored here in two ways:
|
690
|
+
# On a platform without sched_setaffinity, that attribute will not
|
691
|
+
# be defined, so ignore[attr-defined] will tell mypy to ignore this
|
692
|
+
# incorrect-for-OS X attribute access.
|
693
|
+
# On a platform with sched_setaffinity, that type: ignore message
|
694
|
+
# will be redundant, and ignore[unused-ignore] tells mypy to ignore
|
695
|
+
# that this ignore is unneeded.
|
696
|
+
os.sched_setaffinity(0, my_cores) # type: ignore[attr-defined, unused-ignore]
|
684
697
|
logger.info("Set worker CPU affinity to {}".format(my_cores))
|
685
698
|
|
686
699
|
# If desired, pin to accelerator
|
687
700
|
if accelerator is not None:
|
688
|
-
|
701
|
+
|
702
|
+
# If CUDA devices, find total number of devices to allow for MPS
|
703
|
+
# See: https://developer.nvidia.com/system-management-interface
|
704
|
+
nvidia_smi_cmd = "nvidia-smi -L > /dev/null && nvidia-smi -L | wc -l"
|
705
|
+
nvidia_smi_ret = subprocess.run(nvidia_smi_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
706
|
+
if nvidia_smi_ret.returncode == 0:
|
707
|
+
num_cuda_devices = int(nvidia_smi_ret.stdout.split()[0])
|
708
|
+
else:
|
709
|
+
num_cuda_devices = None
|
710
|
+
|
711
|
+
try:
|
712
|
+
if num_cuda_devices is not None:
|
713
|
+
procs_per_cuda_device = pool_size // num_cuda_devices
|
714
|
+
partitioned_accelerator = str(int(accelerator) // procs_per_cuda_device) # multiple workers will share a GPU
|
715
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = partitioned_accelerator
|
716
|
+
logger.info(f'Pinned worker to partitioned cuda device: {partitioned_accelerator}')
|
717
|
+
else:
|
718
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = accelerator
|
719
|
+
except (TypeError, ValueError, ZeroDivisionError):
|
720
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = accelerator
|
689
721
|
os.environ["ROCR_VISIBLE_DEVICES"] = accelerator
|
690
722
|
os.environ["ZE_AFFINITY_MASK"] = accelerator
|
691
723
|
os.environ["ZE_ENABLE_PCI_ID_DEVICE_ORDER"] = '1'
|
@@ -724,8 +756,10 @@ def worker(
|
|
724
756
|
ready_worker_count.value -= 1
|
725
757
|
worker_enqueued = False
|
726
758
|
|
759
|
+
_init_mpi_env(mpi_launcher=mpi_launcher, resource_spec=req["resource_spec"])
|
760
|
+
|
727
761
|
try:
|
728
|
-
result = execute_task(req['buffer']
|
762
|
+
result = execute_task(req['buffer'])
|
729
763
|
serialized_result = serialize(result, buffer_threshold=1000000)
|
730
764
|
except Exception as e:
|
731
765
|
logger.info('Caught an exception: {}'.format(e))
|
@@ -804,6 +838,8 @@ if __name__ == "__main__":
|
|
804
838
|
help="Heartbeat period in seconds. Uses manager default unless set")
|
805
839
|
parser.add_argument("--hb_threshold", default=120,
|
806
840
|
help="Heartbeat threshold in seconds. Uses manager default unless set")
|
841
|
+
parser.add_argument("--drain_period", default=None,
|
842
|
+
help="Drain this pool after specified number of seconds. By default, does not drain.")
|
807
843
|
parser.add_argument("--address_probe_timeout", default=30,
|
808
844
|
help="Timeout to probe for viable address to interchange. Default: 30s")
|
809
845
|
parser.add_argument("--poll", default=10,
|
@@ -824,7 +860,7 @@ if __name__ == "__main__":
|
|
824
860
|
required=True,
|
825
861
|
help="Whether/how workers should control CPU affinity.")
|
826
862
|
parser.add_argument("--available-accelerators", type=str, nargs="*",
|
827
|
-
help="Names of available accelerators")
|
863
|
+
help="Names of available accelerators, if not given assumed to be zero accelerators available", default=[])
|
828
864
|
parser.add_argument("--enable_mpi_mode", action='store_true',
|
829
865
|
help="Enable MPI mode")
|
830
866
|
parser.add_argument("--mpi-launcher", type=str, choices=VALID_LAUNCHERS,
|
@@ -856,6 +892,7 @@ if __name__ == "__main__":
|
|
856
892
|
logger.info("Prefetch capacity: {}".format(args.prefetch_capacity))
|
857
893
|
logger.info("Heartbeat threshold: {}".format(args.hb_threshold))
|
858
894
|
logger.info("Heartbeat period: {}".format(args.hb_period))
|
895
|
+
logger.info("Drain period: {}".format(args.drain_period))
|
859
896
|
logger.info("CPU affinity: {}".format(args.cpu_affinity))
|
860
897
|
logger.info("Accelerators: {}".format(" ".join(args.available_accelerators)))
|
861
898
|
logger.info("enable_mpi_mode: {}".format(args.enable_mpi_mode))
|
@@ -876,6 +913,7 @@ if __name__ == "__main__":
|
|
876
913
|
prefetch_capacity=int(args.prefetch_capacity),
|
877
914
|
heartbeat_threshold=int(args.hb_threshold),
|
878
915
|
heartbeat_period=int(args.hb_period),
|
916
|
+
drain_period=None if args.drain_period == "None" else int(args.drain_period),
|
879
917
|
poll_period=int(args.poll),
|
880
918
|
cpu_affinity=args.cpu_affinity,
|
881
919
|
enable_mpi_mode=args.enable_mpi_mode,
|
@@ -1,10 +1,19 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
|
-
import zmq
|
4
3
|
import logging
|
5
4
|
import threading
|
5
|
+
import time
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
import zmq
|
6
9
|
|
7
10
|
from parsl import curvezmq
|
11
|
+
from parsl.addresses import tcp_url
|
12
|
+
from parsl.errors import InternalConsistencyError
|
13
|
+
from parsl.executors.high_throughput.errors import (
|
14
|
+
CommandClientBadError,
|
15
|
+
CommandClientTimeoutError,
|
16
|
+
)
|
8
17
|
|
9
18
|
logger = logging.getLogger(__name__)
|
10
19
|
|
@@ -12,25 +21,29 @@ logger = logging.getLogger(__name__)
|
|
12
21
|
class CommandClient:
|
13
22
|
""" CommandClient
|
14
23
|
"""
|
15
|
-
def __init__(self,
|
24
|
+
def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
|
16
25
|
"""
|
17
26
|
Parameters
|
18
27
|
----------
|
19
28
|
|
20
|
-
zmq_context: curvezmq.ClientContext
|
21
|
-
CurveZMQ client context used to create secure sockets
|
22
29
|
ip_address: str
|
23
30
|
IP address of the client (where Parsl runs)
|
31
|
+
|
24
32
|
port_range: tuple(int, int)
|
25
33
|
Port range for the comms between client and interchange
|
26
34
|
|
35
|
+
cert_dir: str | None
|
36
|
+
Path to the certificate directory. Setting this to None will disable encryption.
|
37
|
+
default: None
|
38
|
+
|
27
39
|
"""
|
28
|
-
self.zmq_context =
|
40
|
+
self.zmq_context = curvezmq.ClientContext(cert_dir)
|
29
41
|
self.ip_address = ip_address
|
30
42
|
self.port_range = port_range
|
31
43
|
self.port = None
|
32
44
|
self.create_socket_and_bind()
|
33
45
|
self._lock = threading.Lock()
|
46
|
+
self.ok = True
|
34
47
|
|
35
48
|
def create_socket_and_bind(self):
|
36
49
|
""" Creates socket and binds to a port.
|
@@ -40,13 +53,13 @@ class CommandClient:
|
|
40
53
|
self.zmq_socket = self.zmq_context.socket(zmq.REQ)
|
41
54
|
self.zmq_socket.setsockopt(zmq.LINGER, 0)
|
42
55
|
if self.port is None:
|
43
|
-
self.port = self.zmq_socket.bind_to_random_port(
|
56
|
+
self.port = self.zmq_socket.bind_to_random_port(tcp_url(self.ip_address),
|
44
57
|
min_port=self.port_range[0],
|
45
58
|
max_port=self.port_range[1])
|
46
59
|
else:
|
47
|
-
self.zmq_socket.bind(
|
60
|
+
self.zmq_socket.bind(tcp_url(self.ip_address, self.port))
|
48
61
|
|
49
|
-
def run(self, message, max_retries=3):
|
62
|
+
def run(self, message, max_retries=3, timeout_s=None):
|
50
63
|
""" This function needs to be fast at the same time aware of the possibility of
|
51
64
|
ZMQ pipes overflowing.
|
52
65
|
|
@@ -54,13 +67,43 @@ class CommandClient:
|
|
54
67
|
in ZMQ sockets reaching a broken state once there are ~10k tasks in flight.
|
55
68
|
This issue can be magnified if each the serialized buffer itself is larger.
|
56
69
|
"""
|
70
|
+
if not self.ok:
|
71
|
+
raise CommandClientBadError()
|
72
|
+
|
73
|
+
start_time_s = time.monotonic()
|
74
|
+
|
57
75
|
reply = '__PARSL_ZMQ_PIPES_MAGIC__'
|
58
76
|
with self._lock:
|
59
77
|
for _ in range(max_retries):
|
60
78
|
try:
|
61
79
|
logger.debug("Sending command client command")
|
80
|
+
|
81
|
+
if timeout_s is not None:
|
82
|
+
remaining_time_s = start_time_s + timeout_s - time.monotonic()
|
83
|
+
poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLOUT)
|
84
|
+
if poll_result == zmq.POLLOUT:
|
85
|
+
pass # this is OK, so continue
|
86
|
+
elif poll_result == 0:
|
87
|
+
raise CommandClientTimeoutError("Waiting for command channel to be ready for a command")
|
88
|
+
else:
|
89
|
+
raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
|
90
|
+
|
62
91
|
self.zmq_socket.send_pyobj(message, copy=True)
|
63
|
-
|
92
|
+
|
93
|
+
if timeout_s is not None:
|
94
|
+
logger.debug("Polling for command client response or timeout")
|
95
|
+
remaining_time_s = start_time_s + timeout_s - time.monotonic()
|
96
|
+
poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLIN)
|
97
|
+
if poll_result == zmq.POLLIN:
|
98
|
+
pass # this is OK, so continue
|
99
|
+
elif poll_result == 0:
|
100
|
+
logger.error("Command timed-out - command client is now bad forever")
|
101
|
+
self.ok = False
|
102
|
+
raise CommandClientTimeoutError("Waiting for a reply from command channel")
|
103
|
+
else:
|
104
|
+
raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
|
105
|
+
|
106
|
+
logger.debug("Receiving command client response")
|
64
107
|
reply = self.zmq_socket.recv_pyobj()
|
65
108
|
logger.debug("Received command client response")
|
66
109
|
except zmq.ZMQError:
|
@@ -85,23 +128,26 @@ class CommandClient:
|
|
85
128
|
class TasksOutgoing:
|
86
129
|
""" Outgoing task queue from the executor to the Interchange
|
87
130
|
"""
|
88
|
-
def __init__(self,
|
131
|
+
def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
|
89
132
|
"""
|
90
133
|
Parameters
|
91
134
|
----------
|
92
135
|
|
93
|
-
zmq_context: curvezmq.ClientContext
|
94
|
-
CurveZMQ client context used to create secure sockets
|
95
136
|
ip_address: str
|
96
137
|
IP address of the client (where Parsl runs)
|
138
|
+
|
97
139
|
port_range: tuple(int, int)
|
98
140
|
Port range for the comms between client and interchange
|
99
141
|
|
142
|
+
cert_dir: str | None
|
143
|
+
Path to the certificate directory. Setting this to None will disable encryption.
|
144
|
+
default: None
|
145
|
+
|
100
146
|
"""
|
101
|
-
self.zmq_context =
|
147
|
+
self.zmq_context = curvezmq.ClientContext(cert_dir)
|
102
148
|
self.zmq_socket = self.zmq_context.socket(zmq.DEALER)
|
103
149
|
self.zmq_socket.set_hwm(0)
|
104
|
-
self.port = self.zmq_socket.bind_to_random_port(
|
150
|
+
self.port = self.zmq_socket.bind_to_random_port(tcp_url(ip_address),
|
105
151
|
min_port=port_range[0],
|
106
152
|
max_port=port_range[1])
|
107
153
|
self.poller = zmq.Poller()
|
@@ -138,31 +184,43 @@ class ResultsIncoming:
|
|
138
184
|
""" Incoming results queue from the Interchange to the executor
|
139
185
|
"""
|
140
186
|
|
141
|
-
def __init__(self,
|
187
|
+
def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
|
142
188
|
"""
|
143
189
|
Parameters
|
144
190
|
----------
|
145
191
|
|
146
|
-
zmq_context: curvezmq.ClientContext
|
147
|
-
CurveZMQ client context used to create secure sockets
|
148
192
|
ip_address: str
|
149
193
|
IP address of the client (where Parsl runs)
|
194
|
+
|
150
195
|
port_range: tuple(int, int)
|
151
196
|
Port range for the comms between client and interchange
|
152
197
|
|
198
|
+
cert_dir: str | None
|
199
|
+
Path to the certificate directory. Setting this to None will disable encryption.
|
200
|
+
default: None
|
201
|
+
|
153
202
|
"""
|
154
|
-
self.zmq_context =
|
203
|
+
self.zmq_context = curvezmq.ClientContext(cert_dir)
|
155
204
|
self.results_receiver = self.zmq_context.socket(zmq.DEALER)
|
156
205
|
self.results_receiver.set_hwm(0)
|
157
|
-
self.port = self.results_receiver.bind_to_random_port(
|
206
|
+
self.port = self.results_receiver.bind_to_random_port(tcp_url(ip_address),
|
158
207
|
min_port=port_range[0],
|
159
208
|
max_port=port_range[1])
|
209
|
+
self.poller = zmq.Poller()
|
210
|
+
self.poller.register(self.results_receiver, zmq.POLLIN)
|
160
211
|
|
161
|
-
def get(self):
|
212
|
+
def get(self, timeout_ms=None):
|
213
|
+
"""Get a message from the queue, returning None if timeout expires
|
214
|
+
without a message. timeout is measured in milliseconds.
|
215
|
+
"""
|
162
216
|
logger.debug("Waiting for ResultsIncoming message")
|
163
|
-
|
164
|
-
|
165
|
-
|
217
|
+
socks = dict(self.poller.poll(timeout=timeout_ms))
|
218
|
+
if self.results_receiver in socks and socks[self.results_receiver] == zmq.POLLIN:
|
219
|
+
m = self.results_receiver.recv_multipart()
|
220
|
+
logger.debug("Received ResultsIncoming message")
|
221
|
+
return m
|
222
|
+
else:
|
223
|
+
return None
|
166
224
|
|
167
225
|
def close(self):
|
168
226
|
self.results_receiver.close()
|