parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +9 -10
- parsl/addresses.py +26 -6
- parsl/app/app.py +7 -8
- parsl/app/bash.py +15 -8
- parsl/app/errors.py +10 -13
- parsl/app/futures.py +8 -10
- parsl/app/python.py +2 -1
- parsl/benchmark/perf.py +2 -1
- parsl/concurrent/__init__.py +2 -2
- parsl/config.py +53 -10
- parsl/configs/ASPIRE1.py +6 -5
- parsl/configs/Azure.py +9 -8
- parsl/configs/bridges.py +6 -4
- parsl/configs/cc_in2p3.py +3 -3
- parsl/configs/ec2.py +3 -1
- parsl/configs/expanse.py +4 -3
- parsl/configs/frontera.py +3 -4
- parsl/configs/htex_local.py +3 -4
- parsl/configs/illinoiscluster.py +3 -1
- parsl/configs/improv.py +34 -0
- parsl/configs/kubernetes.py +4 -3
- parsl/configs/local_threads.py +5 -1
- parsl/configs/midway.py +5 -3
- parsl/configs/osg.py +4 -2
- parsl/configs/polaris.py +4 -2
- parsl/configs/stampede2.py +6 -5
- parsl/configs/summit.py +3 -3
- parsl/configs/toss3_llnl.py +4 -3
- parsl/configs/vineex_local.py +6 -4
- parsl/configs/wqex_local.py +5 -3
- parsl/curvezmq.py +4 -0
- parsl/data_provider/data_manager.py +4 -3
- parsl/data_provider/file_noop.py +1 -2
- parsl/data_provider/files.py +3 -3
- parsl/data_provider/ftp.py +1 -3
- parsl/data_provider/globus.py +7 -6
- parsl/data_provider/http.py +2 -2
- parsl/data_provider/rsync.py +1 -1
- parsl/data_provider/staging.py +2 -2
- parsl/data_provider/zip.py +135 -0
- parsl/dataflow/dependency_resolvers.py +115 -0
- parsl/dataflow/dflow.py +259 -223
- parsl/dataflow/errors.py +3 -5
- parsl/dataflow/futures.py +27 -14
- parsl/dataflow/memoization.py +5 -5
- parsl/dataflow/rundirs.py +5 -6
- parsl/dataflow/taskrecord.py +4 -5
- parsl/executors/__init__.py +4 -2
- parsl/executors/base.py +45 -15
- parsl/executors/errors.py +13 -0
- parsl/executors/execute_task.py +37 -0
- parsl/executors/flux/execute_parsl_task.py +3 -3
- parsl/executors/flux/executor.py +18 -19
- parsl/executors/flux/flux_instance_manager.py +26 -27
- parsl/executors/high_throughput/errors.py +43 -3
- parsl/executors/high_throughput/executor.py +307 -285
- parsl/executors/high_throughput/interchange.py +137 -168
- parsl/executors/high_throughput/manager_record.py +4 -0
- parsl/executors/high_throughput/manager_selector.py +55 -0
- parsl/executors/high_throughput/monitoring_info.py +2 -1
- parsl/executors/high_throughput/mpi_executor.py +113 -0
- parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
- parsl/executors/high_throughput/mpi_resource_management.py +6 -17
- parsl/executors/high_throughput/probe.py +9 -7
- parsl/executors/high_throughput/process_worker_pool.py +77 -75
- parsl/executors/high_throughput/zmq_pipes.py +81 -23
- parsl/executors/radical/executor.py +130 -79
- parsl/executors/radical/rpex_resources.py +17 -15
- parsl/executors/radical/rpex_worker.py +4 -3
- parsl/executors/status_handling.py +157 -51
- parsl/executors/taskvine/__init__.py +1 -1
- parsl/executors/taskvine/errors.py +1 -1
- parsl/executors/taskvine/exec_parsl_function.py +2 -2
- parsl/executors/taskvine/executor.py +38 -55
- parsl/executors/taskvine/factory.py +1 -1
- parsl/executors/taskvine/factory_config.py +1 -1
- parsl/executors/taskvine/manager.py +17 -13
- parsl/executors/taskvine/manager_config.py +7 -2
- parsl/executors/threads.py +6 -6
- parsl/executors/workqueue/errors.py +1 -1
- parsl/executors/workqueue/exec_parsl_function.py +6 -5
- parsl/executors/workqueue/executor.py +64 -63
- parsl/executors/workqueue/parsl_coprocess.py +1 -1
- parsl/jobs/error_handlers.py +2 -2
- parsl/jobs/job_status_poller.py +28 -112
- parsl/jobs/states.py +7 -2
- parsl/jobs/strategy.py +43 -31
- parsl/launchers/__init__.py +12 -3
- parsl/launchers/errors.py +1 -1
- parsl/launchers/launchers.py +0 -6
- parsl/log_utils.py +1 -2
- parsl/monitoring/db_manager.py +55 -93
- parsl/monitoring/errors.py +6 -0
- parsl/monitoring/monitoring.py +85 -311
- parsl/monitoring/queries/pandas.py +1 -2
- parsl/monitoring/radios/base.py +13 -0
- parsl/monitoring/radios/filesystem.py +52 -0
- parsl/monitoring/radios/htex.py +57 -0
- parsl/monitoring/radios/multiprocessing.py +17 -0
- parsl/monitoring/radios/udp.py +56 -0
- parsl/monitoring/radios/zmq.py +17 -0
- parsl/monitoring/remote.py +33 -37
- parsl/monitoring/router.py +212 -0
- parsl/monitoring/types.py +5 -6
- parsl/monitoring/visualization/app.py +4 -2
- parsl/monitoring/visualization/models.py +0 -1
- parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
- parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
- parsl/monitoring/visualization/utils.py +0 -1
- parsl/monitoring/visualization/views.py +16 -9
- parsl/multiprocessing.py +0 -1
- parsl/process_loggers.py +1 -2
- parsl/providers/__init__.py +8 -17
- parsl/providers/aws/aws.py +2 -3
- parsl/providers/azure/azure.py +4 -5
- parsl/providers/base.py +2 -18
- parsl/providers/cluster_provider.py +3 -9
- parsl/providers/condor/condor.py +7 -17
- parsl/providers/errors.py +2 -2
- parsl/providers/googlecloud/googlecloud.py +2 -1
- parsl/providers/grid_engine/grid_engine.py +5 -14
- parsl/providers/kubernetes/kube.py +80 -40
- parsl/providers/local/local.py +13 -26
- parsl/providers/lsf/lsf.py +5 -23
- parsl/providers/pbspro/pbspro.py +5 -17
- parsl/providers/slurm/slurm.py +81 -39
- parsl/providers/torque/torque.py +3 -14
- parsl/serialize/__init__.py +8 -3
- parsl/serialize/base.py +1 -2
- parsl/serialize/concretes.py +5 -4
- parsl/serialize/facade.py +3 -3
- parsl/serialize/proxystore.py +3 -2
- parsl/tests/__init__.py +1 -1
- parsl/tests/configs/azure_single_node.py +4 -5
- parsl/tests/configs/bridges.py +3 -2
- parsl/tests/configs/cc_in2p3.py +1 -3
- parsl/tests/configs/comet.py +2 -1
- parsl/tests/configs/ec2_single_node.py +1 -2
- parsl/tests/configs/ec2_spot.py +1 -2
- parsl/tests/configs/flux_local.py +11 -0
- parsl/tests/configs/frontera.py +2 -3
- parsl/tests/configs/htex_local.py +3 -5
- parsl/tests/configs/htex_local_alternate.py +11 -15
- parsl/tests/configs/htex_local_intask_staging.py +5 -9
- parsl/tests/configs/htex_local_rsync_staging.py +4 -8
- parsl/tests/configs/local_radical.py +1 -3
- parsl/tests/configs/local_radical_mpi.py +2 -2
- parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
- parsl/tests/configs/local_threads_monitoring.py +0 -1
- parsl/tests/configs/midway.py +2 -2
- parsl/tests/configs/nscc_singapore.py +3 -3
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +3 -2
- parsl/tests/configs/slurm_local.py +24 -0
- parsl/tests/configs/summit.py +1 -0
- parsl/tests/configs/taskvine_ex.py +4 -7
- parsl/tests/configs/user_opts.py +0 -7
- parsl/tests/configs/workqueue_ex.py +4 -6
- parsl/tests/conftest.py +27 -13
- parsl/tests/integration/test_stress/test_python_simple.py +3 -4
- parsl/tests/integration/test_stress/test_python_threads.py +3 -5
- parsl/tests/manual_tests/htex_local.py +4 -6
- parsl/tests/manual_tests/test_basic.py +1 -0
- parsl/tests/manual_tests/test_log_filter.py +3 -1
- parsl/tests/manual_tests/test_memory_limits.py +6 -8
- parsl/tests/manual_tests/test_regression_220.py +2 -1
- parsl/tests/manual_tests/test_udp_simple.py +4 -4
- parsl/tests/manual_tests/test_worker_count.py +3 -2
- parsl/tests/scaling_tests/htex_local.py +2 -4
- parsl/tests/scaling_tests/test_scale.py +0 -9
- parsl/tests/scaling_tests/vineex_condor.py +1 -2
- parsl/tests/scaling_tests/vineex_local.py +1 -2
- parsl/tests/site_tests/site_config_selector.py +1 -6
- parsl/tests/site_tests/test_provider.py +4 -2
- parsl/tests/site_tests/test_site.py +2 -0
- parsl/tests/sites/test_affinity.py +7 -7
- parsl/tests/sites/test_dynamic_executor.py +3 -4
- parsl/tests/sites/test_ec2.py +3 -2
- parsl/tests/sites/test_worker_info.py +4 -5
- parsl/tests/test_aalst_patterns.py +0 -1
- parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
- parsl/tests/test_bash_apps/test_basic.py +10 -4
- parsl/tests/test_bash_apps/test_error_codes.py +5 -7
- parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
- parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
- parsl/tests/test_bash_apps/test_memoize.py +2 -8
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
- parsl/tests/test_bash_apps/test_multiline.py +1 -1
- parsl/tests/test_bash_apps/test_pipeline.py +1 -1
- parsl/tests/test_bash_apps/test_std_uri.py +123 -0
- parsl/tests/test_bash_apps/test_stdout.py +33 -8
- parsl/tests/test_callables.py +2 -2
- parsl/tests/test_checkpointing/test_periodic.py +21 -39
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
- parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
- parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
- parsl/tests/test_checkpointing/test_regression_239.py +1 -1
- parsl/tests/test_checkpointing/test_task_exit.py +2 -3
- parsl/tests/test_docs/test_from_slides.py +5 -2
- parsl/tests/test_docs/test_kwargs.py +4 -1
- parsl/tests/test_docs/test_tutorial_1.py +1 -2
- parsl/tests/test_docs/test_workflow1.py +2 -2
- parsl/tests/test_docs/test_workflow2.py +0 -1
- parsl/tests/test_error_handling/test_rand_fail.py +2 -2
- parsl/tests/test_error_handling/test_resource_spec.py +10 -12
- parsl/tests/test_error_handling/test_retries.py +6 -16
- parsl/tests/test_error_handling/test_retry_handler.py +1 -0
- parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
- parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
- parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
- parsl/tests/test_execute_task.py +29 -0
- parsl/tests/test_flux.py +1 -1
- parsl/tests/test_htex/test_basic.py +2 -3
- parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
- parsl/tests/test_htex/test_command_client_timeout.py +66 -0
- parsl/tests/test_htex/test_connected_blocks.py +3 -2
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
- parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
- parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
- parsl/tests/test_htex/test_drain.py +11 -10
- parsl/tests/test_htex/test_htex.py +51 -25
- parsl/tests/test_htex/test_manager_failure.py +0 -1
- parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
- parsl/tests/test_htex/test_managers_command.py +36 -0
- parsl/tests/test_htex/test_missing_worker.py +2 -12
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
- parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
- parsl/tests/test_htex/test_zmq_binding.py +29 -8
- parsl/tests/test_monitoring/test_app_names.py +5 -5
- parsl/tests/test_monitoring/test_basic.py +73 -25
- parsl/tests/test_monitoring/test_db_locks.py +6 -4
- parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
- parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
- parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
- parsl/tests/test_monitoring/test_stdouterr.py +134 -0
- parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
- parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
- parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
- parsl/tests/test_providers/test_local_provider.py +3 -132
- parsl/tests/test_providers/test_pbspro_template.py +2 -3
- parsl/tests/test_providers/test_slurm_template.py +2 -3
- parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
- parsl/tests/test_python_apps/test_context_manager.py +128 -0
- parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
- parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
- parsl/tests/test_python_apps/test_fail.py +0 -25
- parsl/tests/test_python_apps/test_futures.py +2 -1
- parsl/tests/test_python_apps/test_inputs_default.py +22 -0
- parsl/tests/test_python_apps/test_join.py +0 -1
- parsl/tests/test_python_apps/test_lifted.py +11 -7
- parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
- parsl/tests/test_python_apps/test_outputs.py +1 -1
- parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -2
- parsl/tests/test_regression/test_1480.py +2 -1
- parsl/tests/test_regression/test_1653.py +2 -1
- parsl/tests/test_regression/test_226.py +1 -0
- parsl/tests/test_regression/test_2652.py +1 -0
- parsl/tests/test_regression/test_69a.py +0 -1
- parsl/tests/test_regression/test_854.py +4 -2
- parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
- parsl/tests/test_regression/test_98.py +0 -1
- parsl/tests/test_scaling/test_block_error_handler.py +9 -4
- parsl/tests/test_scaling/test_regression_1621.py +11 -15
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
- parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
- parsl/tests/test_scaling/test_scale_down.py +2 -5
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
- parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
- parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
- parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
- parsl/tests/test_serialization/test_basic.py +2 -1
- parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
- parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
- parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
- parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
- parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
- parsl/tests/test_staging/staging_provider.py +2 -2
- parsl/tests/test_staging/test_1316.py +3 -4
- parsl/tests/test_staging/test_docs_1.py +2 -1
- parsl/tests/test_staging/test_docs_2.py +2 -1
- parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
- parsl/tests/{test_data → test_staging}/test_file.py +6 -6
- parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
- parsl/tests/test_staging/test_staging_ftp.py +1 -0
- parsl/tests/test_staging/test_staging_https.py +5 -2
- parsl/tests/test_staging/test_staging_stdout.py +64 -0
- parsl/tests/test_staging/test_zip_in.py +39 -0
- parsl/tests/test_staging/test_zip_out.py +110 -0
- parsl/tests/test_staging/test_zip_to_zip.py +41 -0
- parsl/tests/test_summary.py +2 -2
- parsl/tests/test_thread_parallelism.py +0 -1
- parsl/tests/test_threads/test_configs.py +1 -2
- parsl/tests/test_threads/test_lazy_errors.py +2 -2
- parsl/tests/test_utils/test_execute_wait.py +35 -0
- parsl/tests/test_utils/test_sanitize_dns.py +76 -0
- parsl/tests/unit/test_address.py +20 -0
- parsl/tests/unit/test_file.py +99 -0
- parsl/tests/unit/test_usage_tracking.py +66 -0
- parsl/usage_tracking/api.py +65 -0
- parsl/usage_tracking/levels.py +6 -0
- parsl/usage_tracking/usage.py +104 -62
- parsl/utils.py +137 -4
- parsl/version.py +1 -1
- {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
- parsl-2025.1.13.data/scripts/interchange.py +649 -0
- {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
- parsl-2025.1.13.dist-info/METADATA +96 -0
- parsl-2025.1.13.dist-info/RECORD +462 -0
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
- parsl/channels/__init__.py +0 -7
- parsl/channels/base.py +0 -141
- parsl/channels/errors.py +0 -113
- parsl/channels/local/local.py +0 -164
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
- parsl/channels/ssh/ssh.py +0 -276
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -74
- parsl/configs/ad_hoc.py +0 -35
- parsl/executors/radical/rpex_master.py +0 -42
- parsl/monitoring/radios.py +0 -175
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -248
- parsl/providers/cobalt/__init__.py +0 -0
- parsl/providers/cobalt/cobalt.py +0 -236
- parsl/providers/cobalt/template.py +0 -17
- parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
- parsl/tests/configs/cooley_htex.py +0 -37
- parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/configs/swan_htex.py +0 -43
- parsl/tests/configs/theta.py +0 -37
- parsl/tests/integration/test_channels/__init__.py +0 -0
- parsl/tests/integration/test_channels/test_channels.py +0 -17
- parsl/tests/integration/test_channels/test_local_channel.py +0 -42
- parsl/tests/integration/test_channels/test_scp_1.py +0 -45
- parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
- parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
- parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
- parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
- parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
- parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
- parsl/tests/sites/test_local_adhoc.py +0 -61
- parsl/tests/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/test_large_output.py +0 -22
- parsl/tests/test_data/__init__.py +0 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
- parsl-2024.3.18.dist-info/METADATA +0 -98
- parsl-2024.3.18.dist-info/RECORD +0 -449
- parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
- parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
- parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
- {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,19 @@
|
|
1
1
|
from __future__ import annotations
|
2
|
+
|
3
|
+
import datetime
|
2
4
|
import logging
|
3
5
|
import threading
|
4
|
-
|
6
|
+
import time
|
5
7
|
from abc import abstractmethod, abstractproperty
|
6
8
|
from concurrent.futures import Future
|
7
|
-
from
|
9
|
+
from itertools import compress
|
10
|
+
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
|
8
11
|
|
9
12
|
from parsl.executors.base import ParslExecutor
|
10
13
|
from parsl.executors.errors import BadStateException, ScalingFailed
|
11
|
-
from parsl.jobs.
|
12
|
-
from parsl.jobs.
|
14
|
+
from parsl.jobs.error_handlers import noop_error_handler, simple_error_handler
|
15
|
+
from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
|
16
|
+
from parsl.monitoring.message_type import MessageType
|
13
17
|
from parsl.providers.base import ExecutionProvider
|
14
18
|
from parsl.utils import AtomicIDCounter
|
15
19
|
|
@@ -40,9 +44,6 @@ class BlockProviderExecutor(ParslExecutor):
|
|
40
44
|
invoking scale_out, but it will not initialize the blocks requested by
|
41
45
|
any init_blocks parameter. Subclasses must implement that behaviour
|
42
46
|
themselves.
|
43
|
-
|
44
|
-
BENC: TODO: block error handling: maybe I want this more user pluggable?
|
45
|
-
I'm not sure of use cases for switchability at the moment beyond "yes or no"
|
46
47
|
"""
|
47
48
|
def __init__(self, *,
|
48
49
|
provider: Optional[ExecutionProvider],
|
@@ -58,18 +59,29 @@ class BlockProviderExecutor(ParslExecutor):
|
|
58
59
|
else:
|
59
60
|
self.block_error_handler = block_error_handler
|
60
61
|
|
61
|
-
# errors can happen during the submit call to the provider; this is used
|
62
|
-
# to keep track of such errors so that they can be handled in one place
|
63
|
-
# together with errors reported by status()
|
64
|
-
self._simulated_status: Dict[Any, JobStatus] = {}
|
65
62
|
self._executor_bad_state = threading.Event()
|
66
63
|
self._executor_exception: Optional[Exception] = None
|
67
64
|
|
68
65
|
self._block_id_counter = AtomicIDCounter()
|
69
66
|
|
70
67
|
self._tasks = {} # type: Dict[object, Future]
|
71
|
-
|
72
|
-
self.
|
68
|
+
|
69
|
+
self._last_poll_time = 0.0
|
70
|
+
|
71
|
+
# these four structures track, in loosely coordinated fashion, the
|
72
|
+
# existence of blocks and jobs and how to map between their
|
73
|
+
# identifiers.
|
74
|
+
self.blocks_to_job_id = {} # type: Dict[str, str]
|
75
|
+
self.job_ids_to_block = {} # type: Dict[str, str]
|
76
|
+
|
77
|
+
# errors can happen during the submit call to the provider; this is used
|
78
|
+
# to keep track of such errors so that they can be handled in one place
|
79
|
+
# together with errors reported by status()
|
80
|
+
self._simulated_status: Dict[str, JobStatus] = {}
|
81
|
+
|
82
|
+
# this stores an approximation (sometimes delayed) of the latest status
|
83
|
+
# of pending, active and recently terminated blocks
|
84
|
+
self._status = {} # type: Dict[str, JobStatus]
|
73
85
|
|
74
86
|
def _make_status_dict(self, block_ids: List[str], status_list: List[JobStatus]) -> Dict[str, JobStatus]:
|
75
87
|
"""Given a list of block ids and a list of corresponding status strings,
|
@@ -102,15 +114,6 @@ class BlockProviderExecutor(ParslExecutor):
|
|
102
114
|
else:
|
103
115
|
return self._provider.status_polling_interval
|
104
116
|
|
105
|
-
def _fail_job_async(self, block_id: Any, message: str):
|
106
|
-
"""Marks a job that has failed to start but would not otherwise be included in status()
|
107
|
-
as failed and report it in status()
|
108
|
-
"""
|
109
|
-
if block_id is None:
|
110
|
-
block_id = str(self._block_id_counter.get_id())
|
111
|
-
logger.info(f"Allocated block ID {block_id} for simulated failure")
|
112
|
-
self._simulated_status[block_id] = JobStatus(JobState.FAILED, message)
|
113
|
-
|
114
117
|
@abstractproperty
|
115
118
|
def outstanding(self) -> int:
|
116
119
|
"""This should return the number of tasks that the executor has been given to run (waiting to run, and running now)"""
|
@@ -118,20 +121,6 @@ class BlockProviderExecutor(ParslExecutor):
|
|
118
121
|
raise NotImplementedError("Classes inheriting from BlockProviderExecutor must implement "
|
119
122
|
"outstanding()")
|
120
123
|
|
121
|
-
def status(self) -> Dict[str, JobStatus]:
|
122
|
-
"""Return the status of all jobs/blocks currently known to this executor.
|
123
|
-
|
124
|
-
:return: a dictionary mapping block ids (in string) to job status
|
125
|
-
"""
|
126
|
-
if self._provider:
|
127
|
-
block_ids, job_ids = self._get_block_and_job_ids()
|
128
|
-
status = self._make_status_dict(block_ids, self._provider.status(job_ids))
|
129
|
-
else:
|
130
|
-
status = {}
|
131
|
-
status.update(self._simulated_status)
|
132
|
-
|
133
|
-
return status
|
134
|
-
|
135
124
|
def set_bad_state_and_fail_all(self, exception: Exception):
|
136
125
|
"""Allows external error handlers to mark this executor as irrecoverably bad and cause
|
137
126
|
all tasks submitted to it now and in the future to fail. The executor is responsible
|
@@ -178,46 +167,82 @@ class BlockProviderExecutor(ParslExecutor):
|
|
178
167
|
def provider(self):
|
179
168
|
return self._provider
|
180
169
|
|
181
|
-
def _filter_scale_in_ids(self, to_kill, killed):
|
170
|
+
def _filter_scale_in_ids(self, to_kill: Sequence[Any], killed: Sequence[bool]) -> Sequence[Any]:
|
182
171
|
""" Filter out job id's that were not killed
|
183
172
|
"""
|
184
173
|
assert len(to_kill) == len(killed)
|
174
|
+
|
175
|
+
if False in killed:
|
176
|
+
killed_job_ids = [jid for jid, k in zip(to_kill, killed) if k]
|
177
|
+
not_killed_job_ids = [jid for jid, k in zip(to_kill, killed) if not k]
|
178
|
+
logger.warning("Some jobs were not killed successfully: "
|
179
|
+
f"killed jobs: {killed_job_ids}, "
|
180
|
+
f"not-killed jobs: {not_killed_job_ids}")
|
181
|
+
|
185
182
|
# Filters first iterable by bool values in second
|
186
183
|
return list(compress(to_kill, killed))
|
187
184
|
|
188
|
-
def
|
185
|
+
def scale_out_facade(self, n: int) -> List[str]:
|
189
186
|
"""Scales out the number of blocks by "blocks"
|
190
187
|
"""
|
191
188
|
if not self.provider:
|
192
189
|
raise ScalingFailed(self, "No execution provider available")
|
193
190
|
block_ids = []
|
194
|
-
|
195
|
-
|
191
|
+
monitoring_status_changes = {}
|
192
|
+
logger.info(f"Scaling out by {n} blocks")
|
193
|
+
for _ in range(n):
|
196
194
|
block_id = str(self._block_id_counter.get_id())
|
197
195
|
logger.info(f"Allocated block ID {block_id}")
|
198
196
|
try:
|
199
197
|
job_id = self._launch_block(block_id)
|
200
|
-
|
201
|
-
|
198
|
+
|
199
|
+
pending_status = JobStatus(JobState.PENDING)
|
200
|
+
|
201
|
+
self.blocks_to_job_id[block_id] = job_id
|
202
|
+
self.job_ids_to_block[job_id] = block_id
|
203
|
+
self._status[block_id] = pending_status
|
204
|
+
|
205
|
+
monitoring_status_changes[block_id] = pending_status
|
202
206
|
block_ids.append(block_id)
|
207
|
+
|
203
208
|
except Exception as ex:
|
204
|
-
|
205
|
-
|
209
|
+
failed_status = JobStatus(JobState.FAILED, "Failed to start block {}: {}".format(block_id, ex))
|
210
|
+
self._simulated_status[block_id] = failed_status
|
211
|
+
self._status[block_id] = failed_status
|
212
|
+
|
213
|
+
self.send_monitoring_info(monitoring_status_changes)
|
206
214
|
return block_ids
|
207
215
|
|
208
|
-
@abstractmethod
|
209
216
|
def scale_in(self, blocks: int) -> List[str]:
|
210
217
|
"""Scale in method.
|
211
218
|
|
212
219
|
Cause the executor to reduce the number of blocks by count.
|
213
220
|
|
214
|
-
|
215
|
-
|
216
|
-
scaling
|
221
|
+
The default implementation will kill blocks without regard to their
|
222
|
+
status or whether they are executing tasks. Executors with more
|
223
|
+
nuanced scaling strategies might override this method to work with
|
224
|
+
that strategy - see the HighThroughputExecutor for an example of that.
|
217
225
|
|
218
226
|
:return: A list of block ids corresponding to the blocks that were removed.
|
219
227
|
"""
|
220
|
-
|
228
|
+
|
229
|
+
active_blocks = [block_id for block_id, status in self._status.items()
|
230
|
+
if status.state not in TERMINAL_STATES]
|
231
|
+
|
232
|
+
block_ids_to_kill = active_blocks[:blocks]
|
233
|
+
|
234
|
+
job_ids_to_kill = [self.blocks_to_job_id[block] for block in block_ids_to_kill]
|
235
|
+
|
236
|
+
# Cancel the blocks provisioned
|
237
|
+
if self.provider:
|
238
|
+
logger.info(f"Scaling in jobs: {job_ids_to_kill}")
|
239
|
+
r = self.provider.cancel(job_ids_to_kill)
|
240
|
+
job_ids = self._filter_scale_in_ids(job_ids_to_kill, r)
|
241
|
+
block_ids_killed = [self.job_ids_to_block[job_id] for job_id in job_ids]
|
242
|
+
return block_ids_killed
|
243
|
+
else:
|
244
|
+
logger.error("No execution provider available to scale in")
|
245
|
+
return []
|
221
246
|
|
222
247
|
def _launch_block(self, block_id: str) -> Any:
|
223
248
|
launch_cmd = self._get_launch_command(block_id)
|
@@ -239,12 +264,93 @@ class BlockProviderExecutor(ParslExecutor):
|
|
239
264
|
# Not using self.blocks.keys() and self.blocks.values() simultaneously
|
240
265
|
# The dictionary may be changed during invoking this function
|
241
266
|
# As scale_in and scale_out are invoked in multiple threads
|
242
|
-
block_ids = list(self.
|
267
|
+
block_ids = list(self.blocks_to_job_id.keys())
|
243
268
|
job_ids = [] # types: List[Any]
|
244
269
|
for bid in block_ids:
|
245
|
-
job_ids.append(self.
|
270
|
+
job_ids.append(self.blocks_to_job_id[bid])
|
246
271
|
return block_ids, job_ids
|
247
272
|
|
248
273
|
@abstractproperty
|
249
274
|
def workers_per_node(self) -> Union[int, float]:
|
250
275
|
pass
|
276
|
+
|
277
|
+
def send_monitoring_info(self, status: Dict) -> None:
|
278
|
+
# Send monitoring info for HTEX when monitoring is enabled
|
279
|
+
if self.submit_monitoring_radio:
|
280
|
+
msg = self.create_monitoring_info(status)
|
281
|
+
logger.debug("Sending block monitoring message: %r", msg)
|
282
|
+
self.submit_monitoring_radio.send((MessageType.BLOCK_INFO, msg))
|
283
|
+
|
284
|
+
def create_monitoring_info(self, status: Dict[str, JobStatus]) -> Sequence[object]:
|
285
|
+
"""Create a monitoring message for each block based on the poll status.
|
286
|
+
"""
|
287
|
+
msg = []
|
288
|
+
for bid, s in status.items():
|
289
|
+
d: Dict[str, Any] = {}
|
290
|
+
d['run_id'] = self.run_id
|
291
|
+
d['status'] = s.status_name
|
292
|
+
d['timestamp'] = datetime.datetime.now()
|
293
|
+
d['executor_label'] = self.label
|
294
|
+
d['job_id'] = self.blocks_to_job_id.get(bid, None)
|
295
|
+
d['block_id'] = bid
|
296
|
+
msg.append(d)
|
297
|
+
return msg
|
298
|
+
|
299
|
+
def poll_facade(self) -> None:
|
300
|
+
now = time.time()
|
301
|
+
if now >= self._last_poll_time + self.status_polling_interval:
|
302
|
+
previous_status = self._status
|
303
|
+
self._status = self.status()
|
304
|
+
self._last_poll_time = now
|
305
|
+
delta_status = {}
|
306
|
+
for block_id in self._status:
|
307
|
+
if block_id not in previous_status \
|
308
|
+
or previous_status[block_id].state != self._status[block_id].state:
|
309
|
+
delta_status[block_id] = self._status[block_id]
|
310
|
+
|
311
|
+
if delta_status:
|
312
|
+
self.send_monitoring_info(delta_status)
|
313
|
+
|
314
|
+
def status(self) -> Dict[str, JobStatus]:
|
315
|
+
"""Return the status of all jobs/blocks currently known to this executor.
|
316
|
+
|
317
|
+
:return: a dictionary mapping block ids (in string) to job status
|
318
|
+
"""
|
319
|
+
if self._provider:
|
320
|
+
block_ids, job_ids = self._get_block_and_job_ids()
|
321
|
+
status = self._make_status_dict(block_ids, self._provider.status(job_ids))
|
322
|
+
else:
|
323
|
+
status = {}
|
324
|
+
status.update(self._simulated_status)
|
325
|
+
|
326
|
+
return status
|
327
|
+
|
328
|
+
@property
|
329
|
+
def status_facade(self) -> Dict[str, JobStatus]:
|
330
|
+
"""Return the status of all jobs/blocks of the executor of this poller.
|
331
|
+
|
332
|
+
:return: a dictionary mapping block ids (in string) to job status
|
333
|
+
"""
|
334
|
+
return self._status
|
335
|
+
|
336
|
+
def scale_in_facade(self, n: int, max_idletime: Optional[float] = None) -> List[str]:
|
337
|
+
|
338
|
+
if max_idletime is None:
|
339
|
+
block_ids = self.scale_in(n)
|
340
|
+
else:
|
341
|
+
# This is a HighThroughputExecutor-specific interface violation.
|
342
|
+
# This code hopes, through pan-codebase reasoning, that this
|
343
|
+
# scale_in method really does come from HighThroughputExecutor,
|
344
|
+
# and so does have an extra max_idletime parameter not present
|
345
|
+
# in the executor interface.
|
346
|
+
block_ids = self.scale_in(n, max_idletime=max_idletime) # type: ignore[call-arg]
|
347
|
+
if block_ids is not None:
|
348
|
+
new_status = {}
|
349
|
+
for block_id in block_ids:
|
350
|
+
logger.debug("Marking block %s as SCALED_IN", block_id)
|
351
|
+
s = JobStatus(JobState.SCALED_IN)
|
352
|
+
new_status[block_id] = s
|
353
|
+
self._status[block_id] = s
|
354
|
+
self._simulated_status[block_id] = s
|
355
|
+
self.send_monitoring_info(new_status)
|
356
|
+
return block_ids
|
@@ -1,5 +1,5 @@
|
|
1
1
|
from parsl.executors.taskvine.executor import TaskVineExecutor
|
2
|
-
from parsl.executors.taskvine.manager_config import TaskVineManagerConfig
|
3
2
|
from parsl.executors.taskvine.factory_config import TaskVineFactoryConfig
|
3
|
+
from parsl.executors.taskvine.manager_config import TaskVineManagerConfig
|
4
4
|
|
5
5
|
__all__ = ['TaskVineExecutor', 'TaskVineManagerConfig', 'TaskVineFactoryConfig']
|
@@ -1,10 +1,10 @@
|
|
1
|
-
import traceback
|
2
1
|
import sys
|
2
|
+
import traceback
|
3
3
|
|
4
4
|
from parsl.app.errors import RemoteExceptionWrapper
|
5
5
|
from parsl.data_provider.files import File
|
6
|
-
from parsl.utils import get_std_fname_mode
|
7
6
|
from parsl.serialize import deserialize, serialize
|
7
|
+
from parsl.utils import get_std_fname_mode
|
8
8
|
|
9
9
|
# This scripts executes a parsl function which is pickled in 4 files:
|
10
10
|
#
|
@@ -3,46 +3,47 @@ Cooperative Computing Lab (CCL) at Notre Dame to provide a fault-tolerant,
|
|
3
3
|
high-throughput system for delegating Parsl tasks to thousands of remote machines
|
4
4
|
"""
|
5
5
|
|
6
|
-
|
7
|
-
import threading
|
8
|
-
import multiprocessing
|
9
|
-
import logging
|
10
|
-
import tempfile
|
6
|
+
import getpass
|
11
7
|
import hashlib
|
12
|
-
import
|
8
|
+
import inspect
|
9
|
+
import itertools
|
10
|
+
import logging
|
11
|
+
import multiprocessing
|
13
12
|
import os
|
14
13
|
import queue
|
15
|
-
import inspect
|
16
14
|
import shutil
|
17
|
-
import
|
15
|
+
import subprocess
|
16
|
+
import tempfile
|
17
|
+
|
18
|
+
# Import Python built-in libraries
|
19
|
+
import threading
|
18
20
|
import uuid
|
19
21
|
from concurrent.futures import Future
|
20
|
-
from
|
22
|
+
from datetime import datetime
|
23
|
+
from typing import List, Literal, Optional, Union
|
24
|
+
|
25
|
+
# Import other libraries
|
26
|
+
import typeguard
|
21
27
|
|
22
28
|
# Import Parsl constructs
|
23
29
|
import parsl.utils as putils
|
24
|
-
from parsl.
|
25
|
-
from parsl.serialize import serialize, deserialize
|
30
|
+
from parsl.addresses import get_any_address
|
26
31
|
from parsl.data_provider.files import File
|
32
|
+
from parsl.data_provider.staging import Staging
|
27
33
|
from parsl.errors import OptionalModuleMissing
|
28
|
-
from parsl.providers.base import ExecutionProvider
|
29
|
-
from parsl.providers import LocalProvider, CondorProvider
|
30
|
-
from parsl.process_loggers import wrap_with_logs
|
31
|
-
from parsl.addresses import get_any_address
|
32
34
|
from parsl.executors.errors import ExecutorError
|
33
35
|
from parsl.executors.status_handling import BlockProviderExecutor
|
34
36
|
from parsl.executors.taskvine import exec_parsl_function
|
35
|
-
from parsl.executors.taskvine.
|
37
|
+
from parsl.executors.taskvine.errors import TaskVineManagerFailure, TaskVineTaskFailure
|
38
|
+
from parsl.executors.taskvine.factory import _taskvine_factory
|
36
39
|
from parsl.executors.taskvine.factory_config import TaskVineFactoryConfig
|
37
|
-
from parsl.executors.taskvine.errors import TaskVineTaskFailure
|
38
|
-
from parsl.executors.taskvine.errors import TaskVineManagerFailure
|
39
|
-
from parsl.executors.taskvine.utils import ParslTaskToVine
|
40
|
-
from parsl.executors.taskvine.utils import ParslFileToVine
|
41
40
|
from parsl.executors.taskvine.manager import _taskvine_submit_wait
|
42
|
-
from parsl.executors.taskvine.
|
43
|
-
|
44
|
-
|
45
|
-
import
|
41
|
+
from parsl.executors.taskvine.manager_config import TaskVineManagerConfig
|
42
|
+
from parsl.executors.taskvine.utils import ParslFileToVine, ParslTaskToVine
|
43
|
+
from parsl.process_loggers import wrap_with_logs
|
44
|
+
from parsl.providers import CondorProvider, LocalProvider
|
45
|
+
from parsl.providers.base import ExecutionProvider
|
46
|
+
from parsl.serialize import deserialize, serialize
|
46
47
|
|
47
48
|
logger = logging.getLogger(__name__)
|
48
49
|
|
@@ -171,7 +172,7 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
171
172
|
# Path to directory that holds all tasks' data and results.
|
172
173
|
self._function_data_dir = ""
|
173
174
|
|
174
|
-
#
|
175
|
+
# Helper scripts to prepare package tarballs for Parsl apps
|
175
176
|
self._package_analyze_script = shutil.which("poncho_package_analyze")
|
176
177
|
self._package_create_script = shutil.which("poncho_package_create")
|
177
178
|
if self._package_analyze_script is None or self._package_create_script is None:
|
@@ -216,9 +217,9 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
216
217
|
|
217
218
|
# Create directories for data and results
|
218
219
|
log_dir = os.path.join(run_dir, self.label)
|
219
|
-
self._function_data_dir = os.path.join(run_dir, self.label, "function_data")
|
220
220
|
os.makedirs(log_dir)
|
221
|
-
|
221
|
+
tmp_prefix = f'{self.label}-{getpass.getuser()}-{datetime.now().strftime("%Y%m%d%H%M%S%f")}-'
|
222
|
+
self._function_data_dir = tempfile.TemporaryDirectory(prefix=tmp_prefix)
|
222
223
|
|
223
224
|
# put TaskVine logs outside of a Parsl run as TaskVine caches between runs while
|
224
225
|
# Parsl does not.
|
@@ -228,7 +229,7 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
228
229
|
|
229
230
|
# factory logs go with manager logs regardless
|
230
231
|
self.factory_config.scratch_dir = self.manager_config.vine_log_dir
|
231
|
-
logger.debug(f"Function data directory: {self._function_data_dir}, log directory: {log_dir}")
|
232
|
+
logger.debug(f"Function data directory: {self._function_data_dir.name}, log directory: {log_dir}")
|
232
233
|
logger.debug(
|
233
234
|
f"TaskVine manager log directory: {self.manager_config.vine_log_dir}, "
|
234
235
|
f"factory log directory: {self.factory_config.scratch_dir}")
|
@@ -294,7 +295,7 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
294
295
|
'map': Pickled file with a dict between local parsl names, and remote taskvine names.
|
295
296
|
"""
|
296
297
|
task_dir = "{:04d}".format(executor_task_id)
|
297
|
-
return os.path.join(self._function_data_dir, task_dir, *path_components)
|
298
|
+
return os.path.join(self._function_data_dir.name, task_dir, *path_components)
|
298
299
|
|
299
300
|
def submit(self, func, resource_specification, *args, **kwargs):
|
300
301
|
"""Processes the Parsl app by its arguments and submits the function
|
@@ -564,13 +565,6 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
564
565
|
self._worker_command = self._construct_worker_command()
|
565
566
|
self._patch_providers()
|
566
567
|
|
567
|
-
if hasattr(self.provider, 'init_blocks'):
|
568
|
-
try:
|
569
|
-
self.scale_out(blocks=self.provider.init_blocks)
|
570
|
-
except Exception as e:
|
571
|
-
logger.error("Initial block scaling out failed: {}".format(e))
|
572
|
-
raise e
|
573
|
-
|
574
568
|
@property
|
575
569
|
def outstanding(self) -> int:
|
576
570
|
"""Count the number of outstanding tasks."""
|
@@ -581,19 +575,6 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
581
575
|
def workers_per_node(self) -> Union[int, float]:
|
582
576
|
return 1
|
583
577
|
|
584
|
-
def scale_in(self, count):
|
585
|
-
"""Scale in method. Cancel a given number of blocks
|
586
|
-
"""
|
587
|
-
# Obtain list of blocks to kill
|
588
|
-
to_kill = list(self.blocks.keys())[:count]
|
589
|
-
kill_ids = [self.blocks[block] for block in to_kill]
|
590
|
-
|
591
|
-
# Cancel the blocks provisioned
|
592
|
-
if self.provider:
|
593
|
-
self.provider.cancel(kill_ids)
|
594
|
-
else:
|
595
|
-
logger.error("No execution provider available to scale")
|
596
|
-
|
597
578
|
def shutdown(self, *args, **kwargs):
|
598
579
|
"""Shutdown the executor. Sets flag to cancel the submit process and
|
599
580
|
collector thread, which shuts down the TaskVine system submission.
|
@@ -601,20 +582,22 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
|
|
601
582
|
logger.debug("TaskVine shutdown started")
|
602
583
|
self._should_stop.set()
|
603
584
|
|
604
|
-
# Remove the workers that are still going
|
605
|
-
kill_ids = [self.blocks[block] for block in self.blocks.keys()]
|
606
|
-
if self.provider:
|
607
|
-
logger.debug("Cancelling blocks")
|
608
|
-
self.provider.cancel(kill_ids)
|
609
|
-
|
610
585
|
# Join all processes before exiting
|
611
586
|
logger.debug("Joining on submit process")
|
612
587
|
self._submit_process.join()
|
588
|
+
self._submit_process.close()
|
613
589
|
logger.debug("Joining on collector thread")
|
614
590
|
self._collector_thread.join()
|
615
591
|
if self.worker_launch_method == 'factory':
|
616
592
|
logger.debug("Joining on factory process")
|
617
593
|
self._factory_process.join()
|
594
|
+
self._factory_process.close()
|
595
|
+
|
596
|
+
# Shutdown multiprocessing queues
|
597
|
+
self._ready_task_queue.close()
|
598
|
+
self._ready_task_queue.join_thread()
|
599
|
+
self._finished_task_queue.close()
|
600
|
+
self._finished_task_queue.join_thread()
|
618
601
|
|
619
602
|
logger.debug("TaskVine shutdown completed")
|
620
603
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import logging
|
2
2
|
|
3
|
-
from parsl.process_loggers import wrap_with_logs
|
4
3
|
from parsl.executors.taskvine.errors import TaskVineFactoryFailure
|
4
|
+
from parsl.process_loggers import wrap_with_logs
|
5
5
|
|
6
6
|
# This try except clause prevents import errors
|
7
7
|
# when TaskVine is not used in Parsl.
|
@@ -1,25 +1,23 @@
|
|
1
|
-
import logging
|
2
1
|
import hashlib
|
3
|
-
import
|
2
|
+
import logging
|
4
3
|
import os
|
5
4
|
import queue
|
6
5
|
import shutil
|
6
|
+
import subprocess
|
7
7
|
import uuid
|
8
8
|
|
9
|
-
from parsl.utils import setproctitle
|
10
|
-
from parsl.process_loggers import wrap_with_logs
|
11
9
|
from parsl.executors.taskvine import exec_parsl_function
|
12
|
-
from parsl.executors.taskvine.utils import VineTaskToParsl
|
13
|
-
from parsl.
|
10
|
+
from parsl.executors.taskvine.utils import VineTaskToParsl, run_parsl_function
|
11
|
+
from parsl.process_loggers import wrap_with_logs
|
12
|
+
from parsl.utils import setproctitle
|
14
13
|
|
15
14
|
try:
|
16
|
-
from ndcctools.taskvine import cvine
|
17
|
-
from ndcctools.taskvine import
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
from ndcctools.taskvine.cvine import VINE_ALLOCATION_MODE_MAX
|
15
|
+
from ndcctools.taskvine import FunctionCall, Manager, Task, cvine
|
16
|
+
from ndcctools.taskvine.cvine import (
|
17
|
+
VINE_ALLOCATION_MODE_EXHAUSTIVE_BUCKETING,
|
18
|
+
VINE_ALLOCATION_MODE_MAX,
|
19
|
+
VINE_ALLOCATION_MODE_MAX_THROUGHPUT,
|
20
|
+
)
|
23
21
|
except ImportError:
|
24
22
|
_taskvine_enabled = False
|
25
23
|
else:
|
@@ -46,11 +44,17 @@ def _set_manager_attributes(m, config):
|
|
46
44
|
# Enable peer transfer feature between workers if specified
|
47
45
|
if config.enable_peer_transfers:
|
48
46
|
m.enable_peer_transfers()
|
47
|
+
else:
|
48
|
+
m.disable_peer_transfers()
|
49
49
|
|
50
50
|
# Set catalog report to parsl if project name exists
|
51
51
|
if m.name:
|
52
52
|
m.set_property("framework", "parsl")
|
53
53
|
|
54
|
+
if config.tune_parameters is not None:
|
55
|
+
for k, v in config.tune_parameters.items():
|
56
|
+
m.tune(k, v)
|
57
|
+
|
54
58
|
|
55
59
|
def _prepare_environment_serverless(manager_config, env_cache_dir, poncho_create_script):
|
56
60
|
# Return path to a packaged poncho environment
|
@@ -24,7 +24,7 @@ class TaskVineManagerConfig:
|
|
24
24
|
|
25
25
|
address: Optional[str]
|
26
26
|
Address of the local machine.
|
27
|
-
If None,
|
27
|
+
If None, :py:func:`parsl.addresses.get_any_address` will be used to determine the address.
|
28
28
|
|
29
29
|
project_name: Optional[str]
|
30
30
|
If given, TaskVine will periodically report its status and performance
|
@@ -56,7 +56,7 @@ class TaskVineManagerConfig:
|
|
56
56
|
environment name is given, TaskVine will package the conda
|
57
57
|
environment in a tarball and send it along with tasks to be
|
58
58
|
executed in a replicated conda environment.
|
59
|
-
If a tarball of packages (
|
59
|
+
If a tarball of packages (``*.tar.gz``) is given, TaskVine
|
60
60
|
skips the packaging step and sends the tarball along with
|
61
61
|
tasks to be executed in a replicated conda environment.
|
62
62
|
|
@@ -156,6 +156,10 @@ class TaskVineManagerConfig:
|
|
156
156
|
Directory to store TaskVine logging facilities.
|
157
157
|
Default is None, in which all TaskVine logs will be contained
|
158
158
|
in the Parsl logging directory.
|
159
|
+
|
160
|
+
tune_parameters: Optional[dict]
|
161
|
+
Extended vine_tune parameters, expressed in a dictionary
|
162
|
+
by { 'tune-parameter' : value }.
|
159
163
|
"""
|
160
164
|
|
161
165
|
# Connection and communication settings
|
@@ -181,6 +185,7 @@ class TaskVineManagerConfig:
|
|
181
185
|
autocategory: bool = True
|
182
186
|
enable_peer_transfers: bool = True
|
183
187
|
wait_for_workers: Optional[int] = None
|
188
|
+
tune_parameters: Optional[dict] = None
|
184
189
|
|
185
190
|
# Logging settings
|
186
191
|
vine_log_dir: Optional[str] = None
|
parsl/executors/threads.py
CHANGED
@@ -1,14 +1,13 @@
|
|
1
|
-
import logging
|
2
|
-
import typeguard
|
3
1
|
import concurrent.futures as cf
|
4
|
-
|
2
|
+
import logging
|
5
3
|
from typing import List, Optional
|
6
4
|
|
5
|
+
import typeguard
|
6
|
+
|
7
7
|
from parsl.data_provider.staging import Staging
|
8
8
|
from parsl.executors.base import ParslExecutor
|
9
|
+
from parsl.executors.errors import InvalidResourceSpecification
|
9
10
|
from parsl.utils import RepresentationMixin
|
10
|
-
from parsl.executors.errors import UnsupportedFeatureError
|
11
|
-
|
12
11
|
|
13
12
|
logger = logging.getLogger(__name__)
|
14
13
|
|
@@ -55,7 +54,8 @@ class ThreadPoolExecutor(ParslExecutor, RepresentationMixin):
|
|
55
54
|
if resource_specification:
|
56
55
|
logger.error("Ignoring the resource specification. "
|
57
56
|
"Parsl resource specification is not supported in ThreadPool Executor.")
|
58
|
-
raise
|
57
|
+
raise InvalidResourceSpecification(set(resource_specification.keys()),
|
58
|
+
"Parsl resource specification is not supported in ThreadPool Executor.")
|
59
59
|
|
60
60
|
return self.executor.submit(func, *args, **kwargs)
|
61
61
|
|
@@ -1,10 +1,11 @@
|
|
1
|
+
import pickle
|
2
|
+
import sys
|
3
|
+
import traceback
|
4
|
+
|
1
5
|
from parsl.app.errors import RemoteExceptionWrapper
|
2
6
|
from parsl.data_provider.files import File
|
3
|
-
from parsl.utils import get_std_fname_mode
|
4
|
-
import traceback
|
5
|
-
import sys
|
6
|
-
import pickle
|
7
7
|
from parsl.serialize import serialize
|
8
|
+
from parsl.utils import get_std_fname_mode
|
8
9
|
|
9
10
|
# This scripts executes a parsl function which is pickled in a file:
|
10
11
|
#
|
@@ -93,7 +94,7 @@ def unpack_source_code_function(function_info, user_namespace):
|
|
93
94
|
|
94
95
|
def unpack_byte_code_function(function_info, user_namespace):
|
95
96
|
from parsl.serialize import unpack_apply_message
|
96
|
-
func, args, kwargs = unpack_apply_message(function_info["byte code"]
|
97
|
+
func, args, kwargs = unpack_apply_message(function_info["byte code"])
|
97
98
|
return (func, 'parsl_function_name', args, kwargs)
|
98
99
|
|
99
100
|
|