parsl 2024.5.20__py3-none-any.whl → 2024.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +9 -10
- parsl/addresses.py +6 -4
- parsl/app/app.py +3 -6
- parsl/app/bash.py +4 -4
- parsl/app/errors.py +5 -3
- parsl/app/futures.py +3 -3
- parsl/app/python.py +2 -1
- parsl/benchmark/perf.py +2 -1
- parsl/channels/__init__.py +2 -2
- parsl/channels/base.py +0 -1
- parsl/channels/errors.py +2 -1
- parsl/channels/oauth_ssh/oauth_ssh.py +4 -3
- parsl/channels/ssh/ssh.py +9 -1
- parsl/channels/ssh_il/ssh_il.py +1 -0
- parsl/concurrent/__init__.py +2 -2
- parsl/config.py +12 -6
- parsl/configs/ASPIRE1.py +3 -3
- parsl/configs/Azure.py +6 -7
- parsl/configs/ad_hoc.py +4 -3
- parsl/configs/bridges.py +3 -3
- parsl/configs/cc_in2p3.py +2 -2
- parsl/configs/ec2.py +1 -1
- parsl/configs/expanse.py +1 -2
- parsl/configs/frontera.py +2 -3
- parsl/configs/htex_local.py +1 -2
- parsl/configs/illinoiscluster.py +1 -1
- parsl/configs/kubernetes.py +1 -2
- parsl/configs/midway.py +3 -3
- parsl/configs/osg.py +1 -1
- parsl/configs/polaris.py +1 -1
- parsl/configs/stampede2.py +4 -5
- parsl/configs/summit.py +1 -3
- parsl/configs/toss3_llnl.py +1 -2
- parsl/configs/vineex_local.py +3 -3
- parsl/configs/wqex_local.py +2 -2
- parsl/data_provider/data_manager.py +3 -3
- parsl/data_provider/file_noop.py +1 -2
- parsl/data_provider/files.py +3 -3
- parsl/data_provider/ftp.py +1 -3
- parsl/data_provider/globus.py +7 -6
- parsl/data_provider/http.py +2 -2
- parsl/data_provider/rsync.py +1 -1
- parsl/data_provider/staging.py +2 -2
- parsl/data_provider/zip.py +4 -5
- parsl/dataflow/dependency_resolvers.py +115 -0
- parsl/dataflow/dflow.py +65 -54
- parsl/dataflow/errors.py +2 -1
- parsl/dataflow/futures.py +1 -2
- parsl/dataflow/memoization.py +5 -5
- parsl/dataflow/rundirs.py +1 -1
- parsl/dataflow/taskrecord.py +4 -5
- parsl/executors/__init__.py +3 -3
- parsl/executors/base.py +1 -0
- parsl/executors/flux/execute_parsl_task.py +2 -2
- parsl/executors/flux/executor.py +11 -12
- parsl/executors/flux/flux_instance_manager.py +3 -3
- parsl/executors/high_throughput/errors.py +10 -0
- parsl/executors/high_throughput/executor.py +31 -36
- parsl/executors/high_throughput/interchange.py +16 -18
- parsl/executors/high_throughput/manager_record.py +1 -0
- parsl/executors/high_throughput/monitoring_info.py +2 -1
- parsl/executors/high_throughput/mpi_executor.py +6 -3
- parsl/executors/high_throughput/mpi_prefix_composer.py +19 -3
- parsl/executors/high_throughput/mpi_resource_management.py +1 -2
- parsl/executors/high_throughput/probe.py +6 -4
- parsl/executors/high_throughput/process_worker_pool.py +31 -20
- parsl/executors/high_throughput/zmq_pipes.py +63 -15
- parsl/executors/radical/executor.py +15 -15
- parsl/executors/radical/rpex_master.py +1 -2
- parsl/executors/radical/rpex_resources.py +4 -9
- parsl/executors/radical/rpex_worker.py +2 -1
- parsl/executors/status_handling.py +5 -4
- parsl/executors/taskvine/__init__.py +1 -1
- parsl/executors/taskvine/errors.py +1 -1
- parsl/executors/taskvine/exec_parsl_function.py +2 -2
- parsl/executors/taskvine/executor.py +23 -24
- parsl/executors/taskvine/factory.py +1 -1
- parsl/executors/taskvine/manager.py +11 -13
- parsl/executors/threads.py +4 -5
- parsl/executors/workqueue/errors.py +1 -1
- parsl/executors/workqueue/exec_parsl_function.py +5 -4
- parsl/executors/workqueue/executor.py +26 -27
- parsl/executors/workqueue/parsl_coprocess.py +1 -1
- parsl/jobs/error_handlers.py +1 -1
- parsl/jobs/job_status_poller.py +2 -5
- parsl/jobs/states.py +1 -1
- parsl/jobs/strategy.py +2 -2
- parsl/launchers/__init__.py +12 -3
- parsl/launchers/errors.py +1 -1
- parsl/log_utils.py +1 -2
- parsl/monitoring/db_manager.py +16 -10
- parsl/monitoring/monitoring.py +11 -15
- parsl/monitoring/queries/pandas.py +1 -2
- parsl/monitoring/radios.py +2 -4
- parsl/monitoring/remote.py +13 -8
- parsl/monitoring/router.py +8 -11
- parsl/monitoring/types.py +2 -0
- parsl/monitoring/visualization/app.py +4 -2
- parsl/monitoring/visualization/models.py +0 -1
- parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
- parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
- parsl/monitoring/visualization/utils.py +0 -1
- parsl/monitoring/visualization/views.py +16 -9
- parsl/multiprocessing.py +0 -1
- parsl/process_loggers.py +1 -2
- parsl/providers/__init__.py +9 -12
- parsl/providers/ad_hoc/ad_hoc.py +1 -1
- parsl/providers/aws/aws.py +2 -3
- parsl/providers/azure/azure.py +4 -5
- parsl/providers/base.py +1 -1
- parsl/providers/cluster_provider.py +1 -1
- parsl/providers/cobalt/cobalt.py +3 -3
- parsl/providers/condor/condor.py +4 -2
- parsl/providers/errors.py +2 -2
- parsl/providers/googlecloud/googlecloud.py +2 -1
- parsl/providers/grid_engine/grid_engine.py +2 -2
- parsl/providers/kubernetes/kube.py +5 -3
- parsl/providers/local/local.py +5 -1
- parsl/providers/lsf/lsf.py +2 -2
- parsl/providers/pbspro/pbspro.py +1 -1
- parsl/providers/slurm/slurm.py +5 -5
- parsl/providers/torque/torque.py +1 -1
- parsl/serialize/__init__.py +8 -3
- parsl/serialize/base.py +1 -2
- parsl/serialize/concretes.py +5 -4
- parsl/serialize/proxystore.py +3 -2
- parsl/tests/__init__.py +1 -1
- parsl/tests/configs/ad_hoc_cluster_htex.py +4 -4
- parsl/tests/configs/azure_single_node.py +4 -5
- parsl/tests/configs/bridges.py +3 -2
- parsl/tests/configs/cc_in2p3.py +2 -2
- parsl/tests/configs/comet.py +2 -1
- parsl/tests/configs/ec2_single_node.py +1 -2
- parsl/tests/configs/ec2_spot.py +1 -2
- parsl/tests/configs/frontera.py +3 -2
- parsl/tests/configs/htex_ad_hoc_cluster.py +2 -4
- parsl/tests/configs/htex_local.py +2 -3
- parsl/tests/configs/htex_local_alternate.py +8 -11
- parsl/tests/configs/htex_local_intask_staging.py +5 -7
- parsl/tests/configs/htex_local_rsync_staging.py +4 -6
- parsl/tests/configs/local_adhoc.py +1 -1
- parsl/tests/configs/local_radical.py +1 -3
- parsl/tests/configs/local_radical_mpi.py +2 -2
- parsl/tests/configs/midway.py +2 -2
- parsl/tests/configs/nscc_singapore.py +3 -3
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +3 -2
- parsl/tests/configs/summit.py +1 -0
- parsl/tests/configs/swan_htex.py +2 -2
- parsl/tests/configs/taskvine_ex.py +3 -5
- parsl/tests/configs/theta.py +2 -2
- parsl/tests/configs/workqueue_ex.py +3 -4
- parsl/tests/conftest.py +6 -6
- parsl/tests/integration/test_channels/test_ssh_errors.py +1 -1
- parsl/tests/integration/test_stress/test_python_simple.py +3 -4
- parsl/tests/integration/test_stress/test_python_threads.py +3 -5
- parsl/tests/manual_tests/htex_local.py +4 -4
- parsl/tests/manual_tests/test_ad_hoc_htex.py +2 -1
- parsl/tests/manual_tests/test_basic.py +1 -0
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +4 -4
- parsl/tests/manual_tests/test_log_filter.py +3 -1
- parsl/tests/manual_tests/test_memory_limits.py +6 -6
- parsl/tests/manual_tests/test_regression_220.py +2 -1
- parsl/tests/manual_tests/test_udp_simple.py +4 -3
- parsl/tests/manual_tests/test_worker_count.py +3 -2
- parsl/tests/scaling_tests/htex_local.py +2 -2
- parsl/tests/scaling_tests/test_scale.py +0 -9
- parsl/tests/scaling_tests/vineex_condor.py +1 -2
- parsl/tests/scaling_tests/vineex_local.py +1 -2
- parsl/tests/site_tests/test_provider.py +3 -1
- parsl/tests/site_tests/test_site.py +2 -0
- parsl/tests/sites/test_affinity.py +7 -5
- parsl/tests/sites/test_dynamic_executor.py +3 -4
- parsl/tests/sites/test_ec2.py +3 -2
- parsl/tests/sites/test_local_adhoc.py +2 -1
- parsl/tests/sites/test_worker_info.py +4 -3
- parsl/tests/test_aalst_patterns.py +0 -1
- parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
- parsl/tests/test_bash_apps/test_error_codes.py +1 -4
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +1 -0
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +2 -2
- parsl/tests/test_bash_apps/test_pipeline.py +1 -1
- parsl/tests/test_bash_apps/test_std_uri.py +4 -9
- parsl/tests/test_callables.py +2 -2
- parsl/tests/test_checkpointing/test_periodic.py +2 -7
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
- parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
- parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
- parsl/tests/test_checkpointing/test_regression_239.py +1 -1
- parsl/tests/test_checkpointing/test_task_exit.py +1 -2
- parsl/tests/test_docs/test_from_slides.py +2 -2
- parsl/tests/test_docs/test_kwargs.py +1 -1
- parsl/tests/test_docs/test_tutorial_1.py +1 -2
- parsl/tests/test_docs/test_workflow1.py +2 -2
- parsl/tests/test_docs/test_workflow2.py +0 -1
- parsl/tests/test_error_handling/test_rand_fail.py +2 -2
- parsl/tests/test_error_handling/test_resource_spec.py +4 -2
- parsl/tests/test_error_handling/test_retries.py +2 -1
- parsl/tests/test_error_handling/test_retry_handler.py +1 -0
- parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
- parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
- parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
- parsl/tests/test_flux.py +1 -1
- parsl/tests/test_htex/test_basic.py +0 -1
- parsl/tests/test_htex/test_command_client_timeout.py +66 -0
- parsl/tests/test_htex/test_connected_blocks.py +3 -2
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
- parsl/tests/test_htex/test_disconnected_blocks.py +6 -4
- parsl/tests/test_htex/test_drain.py +5 -5
- parsl/tests/test_htex/test_htex.py +1 -2
- parsl/tests/test_htex/test_manager_failure.py +0 -1
- parsl/tests/test_htex/test_managers_command.py +5 -9
- parsl/tests/test_htex/test_missing_worker.py +2 -8
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +6 -4
- parsl/tests/test_monitoring/test_app_names.py +3 -3
- parsl/tests/test_monitoring/test_basic.py +4 -6
- parsl/tests/test_monitoring/test_db_locks.py +6 -4
- parsl/tests/test_monitoring/test_fuzz_zmq.py +6 -4
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +5 -7
- parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
- parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
- parsl/tests/test_monitoring/test_stdouterr.py +4 -6
- parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +1 -1
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +4 -7
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +15 -4
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +4 -3
- parsl/tests/test_mpi_apps/test_resource_spec.py +21 -17
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +2 -0
- parsl/tests/test_providers/test_local_provider.py +2 -1
- parsl/tests/test_providers/test_pbspro_template.py +1 -1
- parsl/tests/test_providers/test_slurm_template.py +1 -1
- parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
- parsl/tests/test_python_apps/test_context_manager.py +5 -12
- parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
- parsl/tests/test_python_apps/test_futures.py +2 -1
- parsl/tests/test_python_apps/test_join.py +0 -1
- parsl/tests/test_python_apps/test_lifted.py +11 -7
- parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
- parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -1
- parsl/tests/test_regression/test_1480.py +2 -1
- parsl/tests/test_regression/test_1653.py +2 -1
- parsl/tests/test_regression/test_2652.py +1 -0
- parsl/tests/test_regression/test_69a.py +0 -1
- parsl/tests/test_regression/test_854.py +4 -2
- parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
- parsl/tests/test_regression/test_98.py +0 -1
- parsl/tests/test_scaling/test_block_error_handler.py +9 -4
- parsl/tests/test_scaling/test_regression_1621.py +0 -2
- parsl/tests/test_scaling/test_scale_down.py +2 -3
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +4 -5
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +3 -4
- parsl/tests/test_scaling/test_shutdown_scalein.py +1 -4
- parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
- parsl/tests/test_serialization/test_basic.py +2 -1
- parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
- parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
- parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
- parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
- parsl/tests/test_shutdown/test_kill_monitoring.py +3 -4
- parsl/tests/test_staging/staging_provider.py +2 -2
- parsl/tests/test_staging/test_1316.py +3 -4
- parsl/tests/test_staging/test_docs_1.py +1 -1
- parsl/tests/test_staging/test_docs_2.py +2 -1
- parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
- parsl/tests/test_staging/test_staging_https.py +2 -2
- parsl/tests/test_staging/test_staging_stdout.py +4 -3
- parsl/tests/test_staging/test_zip_in.py +6 -8
- parsl/tests/test_staging/test_zip_out.py +7 -9
- parsl/tests/test_staging/test_zip_to_zip.py +6 -8
- parsl/tests/test_summary.py +2 -2
- parsl/tests/test_thread_parallelism.py +0 -1
- parsl/tests/test_threads/test_configs.py +1 -2
- parsl/tests/test_threads/test_lazy_errors.py +2 -2
- parsl/usage_tracking/api.py +2 -3
- parsl/usage_tracking/usage.py +8 -18
- parsl/utils.py +13 -2
- parsl/version.py +1 -1
- {parsl-2024.5.20.data → parsl-2024.6.3.data}/scripts/exec_parsl_function.py +5 -4
- {parsl-2024.5.20.data → parsl-2024.6.3.data}/scripts/process_worker_pool.py +31 -20
- {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/METADATA +6 -6
- parsl-2024.6.3.dist-info/RECORD +471 -0
- parsl-2024.5.20.dist-info/RECORD +0 -468
- {parsl-2024.5.20.data → parsl-2024.6.3.data}/scripts/parsl_coprocess.py +1 -1
- {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/LICENSE +0 -0
- {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/WHEEL +0 -0
- {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/entry_points.txt +0 -0
- {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/top_level.txt +0 -0
@@ -1,44 +1,39 @@
|
|
1
|
+
import logging
|
2
|
+
import math
|
3
|
+
import pickle
|
4
|
+
import threading
|
1
5
|
import typing
|
6
|
+
import warnings
|
2
7
|
from collections import defaultdict
|
3
8
|
from concurrent.futures import Future
|
4
|
-
import typeguard
|
5
|
-
import logging
|
6
|
-
import threading
|
7
|
-
import queue
|
8
|
-
import pickle
|
9
9
|
from dataclasses import dataclass
|
10
|
-
from multiprocessing import Process
|
11
|
-
from typing import Dict, Sequence
|
12
|
-
|
13
|
-
import
|
14
|
-
import warnings
|
10
|
+
from multiprocessing import Process
|
11
|
+
from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
|
12
|
+
|
13
|
+
import typeguard
|
15
14
|
|
16
15
|
import parsl.launchers
|
17
|
-
from parsl
|
18
|
-
from parsl.
|
19
|
-
from parsl.serialize.errors import SerializationError, DeserializationError
|
16
|
+
from parsl import curvezmq
|
17
|
+
from parsl.addresses import get_all_addresses
|
20
18
|
from parsl.app.errors import RemoteExceptionWrapper
|
21
|
-
from parsl.
|
22
|
-
from parsl.executors.
|
23
|
-
from parsl.executors.high_throughput import interchange
|
24
|
-
from parsl.executors.errors import
|
25
|
-
BadMessage, ScalingFailed,
|
26
|
-
)
|
19
|
+
from parsl.data_provider.staging import Staging
|
20
|
+
from parsl.executors.errors import BadMessage, ScalingFailed
|
21
|
+
from parsl.executors.high_throughput import interchange, zmq_pipes
|
22
|
+
from parsl.executors.high_throughput.errors import CommandClientTimeoutError
|
27
23
|
from parsl.executors.high_throughput.mpi_prefix_composer import (
|
28
24
|
VALID_LAUNCHERS,
|
29
|
-
validate_resource_spec
|
25
|
+
validate_resource_spec,
|
30
26
|
)
|
31
|
-
|
32
|
-
from parsl import curvezmq
|
33
27
|
from parsl.executors.status_handling import BlockProviderExecutor
|
34
|
-
from parsl.
|
35
|
-
from parsl.data_provider.staging import Staging
|
36
|
-
from parsl.addresses import get_all_addresses
|
37
|
-
from parsl.process_loggers import wrap_with_logs
|
38
|
-
|
28
|
+
from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
|
39
29
|
from parsl.multiprocessing import ForkProcess
|
40
|
-
from parsl.
|
30
|
+
from parsl.process_loggers import wrap_with_logs
|
41
31
|
from parsl.providers import LocalProvider
|
32
|
+
from parsl.providers.base import ExecutionProvider
|
33
|
+
from parsl.serialize import deserialize, pack_res_spec_apply_message
|
34
|
+
from parsl.serialize.errors import DeserializationError, SerializationError
|
35
|
+
from parsl.usage_tracking.api import UsageInformation
|
36
|
+
from parsl.utils import RepresentationMixin
|
42
37
|
|
43
38
|
logger = logging.getLogger(__name__)
|
44
39
|
|
@@ -415,13 +410,13 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
415
410
|
)
|
416
411
|
|
417
412
|
self.outgoing_q = zmq_pipes.TasksOutgoing(
|
418
|
-
|
413
|
+
"127.0.0.1", self.interchange_port_range, self.cert_dir
|
419
414
|
)
|
420
415
|
self.incoming_q = zmq_pipes.ResultsIncoming(
|
421
|
-
|
416
|
+
"127.0.0.1", self.interchange_port_range, self.cert_dir
|
422
417
|
)
|
423
418
|
self.command_client = zmq_pipes.CommandClient(
|
424
|
-
|
419
|
+
"127.0.0.1", self.interchange_port_range, self.cert_dir
|
425
420
|
)
|
426
421
|
|
427
422
|
self._queue_management_thread = None
|
@@ -531,9 +526,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
531
526
|
Starts the interchange process locally and uses an internal command queue to
|
532
527
|
get the worker task and result ports that the interchange has bound to.
|
533
528
|
"""
|
534
|
-
comm_q = Queue(maxsize=10)
|
535
529
|
self.interchange_proc = ForkProcess(target=interchange.starter,
|
536
|
-
args=(comm_q,),
|
537
530
|
kwargs={"client_ports": (self.outgoing_q.port,
|
538
531
|
self.incoming_q.port,
|
539
532
|
self.command_client.port),
|
@@ -552,9 +545,10 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
552
545
|
name="HTEX-Interchange"
|
553
546
|
)
|
554
547
|
self.interchange_proc.start()
|
548
|
+
|
555
549
|
try:
|
556
|
-
(self.worker_task_port, self.worker_result_port) =
|
557
|
-
except
|
550
|
+
(self.worker_task_port, self.worker_result_port) = self.command_client.run("WORKER_PORTS", timeout_s=120)
|
551
|
+
except CommandClientTimeoutError:
|
558
552
|
logger.error("Interchange has not completed initialization in 120s. Aborting")
|
559
553
|
raise Exception("Interchange failed to start")
|
560
554
|
|
@@ -645,7 +639,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
645
639
|
Returns:
|
646
640
|
Future
|
647
641
|
"""
|
648
|
-
|
642
|
+
|
643
|
+
validate_resource_spec(resource_specification, self.enable_mpi_mode)
|
649
644
|
|
650
645
|
if self.bad_state_is_set:
|
651
646
|
raise self.executor_exception
|
@@ -1,31 +1,28 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
import
|
3
|
-
import
|
2
|
+
import datetime
|
3
|
+
import json
|
4
|
+
import logging
|
4
5
|
import os
|
5
|
-
import
|
6
|
+
import pickle
|
6
7
|
import platform
|
8
|
+
import queue
|
7
9
|
import random
|
8
|
-
import time
|
9
|
-
import datetime
|
10
|
-
import pickle
|
11
10
|
import signal
|
12
|
-
import
|
13
|
-
import queue
|
11
|
+
import sys
|
14
12
|
import threading
|
15
|
-
import
|
13
|
+
import time
|
14
|
+
from typing import Any, Dict, List, NoReturn, Optional, Sequence, Set, Tuple, cast
|
16
15
|
|
17
|
-
|
16
|
+
import zmq
|
18
17
|
|
19
18
|
from parsl import curvezmq
|
20
|
-
from parsl.utils import setproctitle
|
21
|
-
from parsl.version import VERSION as PARSL_VERSION
|
22
|
-
from parsl.serialize import serialize as serialize_object
|
23
|
-
|
24
19
|
from parsl.app.errors import RemoteExceptionWrapper
|
25
20
|
from parsl.executors.high_throughput.manager_record import ManagerRecord
|
26
21
|
from parsl.monitoring.message_type import MessageType
|
27
22
|
from parsl.process_loggers import wrap_with_logs
|
28
|
-
|
23
|
+
from parsl.serialize import serialize as serialize_object
|
24
|
+
from parsl.utils import setproctitle
|
25
|
+
from parsl.version import VERSION as PARSL_VERSION
|
29
26
|
|
30
27
|
PKL_HEARTBEAT_CODE = pickle.dumps((2 ** 32) - 1)
|
31
28
|
PKL_DRAINED_CODE = pickle.dumps((2 ** 32) - 2)
|
@@ -328,6 +325,9 @@ class Interchange:
|
|
328
325
|
|
329
326
|
reply = None
|
330
327
|
|
328
|
+
elif command_req == "WORKER_PORTS":
|
329
|
+
reply = (self.worker_task_port, self.worker_result_port)
|
330
|
+
|
331
331
|
else:
|
332
332
|
logger.error(f"Received unknown command: {command_req}")
|
333
333
|
reply = None
|
@@ -672,7 +672,7 @@ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string:
|
|
672
672
|
|
673
673
|
|
674
674
|
@wrap_with_logs(target="interchange")
|
675
|
-
def starter(
|
675
|
+
def starter(*args: Any, **kwargs: Any) -> None:
|
676
676
|
"""Start the interchange process
|
677
677
|
|
678
678
|
The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
|
@@ -680,6 +680,4 @@ def starter(comm_q: multiprocessing.Queue, *args: Any, **kwargs: Any) -> None:
|
|
680
680
|
setproctitle("parsl: HTEX interchange")
|
681
681
|
# logger = multiprocessing.get_logger()
|
682
682
|
ic = Interchange(*args, **kwargs)
|
683
|
-
comm_q.put((ic.worker_task_port,
|
684
|
-
ic.worker_result_port))
|
685
683
|
ic.start()
|
@@ -1,10 +1,13 @@
|
|
1
1
|
"""A simplified interface for HTEx when running in MPI mode"""
|
2
|
-
from typing import
|
2
|
+
from typing import Callable, Dict, List, Optional, Tuple, Union
|
3
3
|
|
4
4
|
import typeguard
|
5
5
|
|
6
6
|
from parsl.data_provider.staging import Staging
|
7
|
-
from parsl.executors.high_throughput.executor import
|
7
|
+
from parsl.executors.high_throughput.executor import (
|
8
|
+
GENERAL_HTEX_PARAM_DOCS,
|
9
|
+
HighThroughputExecutor,
|
10
|
+
)
|
8
11
|
from parsl.executors.status_handling import BlockProviderExecutor
|
9
12
|
from parsl.jobs.states import JobStatus
|
10
13
|
from parsl.providers import LocalProvider
|
@@ -20,7 +23,7 @@ class MPIExecutor(HighThroughputExecutor):
|
|
20
23
|
to spawn multi-node tasks.
|
21
24
|
|
22
25
|
Specify the maximum number of multi-node tasks to run at once using ``max_workers_per_block``.
|
23
|
-
The
|
26
|
+
The value should be less than or equal to the ``nodes_per_block`` in the Provider.
|
24
27
|
|
25
28
|
Parameters
|
26
29
|
----------
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Dict, List,
|
2
|
+
from typing import Dict, List, Set, Tuple
|
3
3
|
|
4
4
|
logger = logging.getLogger(__name__)
|
5
5
|
|
@@ -8,8 +8,18 @@ VALID_LAUNCHERS = ('srun',
|
|
8
8
|
'mpiexec')
|
9
9
|
|
10
10
|
|
11
|
+
class MissingResourceSpecification(Exception):
|
12
|
+
"""Exception raised when input is not supplied a resource specification"""
|
13
|
+
|
14
|
+
def __init__(self, reason: str):
|
15
|
+
self.reason = reason
|
16
|
+
|
17
|
+
def __str__(self):
|
18
|
+
return f"Missing resource specification: {self.reason}"
|
19
|
+
|
20
|
+
|
11
21
|
class InvalidResourceSpecification(Exception):
|
12
|
-
"""Exception raised when Invalid
|
22
|
+
"""Exception raised when Invalid input is supplied via resource specification"""
|
13
23
|
|
14
24
|
def __init__(self, invalid_keys: Set[str]):
|
15
25
|
self.invalid_keys = invalid_keys
|
@@ -18,13 +28,19 @@ class InvalidResourceSpecification(Exception):
|
|
18
28
|
return f"Invalid resource specification options supplied: {self.invalid_keys}"
|
19
29
|
|
20
30
|
|
21
|
-
def validate_resource_spec(resource_spec: Dict[str, str]):
|
31
|
+
def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
|
22
32
|
"""Basic validation of keys in the resource_spec
|
23
33
|
|
24
34
|
Raises: InvalidResourceSpecification if the resource_spec
|
25
35
|
is invalid (e.g, contains invalid keys)
|
26
36
|
"""
|
27
37
|
user_keys = set(resource_spec.keys())
|
38
|
+
|
39
|
+
# empty resource_spec when mpi_mode is set causes parsl to hang
|
40
|
+
# ref issue #3427
|
41
|
+
if is_mpi_enabled and len(user_keys) == 0:
|
42
|
+
raise MissingResourceSpecification('MPI mode requires optional parsl_resource_specification keyword argument to be configured')
|
43
|
+
|
28
44
|
legal_keys = set(("ranks_per_node",
|
29
45
|
"num_nodes",
|
30
46
|
"num_ranks",
|
@@ -8,8 +8,7 @@ from enum import Enum
|
|
8
8
|
from typing import Dict, List
|
9
9
|
|
10
10
|
from parsl.multiprocessing import SpawnContext
|
11
|
-
from parsl.serialize import
|
12
|
-
unpack_res_spec_apply_message)
|
11
|
+
from parsl.serialize import pack_res_spec_apply_message, unpack_res_spec_apply_message
|
13
12
|
|
14
13
|
logger = logging.getLogger(__name__)
|
15
14
|
|
@@ -1,11 +1,13 @@
|
|
1
|
-
import zmq
|
2
1
|
import argparse
|
3
|
-
import uuid
|
4
|
-
import time
|
5
2
|
import logging
|
6
|
-
|
3
|
+
import time
|
4
|
+
import uuid
|
5
|
+
|
6
|
+
import zmq
|
7
7
|
from zmq.utils.monitor import recv_monitor_message
|
8
8
|
|
9
|
+
from parsl.addresses import get_all_addresses
|
10
|
+
|
9
11
|
logger = logging.getLogger(__name__)
|
10
12
|
|
11
13
|
|
@@ -1,39 +1,41 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
import argparse
|
4
|
+
import json
|
4
5
|
import logging
|
6
|
+
import math
|
7
|
+
import multiprocessing
|
5
8
|
import os
|
6
|
-
import
|
9
|
+
import pickle
|
7
10
|
import platform
|
11
|
+
import queue
|
12
|
+
import sys
|
8
13
|
import threading
|
9
|
-
import pickle
|
10
14
|
import time
|
11
|
-
import queue
|
12
15
|
import uuid
|
13
|
-
from typing import Sequence, Optional, Dict, List
|
14
|
-
|
15
|
-
import zmq
|
16
|
-
import math
|
17
|
-
import json
|
18
|
-
import psutil
|
19
|
-
import multiprocessing
|
20
16
|
from multiprocessing.managers import DictProxy
|
21
17
|
from multiprocessing.sharedctypes import Synchronized
|
18
|
+
from typing import Dict, List, Optional, Sequence
|
19
|
+
|
20
|
+
import psutil
|
21
|
+
import zmq
|
22
22
|
|
23
23
|
from parsl import curvezmq
|
24
|
-
from parsl.process_loggers import wrap_with_logs
|
25
|
-
from parsl.version import VERSION as PARSL_VERSION
|
26
24
|
from parsl.app.errors import RemoteExceptionWrapper
|
27
25
|
from parsl.executors.high_throughput.errors import WorkerLost
|
28
|
-
from parsl.executors.high_throughput.
|
29
|
-
|
30
|
-
|
26
|
+
from parsl.executors.high_throughput.mpi_prefix_composer import (
|
27
|
+
VALID_LAUNCHERS,
|
28
|
+
compose_all,
|
29
|
+
)
|
31
30
|
from parsl.executors.high_throughput.mpi_resource_management import (
|
31
|
+
MPITaskScheduler,
|
32
32
|
TaskScheduler,
|
33
|
-
MPITaskScheduler
|
34
33
|
)
|
35
|
-
|
36
|
-
from parsl.
|
34
|
+
from parsl.executors.high_throughput.probe import probe_addresses
|
35
|
+
from parsl.multiprocessing import SpawnContext
|
36
|
+
from parsl.process_loggers import wrap_with_logs
|
37
|
+
from parsl.serialize import serialize, unpack_res_spec_apply_message
|
38
|
+
from parsl.version import VERSION as PARSL_VERSION
|
37
39
|
|
38
40
|
HEARTBEAT_CODE = (2 ** 32) - 1
|
39
41
|
DRAINED_CODE = (2 ** 32) - 2
|
@@ -677,7 +679,8 @@ def worker(
|
|
677
679
|
# If desired, set process affinity
|
678
680
|
if cpu_affinity != "none":
|
679
681
|
# Count the number of cores per worker
|
680
|
-
|
682
|
+
# OSX does not implement os.sched_getaffinity
|
683
|
+
avail_cores = sorted(os.sched_getaffinity(0)) # type: ignore[attr-defined, unused-ignore]
|
681
684
|
cores_per_worker = len(avail_cores) // pool_size
|
682
685
|
assert cores_per_worker > 0, "Affinity does not work if there are more workers than cores"
|
683
686
|
|
@@ -717,7 +720,15 @@ def worker(
|
|
717
720
|
os.environ["KMP_AFFINITY"] = f"explicit,proclist=[{proc_list}]" # For Intel OpenMP
|
718
721
|
|
719
722
|
# Set the affinity for this worker
|
720
|
-
os.sched_setaffinity
|
723
|
+
# OSX does not implement os.sched_setaffinity so type checking
|
724
|
+
# is ignored here in two ways:
|
725
|
+
# On a platform without sched_setaffinity, that attribute will not
|
726
|
+
# be defined, so ignore[attr-defined] will tell mypy to ignore this
|
727
|
+
# incorrect-for-OS X attribute access.
|
728
|
+
# On a platform with sched_setaffinity, that type: ignore message
|
729
|
+
# will be redundant, and ignore[unused-ignore] tells mypy to ignore
|
730
|
+
# that this ignore is unneeded.
|
731
|
+
os.sched_setaffinity(0, my_cores) # type: ignore[attr-defined, unused-ignore]
|
721
732
|
logger.info("Set worker CPU affinity to {}".format(my_cores))
|
722
733
|
|
723
734
|
# If desired, pin to accelerator
|
@@ -1,10 +1,18 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
|
-
import zmq
|
4
3
|
import logging
|
5
4
|
import threading
|
5
|
+
import time
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
import zmq
|
6
9
|
|
7
10
|
from parsl import curvezmq
|
11
|
+
from parsl.errors import InternalConsistencyError
|
12
|
+
from parsl.executors.high_throughput.errors import (
|
13
|
+
CommandClientBadError,
|
14
|
+
CommandClientTimeoutError,
|
15
|
+
)
|
8
16
|
|
9
17
|
logger = logging.getLogger(__name__)
|
10
18
|
|
@@ -12,25 +20,29 @@ logger = logging.getLogger(__name__)
|
|
12
20
|
class CommandClient:
|
13
21
|
""" CommandClient
|
14
22
|
"""
|
15
|
-
def __init__(self,
|
23
|
+
def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
|
16
24
|
"""
|
17
25
|
Parameters
|
18
26
|
----------
|
19
27
|
|
20
|
-
zmq_context: curvezmq.ClientContext
|
21
|
-
CurveZMQ client context used to create secure sockets
|
22
28
|
ip_address: str
|
23
29
|
IP address of the client (where Parsl runs)
|
30
|
+
|
24
31
|
port_range: tuple(int, int)
|
25
32
|
Port range for the comms between client and interchange
|
26
33
|
|
34
|
+
cert_dir: str | None
|
35
|
+
Path to the certificate directory. Setting this to None will disable encryption.
|
36
|
+
default: None
|
37
|
+
|
27
38
|
"""
|
28
|
-
self.zmq_context =
|
39
|
+
self.zmq_context = curvezmq.ClientContext(cert_dir)
|
29
40
|
self.ip_address = ip_address
|
30
41
|
self.port_range = port_range
|
31
42
|
self.port = None
|
32
43
|
self.create_socket_and_bind()
|
33
44
|
self._lock = threading.Lock()
|
45
|
+
self.ok = True
|
34
46
|
|
35
47
|
def create_socket_and_bind(self):
|
36
48
|
""" Creates socket and binds to a port.
|
@@ -46,7 +58,7 @@ class CommandClient:
|
|
46
58
|
else:
|
47
59
|
self.zmq_socket.bind("tcp://{}:{}".format(self.ip_address, self.port))
|
48
60
|
|
49
|
-
def run(self, message, max_retries=3):
|
61
|
+
def run(self, message, max_retries=3, timeout_s=None):
|
50
62
|
""" This function needs to be fast at the same time aware of the possibility of
|
51
63
|
ZMQ pipes overflowing.
|
52
64
|
|
@@ -54,13 +66,43 @@ class CommandClient:
|
|
54
66
|
in ZMQ sockets reaching a broken state once there are ~10k tasks in flight.
|
55
67
|
This issue can be magnified if each the serialized buffer itself is larger.
|
56
68
|
"""
|
69
|
+
if not self.ok:
|
70
|
+
raise CommandClientBadError()
|
71
|
+
|
72
|
+
start_time_s = time.monotonic()
|
73
|
+
|
57
74
|
reply = '__PARSL_ZMQ_PIPES_MAGIC__'
|
58
75
|
with self._lock:
|
59
76
|
for _ in range(max_retries):
|
60
77
|
try:
|
61
78
|
logger.debug("Sending command client command")
|
79
|
+
|
80
|
+
if timeout_s is not None:
|
81
|
+
remaining_time_s = start_time_s + timeout_s - time.monotonic()
|
82
|
+
poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLOUT)
|
83
|
+
if poll_result == zmq.POLLOUT:
|
84
|
+
pass # this is OK, so continue
|
85
|
+
elif poll_result == 0:
|
86
|
+
raise CommandClientTimeoutError("Waiting for command channel to be ready for a command")
|
87
|
+
else:
|
88
|
+
raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
|
89
|
+
|
62
90
|
self.zmq_socket.send_pyobj(message, copy=True)
|
63
|
-
|
91
|
+
|
92
|
+
if timeout_s is not None:
|
93
|
+
logger.debug("Polling for command client response or timeout")
|
94
|
+
remaining_time_s = start_time_s + timeout_s - time.monotonic()
|
95
|
+
poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLIN)
|
96
|
+
if poll_result == zmq.POLLIN:
|
97
|
+
pass # this is OK, so continue
|
98
|
+
elif poll_result == 0:
|
99
|
+
logger.error("Command timed-out - command client is now bad forever")
|
100
|
+
self.ok = False
|
101
|
+
raise CommandClientTimeoutError("Waiting for a reply from command channel")
|
102
|
+
else:
|
103
|
+
raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
|
104
|
+
|
105
|
+
logger.debug("Receiving command client response")
|
64
106
|
reply = self.zmq_socket.recv_pyobj()
|
65
107
|
logger.debug("Received command client response")
|
66
108
|
except zmq.ZMQError:
|
@@ -85,20 +127,23 @@ class CommandClient:
|
|
85
127
|
class TasksOutgoing:
|
86
128
|
""" Outgoing task queue from the executor to the Interchange
|
87
129
|
"""
|
88
|
-
def __init__(self,
|
130
|
+
def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
|
89
131
|
"""
|
90
132
|
Parameters
|
91
133
|
----------
|
92
134
|
|
93
|
-
zmq_context: curvezmq.ClientContext
|
94
|
-
CurveZMQ client context used to create secure sockets
|
95
135
|
ip_address: str
|
96
136
|
IP address of the client (where Parsl runs)
|
137
|
+
|
97
138
|
port_range: tuple(int, int)
|
98
139
|
Port range for the comms between client and interchange
|
99
140
|
|
141
|
+
cert_dir: str | None
|
142
|
+
Path to the certificate directory. Setting this to None will disable encryption.
|
143
|
+
default: None
|
144
|
+
|
100
145
|
"""
|
101
|
-
self.zmq_context =
|
146
|
+
self.zmq_context = curvezmq.ClientContext(cert_dir)
|
102
147
|
self.zmq_socket = self.zmq_context.socket(zmq.DEALER)
|
103
148
|
self.zmq_socket.set_hwm(0)
|
104
149
|
self.port = self.zmq_socket.bind_to_random_port("tcp://{}".format(ip_address),
|
@@ -138,20 +183,23 @@ class ResultsIncoming:
|
|
138
183
|
""" Incoming results queue from the Interchange to the executor
|
139
184
|
"""
|
140
185
|
|
141
|
-
def __init__(self,
|
186
|
+
def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
|
142
187
|
"""
|
143
188
|
Parameters
|
144
189
|
----------
|
145
190
|
|
146
|
-
zmq_context: curvezmq.ClientContext
|
147
|
-
CurveZMQ client context used to create secure sockets
|
148
191
|
ip_address: str
|
149
192
|
IP address of the client (where Parsl runs)
|
193
|
+
|
150
194
|
port_range: tuple(int, int)
|
151
195
|
Port range for the comms between client and interchange
|
152
196
|
|
197
|
+
cert_dir: str | None
|
198
|
+
Path to the certificate directory. Setting this to None will disable encryption.
|
199
|
+
default: None
|
200
|
+
|
153
201
|
"""
|
154
|
-
self.zmq_context =
|
202
|
+
self.zmq_context = curvezmq.ClientContext(cert_dir)
|
155
203
|
self.results_receiver = self.zmq_context.socket(zmq.DEALER)
|
156
204
|
self.results_receiver.set_hwm(0)
|
157
205
|
self.port = self.results_receiver.bind_to_random_port("tcp://{}".format(ip_address),
|
@@ -1,30 +1,30 @@
|
|
1
1
|
"""RadicalPilotExecutor builds on the RADICAL-Pilot/Parsl
|
2
2
|
"""
|
3
|
+
import inspect
|
4
|
+
import logging
|
3
5
|
import os
|
4
|
-
import sys
|
5
|
-
import time
|
6
|
-
import parsl
|
7
6
|
import queue
|
8
|
-
import
|
9
|
-
import inspect
|
10
|
-
import requests
|
11
|
-
import typeguard
|
7
|
+
import sys
|
12
8
|
import threading as mt
|
13
|
-
|
9
|
+
import time
|
10
|
+
from concurrent.futures import Future
|
14
11
|
from functools import partial
|
15
|
-
from typing import Optional, Dict
|
16
12
|
from pathlib import Path, PosixPath
|
17
|
-
from
|
13
|
+
from typing import Dict, Optional
|
14
|
+
|
15
|
+
import requests
|
16
|
+
import typeguard
|
18
17
|
|
18
|
+
import parsl
|
19
|
+
from parsl.app.errors import BashExitFailure, RemoteExceptionWrapper
|
19
20
|
from parsl.app.python import timeout
|
20
|
-
from .rpex_resources import ResourceConfig
|
21
21
|
from parsl.data_provider.files import File
|
22
|
-
from parsl.utils import RepresentationMixin
|
23
|
-
from parsl.app.errors import BashExitFailure
|
24
22
|
from parsl.executors.base import ParslExecutor
|
25
|
-
from parsl.app.errors import RemoteExceptionWrapper
|
26
23
|
from parsl.serialize import deserialize, pack_res_spec_apply_message
|
27
|
-
from parsl.serialize.errors import
|
24
|
+
from parsl.serialize.errors import DeserializationError, SerializationError
|
25
|
+
from parsl.utils import RepresentationMixin
|
26
|
+
|
27
|
+
from .rpex_resources import ResourceConfig
|
28
28
|
|
29
29
|
try:
|
30
30
|
import radical.pilot as rp
|
@@ -1,17 +1,12 @@
|
|
1
|
-
import sys
|
2
1
|
import json
|
3
|
-
|
2
|
+
import sys
|
4
3
|
from typing import List
|
5
4
|
|
6
|
-
_setup_paths: List[str]
|
5
|
+
_setup_paths: List[str] = []
|
7
6
|
try:
|
8
7
|
import radical.pilot as rp
|
9
|
-
import radical.utils as ru
|
10
8
|
except ImportError:
|
11
|
-
|
12
|
-
else:
|
13
|
-
_setup_paths = [rp.sdist_path,
|
14
|
-
ru.sdist_path]
|
9
|
+
pass
|
15
10
|
|
16
11
|
|
17
12
|
MPI = "mpi"
|
@@ -77,7 +72,7 @@ class ResourceConfig:
|
|
77
72
|
|
78
73
|
pilot_env_setup : list
|
79
74
|
List of setup commands/packages for the pilot environment.
|
80
|
-
Default
|
75
|
+
Default is an empty list.
|
81
76
|
|
82
77
|
python_v : str
|
83
78
|
The Python version to be used in the pilot environment.
|
@@ -1,10 +1,11 @@
|
|
1
1
|
import sys
|
2
|
+
|
2
3
|
import radical.pilot as rp
|
3
4
|
|
4
5
|
import parsl.app.errors as pe
|
5
6
|
from parsl.app.bash import remote_side_bash_executor
|
6
|
-
from parsl.serialize import unpack_res_spec_apply_message, serialize
|
7
7
|
from parsl.executors.high_throughput.process_worker_pool import execute_task
|
8
|
+
from parsl.serialize import serialize, unpack_res_spec_apply_message
|
8
9
|
|
9
10
|
|
10
11
|
class ParslWorker:
|