parsl 2024.5.20__py3-none-any.whl → 2024.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +6 -4
  3. parsl/app/app.py +3 -6
  4. parsl/app/bash.py +4 -4
  5. parsl/app/errors.py +5 -3
  6. parsl/app/futures.py +3 -3
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/channels/__init__.py +2 -2
  10. parsl/channels/base.py +0 -1
  11. parsl/channels/errors.py +2 -1
  12. parsl/channels/oauth_ssh/oauth_ssh.py +4 -3
  13. parsl/channels/ssh/ssh.py +9 -1
  14. parsl/channels/ssh_il/ssh_il.py +1 -0
  15. parsl/concurrent/__init__.py +2 -2
  16. parsl/config.py +12 -6
  17. parsl/configs/ASPIRE1.py +3 -3
  18. parsl/configs/Azure.py +6 -7
  19. parsl/configs/ad_hoc.py +4 -3
  20. parsl/configs/bridges.py +3 -3
  21. parsl/configs/cc_in2p3.py +2 -2
  22. parsl/configs/ec2.py +1 -1
  23. parsl/configs/expanse.py +1 -2
  24. parsl/configs/frontera.py +2 -3
  25. parsl/configs/htex_local.py +1 -2
  26. parsl/configs/illinoiscluster.py +1 -1
  27. parsl/configs/kubernetes.py +1 -2
  28. parsl/configs/midway.py +3 -3
  29. parsl/configs/osg.py +1 -1
  30. parsl/configs/polaris.py +1 -1
  31. parsl/configs/stampede2.py +4 -5
  32. parsl/configs/summit.py +1 -3
  33. parsl/configs/toss3_llnl.py +1 -2
  34. parsl/configs/vineex_local.py +3 -3
  35. parsl/configs/wqex_local.py +2 -2
  36. parsl/data_provider/data_manager.py +3 -3
  37. parsl/data_provider/file_noop.py +1 -2
  38. parsl/data_provider/files.py +3 -3
  39. parsl/data_provider/ftp.py +1 -3
  40. parsl/data_provider/globus.py +7 -6
  41. parsl/data_provider/http.py +2 -2
  42. parsl/data_provider/rsync.py +1 -1
  43. parsl/data_provider/staging.py +2 -2
  44. parsl/data_provider/zip.py +4 -5
  45. parsl/dataflow/dependency_resolvers.py +115 -0
  46. parsl/dataflow/dflow.py +65 -54
  47. parsl/dataflow/errors.py +2 -1
  48. parsl/dataflow/futures.py +1 -2
  49. parsl/dataflow/memoization.py +5 -5
  50. parsl/dataflow/rundirs.py +1 -1
  51. parsl/dataflow/taskrecord.py +4 -5
  52. parsl/executors/__init__.py +3 -3
  53. parsl/executors/base.py +1 -0
  54. parsl/executors/flux/execute_parsl_task.py +2 -2
  55. parsl/executors/flux/executor.py +11 -12
  56. parsl/executors/flux/flux_instance_manager.py +3 -3
  57. parsl/executors/high_throughput/errors.py +10 -0
  58. parsl/executors/high_throughput/executor.py +31 -36
  59. parsl/executors/high_throughput/interchange.py +16 -18
  60. parsl/executors/high_throughput/manager_record.py +1 -0
  61. parsl/executors/high_throughput/monitoring_info.py +2 -1
  62. parsl/executors/high_throughput/mpi_executor.py +6 -3
  63. parsl/executors/high_throughput/mpi_prefix_composer.py +19 -3
  64. parsl/executors/high_throughput/mpi_resource_management.py +1 -2
  65. parsl/executors/high_throughput/probe.py +6 -4
  66. parsl/executors/high_throughput/process_worker_pool.py +31 -20
  67. parsl/executors/high_throughput/zmq_pipes.py +63 -15
  68. parsl/executors/radical/executor.py +15 -15
  69. parsl/executors/radical/rpex_master.py +1 -2
  70. parsl/executors/radical/rpex_resources.py +4 -9
  71. parsl/executors/radical/rpex_worker.py +2 -1
  72. parsl/executors/status_handling.py +5 -4
  73. parsl/executors/taskvine/__init__.py +1 -1
  74. parsl/executors/taskvine/errors.py +1 -1
  75. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  76. parsl/executors/taskvine/executor.py +23 -24
  77. parsl/executors/taskvine/factory.py +1 -1
  78. parsl/executors/taskvine/manager.py +11 -13
  79. parsl/executors/threads.py +4 -5
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +5 -4
  82. parsl/executors/workqueue/executor.py +26 -27
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +1 -1
  85. parsl/jobs/job_status_poller.py +2 -5
  86. parsl/jobs/states.py +1 -1
  87. parsl/jobs/strategy.py +2 -2
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/log_utils.py +1 -2
  91. parsl/monitoring/db_manager.py +16 -10
  92. parsl/monitoring/monitoring.py +11 -15
  93. parsl/monitoring/queries/pandas.py +1 -2
  94. parsl/monitoring/radios.py +2 -4
  95. parsl/monitoring/remote.py +13 -8
  96. parsl/monitoring/router.py +8 -11
  97. parsl/monitoring/types.py +2 -0
  98. parsl/monitoring/visualization/app.py +4 -2
  99. parsl/monitoring/visualization/models.py +0 -1
  100. parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
  101. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  102. parsl/monitoring/visualization/utils.py +0 -1
  103. parsl/monitoring/visualization/views.py +16 -9
  104. parsl/multiprocessing.py +0 -1
  105. parsl/process_loggers.py +1 -2
  106. parsl/providers/__init__.py +9 -12
  107. parsl/providers/ad_hoc/ad_hoc.py +1 -1
  108. parsl/providers/aws/aws.py +2 -3
  109. parsl/providers/azure/azure.py +4 -5
  110. parsl/providers/base.py +1 -1
  111. parsl/providers/cluster_provider.py +1 -1
  112. parsl/providers/cobalt/cobalt.py +3 -3
  113. parsl/providers/condor/condor.py +4 -2
  114. parsl/providers/errors.py +2 -2
  115. parsl/providers/googlecloud/googlecloud.py +2 -1
  116. parsl/providers/grid_engine/grid_engine.py +2 -2
  117. parsl/providers/kubernetes/kube.py +5 -3
  118. parsl/providers/local/local.py +5 -1
  119. parsl/providers/lsf/lsf.py +2 -2
  120. parsl/providers/pbspro/pbspro.py +1 -1
  121. parsl/providers/slurm/slurm.py +5 -5
  122. parsl/providers/torque/torque.py +1 -1
  123. parsl/serialize/__init__.py +8 -3
  124. parsl/serialize/base.py +1 -2
  125. parsl/serialize/concretes.py +5 -4
  126. parsl/serialize/proxystore.py +3 -2
  127. parsl/tests/__init__.py +1 -1
  128. parsl/tests/configs/ad_hoc_cluster_htex.py +4 -4
  129. parsl/tests/configs/azure_single_node.py +4 -5
  130. parsl/tests/configs/bridges.py +3 -2
  131. parsl/tests/configs/cc_in2p3.py +2 -2
  132. parsl/tests/configs/comet.py +2 -1
  133. parsl/tests/configs/ec2_single_node.py +1 -2
  134. parsl/tests/configs/ec2_spot.py +1 -2
  135. parsl/tests/configs/frontera.py +3 -2
  136. parsl/tests/configs/htex_ad_hoc_cluster.py +2 -4
  137. parsl/tests/configs/htex_local.py +2 -3
  138. parsl/tests/configs/htex_local_alternate.py +8 -11
  139. parsl/tests/configs/htex_local_intask_staging.py +5 -7
  140. parsl/tests/configs/htex_local_rsync_staging.py +4 -6
  141. parsl/tests/configs/local_adhoc.py +1 -1
  142. parsl/tests/configs/local_radical.py +1 -3
  143. parsl/tests/configs/local_radical_mpi.py +2 -2
  144. parsl/tests/configs/midway.py +2 -2
  145. parsl/tests/configs/nscc_singapore.py +3 -3
  146. parsl/tests/configs/osg_htex.py +1 -1
  147. parsl/tests/configs/petrelkube.py +3 -2
  148. parsl/tests/configs/summit.py +1 -0
  149. parsl/tests/configs/swan_htex.py +2 -2
  150. parsl/tests/configs/taskvine_ex.py +3 -5
  151. parsl/tests/configs/theta.py +2 -2
  152. parsl/tests/configs/workqueue_ex.py +3 -4
  153. parsl/tests/conftest.py +6 -6
  154. parsl/tests/integration/test_channels/test_ssh_errors.py +1 -1
  155. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  156. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  157. parsl/tests/manual_tests/htex_local.py +4 -4
  158. parsl/tests/manual_tests/test_ad_hoc_htex.py +2 -1
  159. parsl/tests/manual_tests/test_basic.py +1 -0
  160. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +4 -4
  161. parsl/tests/manual_tests/test_log_filter.py +3 -1
  162. parsl/tests/manual_tests/test_memory_limits.py +6 -6
  163. parsl/tests/manual_tests/test_regression_220.py +2 -1
  164. parsl/tests/manual_tests/test_udp_simple.py +4 -3
  165. parsl/tests/manual_tests/test_worker_count.py +3 -2
  166. parsl/tests/scaling_tests/htex_local.py +2 -2
  167. parsl/tests/scaling_tests/test_scale.py +0 -9
  168. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  169. parsl/tests/scaling_tests/vineex_local.py +1 -2
  170. parsl/tests/site_tests/test_provider.py +3 -1
  171. parsl/tests/site_tests/test_site.py +2 -0
  172. parsl/tests/sites/test_affinity.py +7 -5
  173. parsl/tests/sites/test_dynamic_executor.py +3 -4
  174. parsl/tests/sites/test_ec2.py +3 -2
  175. parsl/tests/sites/test_local_adhoc.py +2 -1
  176. parsl/tests/sites/test_worker_info.py +4 -3
  177. parsl/tests/test_aalst_patterns.py +0 -1
  178. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  179. parsl/tests/test_bash_apps/test_error_codes.py +1 -4
  180. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +1 -0
  181. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +2 -2
  182. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  183. parsl/tests/test_bash_apps/test_std_uri.py +4 -9
  184. parsl/tests/test_callables.py +2 -2
  185. parsl/tests/test_checkpointing/test_periodic.py +2 -7
  186. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  187. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  188. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  189. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  190. parsl/tests/test_checkpointing/test_task_exit.py +1 -2
  191. parsl/tests/test_docs/test_from_slides.py +2 -2
  192. parsl/tests/test_docs/test_kwargs.py +1 -1
  193. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  194. parsl/tests/test_docs/test_workflow1.py +2 -2
  195. parsl/tests/test_docs/test_workflow2.py +0 -1
  196. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  197. parsl/tests/test_error_handling/test_resource_spec.py +4 -2
  198. parsl/tests/test_error_handling/test_retries.py +2 -1
  199. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  200. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  201. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  202. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  203. parsl/tests/test_flux.py +1 -1
  204. parsl/tests/test_htex/test_basic.py +0 -1
  205. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  206. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  207. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  208. parsl/tests/test_htex/test_disconnected_blocks.py +6 -4
  209. parsl/tests/test_htex/test_drain.py +5 -5
  210. parsl/tests/test_htex/test_htex.py +1 -2
  211. parsl/tests/test_htex/test_manager_failure.py +0 -1
  212. parsl/tests/test_htex/test_managers_command.py +5 -9
  213. parsl/tests/test_htex/test_missing_worker.py +2 -8
  214. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +6 -4
  215. parsl/tests/test_monitoring/test_app_names.py +3 -3
  216. parsl/tests/test_monitoring/test_basic.py +4 -6
  217. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  218. parsl/tests/test_monitoring/test_fuzz_zmq.py +6 -4
  219. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +5 -7
  220. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  221. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  222. parsl/tests/test_monitoring/test_stdouterr.py +4 -6
  223. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  224. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +1 -1
  225. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +4 -7
  226. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +15 -4
  227. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  228. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  229. parsl/tests/test_mpi_apps/test_mpiex.py +4 -3
  230. parsl/tests/test_mpi_apps/test_resource_spec.py +21 -17
  231. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +2 -0
  232. parsl/tests/test_providers/test_local_provider.py +2 -1
  233. parsl/tests/test_providers/test_pbspro_template.py +1 -1
  234. parsl/tests/test_providers/test_slurm_template.py +1 -1
  235. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  236. parsl/tests/test_python_apps/test_context_manager.py +5 -12
  237. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  238. parsl/tests/test_python_apps/test_futures.py +2 -1
  239. parsl/tests/test_python_apps/test_join.py +0 -1
  240. parsl/tests/test_python_apps/test_lifted.py +11 -7
  241. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  242. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  243. parsl/tests/test_radical/test_mpi_funcs.py +1 -1
  244. parsl/tests/test_regression/test_1480.py +2 -1
  245. parsl/tests/test_regression/test_1653.py +2 -1
  246. parsl/tests/test_regression/test_2652.py +1 -0
  247. parsl/tests/test_regression/test_69a.py +0 -1
  248. parsl/tests/test_regression/test_854.py +4 -2
  249. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  250. parsl/tests/test_regression/test_98.py +0 -1
  251. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  252. parsl/tests/test_scaling/test_regression_1621.py +0 -2
  253. parsl/tests/test_scaling/test_scale_down.py +2 -3
  254. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +4 -5
  255. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +3 -4
  256. parsl/tests/test_scaling/test_shutdown_scalein.py +1 -4
  257. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  258. parsl/tests/test_serialization/test_basic.py +2 -1
  259. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  260. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  261. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  262. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  263. parsl/tests/test_shutdown/test_kill_monitoring.py +3 -4
  264. parsl/tests/test_staging/staging_provider.py +2 -2
  265. parsl/tests/test_staging/test_1316.py +3 -4
  266. parsl/tests/test_staging/test_docs_1.py +1 -1
  267. parsl/tests/test_staging/test_docs_2.py +2 -1
  268. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  269. parsl/tests/test_staging/test_staging_https.py +2 -2
  270. parsl/tests/test_staging/test_staging_stdout.py +4 -3
  271. parsl/tests/test_staging/test_zip_in.py +6 -8
  272. parsl/tests/test_staging/test_zip_out.py +7 -9
  273. parsl/tests/test_staging/test_zip_to_zip.py +6 -8
  274. parsl/tests/test_summary.py +2 -2
  275. parsl/tests/test_thread_parallelism.py +0 -1
  276. parsl/tests/test_threads/test_configs.py +1 -2
  277. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  278. parsl/usage_tracking/api.py +2 -3
  279. parsl/usage_tracking/usage.py +8 -18
  280. parsl/utils.py +13 -2
  281. parsl/version.py +1 -1
  282. {parsl-2024.5.20.data → parsl-2024.6.3.data}/scripts/exec_parsl_function.py +5 -4
  283. {parsl-2024.5.20.data → parsl-2024.6.3.data}/scripts/process_worker_pool.py +31 -20
  284. {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/METADATA +6 -6
  285. parsl-2024.6.3.dist-info/RECORD +471 -0
  286. parsl-2024.5.20.dist-info/RECORD +0 -468
  287. {parsl-2024.5.20.data → parsl-2024.6.3.data}/scripts/parsl_coprocess.py +1 -1
  288. {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/LICENSE +0 -0
  289. {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/WHEEL +0 -0
  290. {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/entry_points.txt +0 -0
  291. {parsl-2024.5.20.dist-info → parsl-2024.6.3.dist-info}/top_level.txt +0 -0
@@ -1,44 +1,39 @@
1
+ import logging
2
+ import math
3
+ import pickle
4
+ import threading
1
5
  import typing
6
+ import warnings
2
7
  from collections import defaultdict
3
8
  from concurrent.futures import Future
4
- import typeguard
5
- import logging
6
- import threading
7
- import queue
8
- import pickle
9
9
  from dataclasses import dataclass
10
- from multiprocessing import Process, Queue
11
- from typing import Dict, Sequence
12
- from typing import List, Optional, Tuple, Union, Callable
13
- import math
14
- import warnings
10
+ from multiprocessing import Process
11
+ from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
12
+
13
+ import typeguard
15
14
 
16
15
  import parsl.launchers
17
- from parsl.usage_tracking.api import UsageInformation
18
- from parsl.serialize import pack_res_spec_apply_message, deserialize
19
- from parsl.serialize.errors import SerializationError, DeserializationError
16
+ from parsl import curvezmq
17
+ from parsl.addresses import get_all_addresses
20
18
  from parsl.app.errors import RemoteExceptionWrapper
21
- from parsl.jobs.states import JobStatus, JobState, TERMINAL_STATES
22
- from parsl.executors.high_throughput import zmq_pipes
23
- from parsl.executors.high_throughput import interchange
24
- from parsl.executors.errors import (
25
- BadMessage, ScalingFailed,
26
- )
19
+ from parsl.data_provider.staging import Staging
20
+ from parsl.executors.errors import BadMessage, ScalingFailed
21
+ from parsl.executors.high_throughput import interchange, zmq_pipes
22
+ from parsl.executors.high_throughput.errors import CommandClientTimeoutError
27
23
  from parsl.executors.high_throughput.mpi_prefix_composer import (
28
24
  VALID_LAUNCHERS,
29
- validate_resource_spec
25
+ validate_resource_spec,
30
26
  )
31
-
32
- from parsl import curvezmq
33
27
  from parsl.executors.status_handling import BlockProviderExecutor
34
- from parsl.providers.base import ExecutionProvider
35
- from parsl.data_provider.staging import Staging
36
- from parsl.addresses import get_all_addresses
37
- from parsl.process_loggers import wrap_with_logs
38
-
28
+ from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
39
29
  from parsl.multiprocessing import ForkProcess
40
- from parsl.utils import RepresentationMixin
30
+ from parsl.process_loggers import wrap_with_logs
41
31
  from parsl.providers import LocalProvider
32
+ from parsl.providers.base import ExecutionProvider
33
+ from parsl.serialize import deserialize, pack_res_spec_apply_message
34
+ from parsl.serialize.errors import DeserializationError, SerializationError
35
+ from parsl.usage_tracking.api import UsageInformation
36
+ from parsl.utils import RepresentationMixin
42
37
 
43
38
  logger = logging.getLogger(__name__)
44
39
 
@@ -415,13 +410,13 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
415
410
  )
416
411
 
417
412
  self.outgoing_q = zmq_pipes.TasksOutgoing(
418
- curvezmq.ClientContext(self.cert_dir), "127.0.0.1", self.interchange_port_range
413
+ "127.0.0.1", self.interchange_port_range, self.cert_dir
419
414
  )
420
415
  self.incoming_q = zmq_pipes.ResultsIncoming(
421
- curvezmq.ClientContext(self.cert_dir), "127.0.0.1", self.interchange_port_range
416
+ "127.0.0.1", self.interchange_port_range, self.cert_dir
422
417
  )
423
418
  self.command_client = zmq_pipes.CommandClient(
424
- curvezmq.ClientContext(self.cert_dir), "127.0.0.1", self.interchange_port_range
419
+ "127.0.0.1", self.interchange_port_range, self.cert_dir
425
420
  )
426
421
 
427
422
  self._queue_management_thread = None
@@ -531,9 +526,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
531
526
  Starts the interchange process locally and uses an internal command queue to
532
527
  get the worker task and result ports that the interchange has bound to.
533
528
  """
534
- comm_q = Queue(maxsize=10)
535
529
  self.interchange_proc = ForkProcess(target=interchange.starter,
536
- args=(comm_q,),
537
530
  kwargs={"client_ports": (self.outgoing_q.port,
538
531
  self.incoming_q.port,
539
532
  self.command_client.port),
@@ -552,9 +545,10 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
552
545
  name="HTEX-Interchange"
553
546
  )
554
547
  self.interchange_proc.start()
548
+
555
549
  try:
556
- (self.worker_task_port, self.worker_result_port) = comm_q.get(block=True, timeout=120)
557
- except queue.Empty:
550
+ (self.worker_task_port, self.worker_result_port) = self.command_client.run("WORKER_PORTS", timeout_s=120)
551
+ except CommandClientTimeoutError:
558
552
  logger.error("Interchange has not completed initialization in 120s. Aborting")
559
553
  raise Exception("Interchange failed to start")
560
554
 
@@ -645,7 +639,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
645
639
  Returns:
646
640
  Future
647
641
  """
648
- validate_resource_spec(resource_specification)
642
+
643
+ validate_resource_spec(resource_specification, self.enable_mpi_mode)
649
644
 
650
645
  if self.bad_state_is_set:
651
646
  raise self.executor_exception
@@ -1,31 +1,28 @@
1
1
  #!/usr/bin/env python
2
- import multiprocessing
3
- import zmq
2
+ import datetime
3
+ import json
4
+ import logging
4
5
  import os
5
- import sys
6
+ import pickle
6
7
  import platform
8
+ import queue
7
9
  import random
8
- import time
9
- import datetime
10
- import pickle
11
10
  import signal
12
- import logging
13
- import queue
11
+ import sys
14
12
  import threading
15
- import json
13
+ import time
14
+ from typing import Any, Dict, List, NoReturn, Optional, Sequence, Set, Tuple, cast
16
15
 
17
- from typing import cast, Any, Dict, NoReturn, Sequence, Set, Optional, Tuple, List
16
+ import zmq
18
17
 
19
18
  from parsl import curvezmq
20
- from parsl.utils import setproctitle
21
- from parsl.version import VERSION as PARSL_VERSION
22
- from parsl.serialize import serialize as serialize_object
23
-
24
19
  from parsl.app.errors import RemoteExceptionWrapper
25
20
  from parsl.executors.high_throughput.manager_record import ManagerRecord
26
21
  from parsl.monitoring.message_type import MessageType
27
22
  from parsl.process_loggers import wrap_with_logs
28
-
23
+ from parsl.serialize import serialize as serialize_object
24
+ from parsl.utils import setproctitle
25
+ from parsl.version import VERSION as PARSL_VERSION
29
26
 
30
27
  PKL_HEARTBEAT_CODE = pickle.dumps((2 ** 32) - 1)
31
28
  PKL_DRAINED_CODE = pickle.dumps((2 ** 32) - 2)
@@ -328,6 +325,9 @@ class Interchange:
328
325
 
329
326
  reply = None
330
327
 
328
+ elif command_req == "WORKER_PORTS":
329
+ reply = (self.worker_task_port, self.worker_result_port)
330
+
331
331
  else:
332
332
  logger.error(f"Received unknown command: {command_req}")
333
333
  reply = None
@@ -672,7 +672,7 @@ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string:
672
672
 
673
673
 
674
674
  @wrap_with_logs(target="interchange")
675
- def starter(comm_q: multiprocessing.Queue, *args: Any, **kwargs: Any) -> None:
675
+ def starter(*args: Any, **kwargs: Any) -> None:
676
676
  """Start the interchange process
677
677
 
678
678
  The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
@@ -680,6 +680,4 @@ def starter(comm_q: multiprocessing.Queue, *args: Any, **kwargs: Any) -> None:
680
680
  setproctitle("parsl: HTEX interchange")
681
681
  # logger = multiprocessing.get_logger()
682
682
  ic = Interchange(*args, **kwargs)
683
- comm_q.put((ic.worker_task_port,
684
- ic.worker_result_port))
685
683
  ic.start()
@@ -1,5 +1,6 @@
1
1
  from datetime import datetime
2
2
  from typing import Any, List, Optional
3
+
3
4
  from typing_extensions import TypedDict
4
5
 
5
6
 
@@ -3,6 +3,7 @@
3
3
  # then be acquired by any other code running in
4
4
  # a worker context - specifically the monitoring
5
5
  # wrapper code.
6
- from typing import Optional
7
6
  from queue import Queue
7
+ from typing import Optional
8
+
8
9
  result_queue: Optional[Queue] = None
@@ -1,10 +1,13 @@
1
1
  """A simplified interface for HTEx when running in MPI mode"""
2
- from typing import Optional, Tuple, List, Union, Callable, Dict
2
+ from typing import Callable, Dict, List, Optional, Tuple, Union
3
3
 
4
4
  import typeguard
5
5
 
6
6
  from parsl.data_provider.staging import Staging
7
- from parsl.executors.high_throughput.executor import HighThroughputExecutor, GENERAL_HTEX_PARAM_DOCS
7
+ from parsl.executors.high_throughput.executor import (
8
+ GENERAL_HTEX_PARAM_DOCS,
9
+ HighThroughputExecutor,
10
+ )
8
11
  from parsl.executors.status_handling import BlockProviderExecutor
9
12
  from parsl.jobs.states import JobStatus
10
13
  from parsl.providers import LocalProvider
@@ -20,7 +23,7 @@ class MPIExecutor(HighThroughputExecutor):
20
23
  to spawn multi-node tasks.
21
24
 
22
25
  Specify the maximum number of multi-node tasks to run at once using ``max_workers_per_block``.
23
- The maximum number should be smaller than the ``nodes_per_block`` in the Provider.
26
+ The value should be less than or equal to the ``nodes_per_block`` in the Provider.
24
27
 
25
28
  Parameters
26
29
  ----------
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Dict, List, Tuple, Set
2
+ from typing import Dict, List, Set, Tuple
3
3
 
4
4
  logger = logging.getLogger(__name__)
5
5
 
@@ -8,8 +8,18 @@ VALID_LAUNCHERS = ('srun',
8
8
  'mpiexec')
9
9
 
10
10
 
11
+ class MissingResourceSpecification(Exception):
12
+ """Exception raised when input is not supplied a resource specification"""
13
+
14
+ def __init__(self, reason: str):
15
+ self.reason = reason
16
+
17
+ def __str__(self):
18
+ return f"Missing resource specification: {self.reason}"
19
+
20
+
11
21
  class InvalidResourceSpecification(Exception):
12
- """Exception raised when Invalid keys are supplied via resource specification"""
22
+ """Exception raised when Invalid input is supplied via resource specification"""
13
23
 
14
24
  def __init__(self, invalid_keys: Set[str]):
15
25
  self.invalid_keys = invalid_keys
@@ -18,13 +28,19 @@ class InvalidResourceSpecification(Exception):
18
28
  return f"Invalid resource specification options supplied: {self.invalid_keys}"
19
29
 
20
30
 
21
- def validate_resource_spec(resource_spec: Dict[str, str]):
31
+ def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
22
32
  """Basic validation of keys in the resource_spec
23
33
 
24
34
  Raises: InvalidResourceSpecification if the resource_spec
25
35
  is invalid (e.g, contains invalid keys)
26
36
  """
27
37
  user_keys = set(resource_spec.keys())
38
+
39
+ # empty resource_spec when mpi_mode is set causes parsl to hang
40
+ # ref issue #3427
41
+ if is_mpi_enabled and len(user_keys) == 0:
42
+ raise MissingResourceSpecification('MPI mode requires optional parsl_resource_specification keyword argument to be configured')
43
+
28
44
  legal_keys = set(("ranks_per_node",
29
45
  "num_nodes",
30
46
  "num_ranks",
@@ -8,8 +8,7 @@ from enum import Enum
8
8
  from typing import Dict, List
9
9
 
10
10
  from parsl.multiprocessing import SpawnContext
11
- from parsl.serialize import (pack_res_spec_apply_message,
12
- unpack_res_spec_apply_message)
11
+ from parsl.serialize import pack_res_spec_apply_message, unpack_res_spec_apply_message
13
12
 
14
13
  logger = logging.getLogger(__name__)
15
14
 
@@ -1,11 +1,13 @@
1
- import zmq
2
1
  import argparse
3
- import uuid
4
- import time
5
2
  import logging
6
- from parsl.addresses import get_all_addresses
3
+ import time
4
+ import uuid
5
+
6
+ import zmq
7
7
  from zmq.utils.monitor import recv_monitor_message
8
8
 
9
+ from parsl.addresses import get_all_addresses
10
+
9
11
  logger = logging.getLogger(__name__)
10
12
 
11
13
 
@@ -1,39 +1,41 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  import argparse
4
+ import json
4
5
  import logging
6
+ import math
7
+ import multiprocessing
5
8
  import os
6
- import sys
9
+ import pickle
7
10
  import platform
11
+ import queue
12
+ import sys
8
13
  import threading
9
- import pickle
10
14
  import time
11
- import queue
12
15
  import uuid
13
- from typing import Sequence, Optional, Dict, List
14
-
15
- import zmq
16
- import math
17
- import json
18
- import psutil
19
- import multiprocessing
20
16
  from multiprocessing.managers import DictProxy
21
17
  from multiprocessing.sharedctypes import Synchronized
18
+ from typing import Dict, List, Optional, Sequence
19
+
20
+ import psutil
21
+ import zmq
22
22
 
23
23
  from parsl import curvezmq
24
- from parsl.process_loggers import wrap_with_logs
25
- from parsl.version import VERSION as PARSL_VERSION
26
24
  from parsl.app.errors import RemoteExceptionWrapper
27
25
  from parsl.executors.high_throughput.errors import WorkerLost
28
- from parsl.executors.high_throughput.probe import probe_addresses
29
- from parsl.multiprocessing import SpawnContext
30
- from parsl.serialize import unpack_res_spec_apply_message, serialize
26
+ from parsl.executors.high_throughput.mpi_prefix_composer import (
27
+ VALID_LAUNCHERS,
28
+ compose_all,
29
+ )
31
30
  from parsl.executors.high_throughput.mpi_resource_management import (
31
+ MPITaskScheduler,
32
32
  TaskScheduler,
33
- MPITaskScheduler
34
33
  )
35
-
36
- from parsl.executors.high_throughput.mpi_prefix_composer import compose_all, VALID_LAUNCHERS
34
+ from parsl.executors.high_throughput.probe import probe_addresses
35
+ from parsl.multiprocessing import SpawnContext
36
+ from parsl.process_loggers import wrap_with_logs
37
+ from parsl.serialize import serialize, unpack_res_spec_apply_message
38
+ from parsl.version import VERSION as PARSL_VERSION
37
39
 
38
40
  HEARTBEAT_CODE = (2 ** 32) - 1
39
41
  DRAINED_CODE = (2 ** 32) - 2
@@ -677,7 +679,8 @@ def worker(
677
679
  # If desired, set process affinity
678
680
  if cpu_affinity != "none":
679
681
  # Count the number of cores per worker
680
- avail_cores = sorted(os.sched_getaffinity(0)) # Get the available threads
682
+ # OSX does not implement os.sched_getaffinity
683
+ avail_cores = sorted(os.sched_getaffinity(0)) # type: ignore[attr-defined, unused-ignore]
681
684
  cores_per_worker = len(avail_cores) // pool_size
682
685
  assert cores_per_worker > 0, "Affinity does not work if there are more workers than cores"
683
686
 
@@ -717,7 +720,15 @@ def worker(
717
720
  os.environ["KMP_AFFINITY"] = f"explicit,proclist=[{proc_list}]" # For Intel OpenMP
718
721
 
719
722
  # Set the affinity for this worker
720
- os.sched_setaffinity(0, my_cores)
723
+ # OSX does not implement os.sched_setaffinity so type checking
724
+ # is ignored here in two ways:
725
+ # On a platform without sched_setaffinity, that attribute will not
726
+ # be defined, so ignore[attr-defined] will tell mypy to ignore this
727
+ # incorrect-for-OS X attribute access.
728
+ # On a platform with sched_setaffinity, that type: ignore message
729
+ # will be redundant, and ignore[unused-ignore] tells mypy to ignore
730
+ # that this ignore is unneeded.
731
+ os.sched_setaffinity(0, my_cores) # type: ignore[attr-defined, unused-ignore]
721
732
  logger.info("Set worker CPU affinity to {}".format(my_cores))
722
733
 
723
734
  # If desired, pin to accelerator
@@ -1,10 +1,18 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
- import zmq
4
3
  import logging
5
4
  import threading
5
+ import time
6
+ from typing import Optional
7
+
8
+ import zmq
6
9
 
7
10
  from parsl import curvezmq
11
+ from parsl.errors import InternalConsistencyError
12
+ from parsl.executors.high_throughput.errors import (
13
+ CommandClientBadError,
14
+ CommandClientTimeoutError,
15
+ )
8
16
 
9
17
  logger = logging.getLogger(__name__)
10
18
 
@@ -12,25 +20,29 @@ logger = logging.getLogger(__name__)
12
20
  class CommandClient:
13
21
  """ CommandClient
14
22
  """
15
- def __init__(self, zmq_context: curvezmq.ClientContext, ip_address, port_range):
23
+ def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
16
24
  """
17
25
  Parameters
18
26
  ----------
19
27
 
20
- zmq_context: curvezmq.ClientContext
21
- CurveZMQ client context used to create secure sockets
22
28
  ip_address: str
23
29
  IP address of the client (where Parsl runs)
30
+
24
31
  port_range: tuple(int, int)
25
32
  Port range for the comms between client and interchange
26
33
 
34
+ cert_dir: str | None
35
+ Path to the certificate directory. Setting this to None will disable encryption.
36
+ default: None
37
+
27
38
  """
28
- self.zmq_context = zmq_context
39
+ self.zmq_context = curvezmq.ClientContext(cert_dir)
29
40
  self.ip_address = ip_address
30
41
  self.port_range = port_range
31
42
  self.port = None
32
43
  self.create_socket_and_bind()
33
44
  self._lock = threading.Lock()
45
+ self.ok = True
34
46
 
35
47
  def create_socket_and_bind(self):
36
48
  """ Creates socket and binds to a port.
@@ -46,7 +58,7 @@ class CommandClient:
46
58
  else:
47
59
  self.zmq_socket.bind("tcp://{}:{}".format(self.ip_address, self.port))
48
60
 
49
- def run(self, message, max_retries=3):
61
+ def run(self, message, max_retries=3, timeout_s=None):
50
62
  """ This function needs to be fast at the same time aware of the possibility of
51
63
  ZMQ pipes overflowing.
52
64
 
@@ -54,13 +66,43 @@ class CommandClient:
54
66
  in ZMQ sockets reaching a broken state once there are ~10k tasks in flight.
55
67
  This issue can be magnified if each serialized buffer is itself larger.
56
68
  """
69
+ if not self.ok:
70
+ raise CommandClientBadError()
71
+
72
+ start_time_s = time.monotonic()
73
+
57
74
  reply = '__PARSL_ZMQ_PIPES_MAGIC__'
58
75
  with self._lock:
59
76
  for _ in range(max_retries):
60
77
  try:
61
78
  logger.debug("Sending command client command")
79
+
80
+ if timeout_s is not None:
81
+ remaining_time_s = start_time_s + timeout_s - time.monotonic()
82
+ poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLOUT)
83
+ if poll_result == zmq.POLLOUT:
84
+ pass # this is OK, so continue
85
+ elif poll_result == 0:
86
+ raise CommandClientTimeoutError("Waiting for command channel to be ready for a command")
87
+ else:
88
+ raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
89
+
62
90
  self.zmq_socket.send_pyobj(message, copy=True)
63
- logger.debug("Waiting for command client response")
91
+
92
+ if timeout_s is not None:
93
+ logger.debug("Polling for command client response or timeout")
94
+ remaining_time_s = start_time_s + timeout_s - time.monotonic()
95
+ poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLIN)
96
+ if poll_result == zmq.POLLIN:
97
+ pass # this is OK, so continue
98
+ elif poll_result == 0:
99
+ logger.error("Command timed-out - command client is now bad forever")
100
+ self.ok = False
101
+ raise CommandClientTimeoutError("Waiting for a reply from command channel")
102
+ else:
103
+ raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
104
+
105
+ logger.debug("Receiving command client response")
64
106
  reply = self.zmq_socket.recv_pyobj()
65
107
  logger.debug("Received command client response")
66
108
  except zmq.ZMQError:
@@ -85,20 +127,23 @@ class CommandClient:
85
127
  class TasksOutgoing:
86
128
  """ Outgoing task queue from the executor to the Interchange
87
129
  """
88
- def __init__(self, zmq_context: curvezmq.ClientContext, ip_address, port_range):
130
+ def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
89
131
  """
90
132
  Parameters
91
133
  ----------
92
134
 
93
- zmq_context: curvezmq.ClientContext
94
- CurveZMQ client context used to create secure sockets
95
135
  ip_address: str
96
136
  IP address of the client (where Parsl runs)
137
+
97
138
  port_range: tuple(int, int)
98
139
  Port range for the comms between client and interchange
99
140
 
141
+ cert_dir: str | None
142
+ Path to the certificate directory. Setting this to None will disable encryption.
143
+ default: None
144
+
100
145
  """
101
- self.zmq_context = zmq_context
146
+ self.zmq_context = curvezmq.ClientContext(cert_dir)
102
147
  self.zmq_socket = self.zmq_context.socket(zmq.DEALER)
103
148
  self.zmq_socket.set_hwm(0)
104
149
  self.port = self.zmq_socket.bind_to_random_port("tcp://{}".format(ip_address),
@@ -138,20 +183,23 @@ class ResultsIncoming:
138
183
  """ Incoming results queue from the Interchange to the executor
139
184
  """
140
185
 
141
- def __init__(self, zmq_context: curvezmq.ClientContext, ip_address, port_range):
186
+ def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
142
187
  """
143
188
  Parameters
144
189
  ----------
145
190
 
146
- zmq_context: curvezmq.ClientContext
147
- CurveZMQ client context used to create secure sockets
148
191
  ip_address: str
149
192
  IP address of the client (where Parsl runs)
193
+
150
194
  port_range: tuple(int, int)
151
195
  Port range for the comms between client and interchange
152
196
 
197
+ cert_dir: str | None
198
+ Path to the certificate directory. Setting this to None will disable encryption.
199
+ default: None
200
+
153
201
  """
154
- self.zmq_context = zmq_context
202
+ self.zmq_context = curvezmq.ClientContext(cert_dir)
155
203
  self.results_receiver = self.zmq_context.socket(zmq.DEALER)
156
204
  self.results_receiver.set_hwm(0)
157
205
  self.port = self.results_receiver.bind_to_random_port("tcp://{}".format(ip_address),
@@ -1,30 +1,30 @@
1
1
  """RadicalPilotExecutor builds on the RADICAL-Pilot/Parsl
2
2
  """
3
+ import inspect
4
+ import logging
3
5
  import os
4
- import sys
5
- import time
6
- import parsl
7
6
  import queue
8
- import logging
9
- import inspect
10
- import requests
11
- import typeguard
7
+ import sys
12
8
  import threading as mt
13
-
9
+ import time
10
+ from concurrent.futures import Future
14
11
  from functools import partial
15
- from typing import Optional, Dict
16
12
  from pathlib import Path, PosixPath
17
- from concurrent.futures import Future
13
+ from typing import Dict, Optional
14
+
15
+ import requests
16
+ import typeguard
18
17
 
18
+ import parsl
19
+ from parsl.app.errors import BashExitFailure, RemoteExceptionWrapper
19
20
  from parsl.app.python import timeout
20
- from .rpex_resources import ResourceConfig
21
21
  from parsl.data_provider.files import File
22
- from parsl.utils import RepresentationMixin
23
- from parsl.app.errors import BashExitFailure
24
22
  from parsl.executors.base import ParslExecutor
25
- from parsl.app.errors import RemoteExceptionWrapper
26
23
  from parsl.serialize import deserialize, pack_res_spec_apply_message
27
- from parsl.serialize.errors import SerializationError, DeserializationError
24
+ from parsl.serialize.errors import DeserializationError, SerializationError
25
+ from parsl.utils import RepresentationMixin
26
+
27
+ from .rpex_resources import ResourceConfig
28
28
 
29
29
  try:
30
30
  import radical.pilot as rp
@@ -2,9 +2,8 @@
2
2
 
3
3
  import sys
4
4
 
5
- import radical.utils as ru
6
5
  import radical.pilot as rp
7
-
6
+ import radical.utils as ru
8
7
 
9
8
  # ------------------------------------------------------------------------------
10
9
  #
@@ -1,17 +1,12 @@
1
- import sys
2
1
  import json
3
-
2
+ import sys
4
3
  from typing import List
5
4
 
6
- _setup_paths: List[str]
5
+ _setup_paths: List[str] = []
7
6
  try:
8
7
  import radical.pilot as rp
9
- import radical.utils as ru
10
8
  except ImportError:
11
- _setup_paths = []
12
- else:
13
- _setup_paths = [rp.sdist_path,
14
- ru.sdist_path]
9
+ pass
15
10
 
16
11
 
17
12
  MPI = "mpi"
@@ -77,7 +72,7 @@ class ResourceConfig:
77
72
 
78
73
  pilot_env_setup : list
79
74
  List of setup commands/packages for the pilot environment.
80
- Default setup includes "parsl", rp.sdist_path, and ru.sdist_path.
75
+ Default is an empty list.
81
76
 
82
77
  python_v : str
83
78
  The Python version to be used in the pilot environment.
@@ -1,10 +1,11 @@
1
1
  import sys
2
+
2
3
  import radical.pilot as rp
3
4
 
4
5
  import parsl.app.errors as pe
5
6
  from parsl.app.bash import remote_side_bash_executor
6
- from parsl.serialize import unpack_res_spec_apply_message, serialize
7
7
  from parsl.executors.high_throughput.process_worker_pool import execute_task
8
+ from parsl.serialize import serialize, unpack_res_spec_apply_message
8
9
 
9
10
 
10
11
  class ParslWorker: