parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +26 -6
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +53 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +259 -223
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +307 -285
  57. parsl/executors/high_throughput/interchange.py +137 -168
  58. parsl/executors/high_throughput/manager_record.py +4 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +77 -75
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +38 -55
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +17 -13
  78. parsl/executors/taskvine/manager_config.py +7 -2
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +28 -112
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +0 -6
  91. parsl/log_utils.py +1 -2
  92. parsl/monitoring/db_manager.py +55 -93
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +85 -311
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -9
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +3 -9
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +0 -7
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +11 -10
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +5 -5
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +137 -4
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.18.dist-info/METADATA +0 -98
  360. parsl-2024.3.18.dist-info/RECORD +0 -449
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,19 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
- import zmq
4
3
  import logging
5
4
  import threading
5
+ import time
6
+ from typing import Optional
7
+
8
+ import zmq
6
9
 
7
10
  from parsl import curvezmq
11
+ from parsl.addresses import tcp_url
12
+ from parsl.errors import InternalConsistencyError
13
+ from parsl.executors.high_throughput.errors import (
14
+ CommandClientBadError,
15
+ CommandClientTimeoutError,
16
+ )
8
17
 
9
18
  logger = logging.getLogger(__name__)
10
19
 
@@ -12,25 +21,29 @@ logger = logging.getLogger(__name__)
12
21
  class CommandClient:
13
22
  """ CommandClient
14
23
  """
15
- def __init__(self, zmq_context: curvezmq.ClientContext, ip_address, port_range):
24
+ def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
16
25
  """
17
26
  Parameters
18
27
  ----------
19
28
 
20
- zmq_context: curvezmq.ClientContext
21
- CurveZMQ client context used to create secure sockets
22
29
  ip_address: str
23
30
  IP address of the client (where Parsl runs)
31
+
24
32
  port_range: tuple(int, int)
25
33
  Port range for the comms between client and interchange
26
34
 
35
+ cert_dir: str | None
36
+ Path to the certificate directory. Setting this to None will disable encryption.
37
+ default: None
38
+
27
39
  """
28
- self.zmq_context = zmq_context
40
+ self.zmq_context = curvezmq.ClientContext(cert_dir)
29
41
  self.ip_address = ip_address
30
42
  self.port_range = port_range
31
43
  self.port = None
32
44
  self.create_socket_and_bind()
33
45
  self._lock = threading.Lock()
46
+ self.ok = True
34
47
 
35
48
  def create_socket_and_bind(self):
36
49
  """ Creates socket and binds to a port.
@@ -40,13 +53,13 @@ class CommandClient:
40
53
  self.zmq_socket = self.zmq_context.socket(zmq.REQ)
41
54
  self.zmq_socket.setsockopt(zmq.LINGER, 0)
42
55
  if self.port is None:
43
- self.port = self.zmq_socket.bind_to_random_port("tcp://{}".format(self.ip_address),
56
+ self.port = self.zmq_socket.bind_to_random_port(tcp_url(self.ip_address),
44
57
  min_port=self.port_range[0],
45
58
  max_port=self.port_range[1])
46
59
  else:
47
- self.zmq_socket.bind("tcp://{}:{}".format(self.ip_address, self.port))
60
+ self.zmq_socket.bind(tcp_url(self.ip_address, self.port))
48
61
 
49
- def run(self, message, max_retries=3):
62
+ def run(self, message, max_retries=3, timeout_s=None):
50
63
  """ This function needs to be fast at the same time aware of the possibility of
51
64
  ZMQ pipes overflowing.
52
65
 
@@ -54,13 +67,43 @@ class CommandClient:
54
67
  in ZMQ sockets reaching a broken state once there are ~10k tasks in flight.
55
68
  This issue can be magnified if each the serialized buffer itself is larger.
56
69
  """
70
+ if not self.ok:
71
+ raise CommandClientBadError()
72
+
73
+ start_time_s = time.monotonic()
74
+
57
75
  reply = '__PARSL_ZMQ_PIPES_MAGIC__'
58
76
  with self._lock:
59
77
  for _ in range(max_retries):
60
78
  try:
61
79
  logger.debug("Sending command client command")
80
+
81
+ if timeout_s is not None:
82
+ remaining_time_s = start_time_s + timeout_s - time.monotonic()
83
+ poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLOUT)
84
+ if poll_result == zmq.POLLOUT:
85
+ pass # this is OK, so continue
86
+ elif poll_result == 0:
87
+ raise CommandClientTimeoutError("Waiting for command channel to be ready for a command")
88
+ else:
89
+ raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
90
+
62
91
  self.zmq_socket.send_pyobj(message, copy=True)
63
- logger.debug("Waiting for command client response")
92
+
93
+ if timeout_s is not None:
94
+ logger.debug("Polling for command client response or timeout")
95
+ remaining_time_s = start_time_s + timeout_s - time.monotonic()
96
+ poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLIN)
97
+ if poll_result == zmq.POLLIN:
98
+ pass # this is OK, so continue
99
+ elif poll_result == 0:
100
+ logger.error("Command timed-out - command client is now bad forever")
101
+ self.ok = False
102
+ raise CommandClientTimeoutError("Waiting for a reply from command channel")
103
+ else:
104
+ raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
105
+
106
+ logger.debug("Receiving command client response")
64
107
  reply = self.zmq_socket.recv_pyobj()
65
108
  logger.debug("Received command client response")
66
109
  except zmq.ZMQError:
@@ -85,23 +128,26 @@ class CommandClient:
85
128
  class TasksOutgoing:
86
129
  """ Outgoing task queue from the executor to the Interchange
87
130
  """
88
- def __init__(self, zmq_context: curvezmq.ClientContext, ip_address, port_range):
131
+ def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
89
132
  """
90
133
  Parameters
91
134
  ----------
92
135
 
93
- zmq_context: curvezmq.ClientContext
94
- CurveZMQ client context used to create secure sockets
95
136
  ip_address: str
96
137
  IP address of the client (where Parsl runs)
138
+
97
139
  port_range: tuple(int, int)
98
140
  Port range for the comms between client and interchange
99
141
 
142
+ cert_dir: str | None
143
+ Path to the certificate directory. Setting this to None will disable encryption.
144
+ default: None
145
+
100
146
  """
101
- self.zmq_context = zmq_context
147
+ self.zmq_context = curvezmq.ClientContext(cert_dir)
102
148
  self.zmq_socket = self.zmq_context.socket(zmq.DEALER)
103
149
  self.zmq_socket.set_hwm(0)
104
- self.port = self.zmq_socket.bind_to_random_port("tcp://{}".format(ip_address),
150
+ self.port = self.zmq_socket.bind_to_random_port(tcp_url(ip_address),
105
151
  min_port=port_range[0],
106
152
  max_port=port_range[1])
107
153
  self.poller = zmq.Poller()
@@ -138,31 +184,43 @@ class ResultsIncoming:
138
184
  """ Incoming results queue from the Interchange to the executor
139
185
  """
140
186
 
141
- def __init__(self, zmq_context: curvezmq.ClientContext, ip_address, port_range):
187
+ def __init__(self, ip_address, port_range, cert_dir: Optional[str] = None):
142
188
  """
143
189
  Parameters
144
190
  ----------
145
191
 
146
- zmq_context: curvezmq.ClientContext
147
- CurveZMQ client context used to create secure sockets
148
192
  ip_address: str
149
193
  IP address of the client (where Parsl runs)
194
+
150
195
  port_range: tuple(int, int)
151
196
  Port range for the comms between client and interchange
152
197
 
198
+ cert_dir: str | None
199
+ Path to the certificate directory. Setting this to None will disable encryption.
200
+ default: None
201
+
153
202
  """
154
- self.zmq_context = zmq_context
203
+ self.zmq_context = curvezmq.ClientContext(cert_dir)
155
204
  self.results_receiver = self.zmq_context.socket(zmq.DEALER)
156
205
  self.results_receiver.set_hwm(0)
157
- self.port = self.results_receiver.bind_to_random_port("tcp://{}".format(ip_address),
206
+ self.port = self.results_receiver.bind_to_random_port(tcp_url(ip_address),
158
207
  min_port=port_range[0],
159
208
  max_port=port_range[1])
209
+ self.poller = zmq.Poller()
210
+ self.poller.register(self.results_receiver, zmq.POLLIN)
160
211
 
161
- def get(self):
212
+ def get(self, timeout_ms=None):
213
+ """Get a message from the queue, returning None if timeout expires
214
+ without a message. timeout is measured in milliseconds.
215
+ """
162
216
  logger.debug("Waiting for ResultsIncoming message")
163
- m = self.results_receiver.recv_multipart()
164
- logger.debug("Received ResultsIncoming message")
165
- return m
217
+ socks = dict(self.poller.poll(timeout=timeout_ms))
218
+ if self.results_receiver in socks and socks[self.results_receiver] == zmq.POLLIN:
219
+ m = self.results_receiver.recv_multipart()
220
+ logger.debug("Received ResultsIncoming message")
221
+ return m
222
+ else:
223
+ return None
166
224
 
167
225
  def close(self):
168
226
  self.results_receiver.close()
@@ -1,30 +1,30 @@
1
1
  """RadicalPilotExecutor builds on the RADICAL-Pilot/Parsl
2
2
  """
3
+ import inspect
4
+ import logging
3
5
  import os
6
+ import queue
4
7
  import sys
8
+ import threading as mt
5
9
  import time
6
- import parsl
7
- import queue
8
- import logging
9
- import inspect
10
+ from concurrent.futures import Future
11
+ from functools import partial
12
+ from pathlib import PosixPath
13
+ from typing import Dict, Optional
14
+
10
15
  import requests
11
16
  import typeguard
12
- import threading as mt
13
-
14
- from functools import partial
15
- from typing import Optional, Dict
16
- from pathlib import Path, PosixPath
17
- from concurrent.futures import Future
18
17
 
18
+ import parsl
19
+ from parsl.app.errors import BashExitFailure, RemoteExceptionWrapper
19
20
  from parsl.app.python import timeout
20
- from .rpex_resources import ResourceConfig
21
21
  from parsl.data_provider.files import File
22
- from parsl.utils import RepresentationMixin
23
- from parsl.app.errors import BashExitFailure
24
22
  from parsl.executors.base import ParslExecutor
25
- from parsl.app.errors import RemoteExceptionWrapper
26
23
  from parsl.serialize import deserialize, pack_res_spec_apply_message
27
- from parsl.serialize.errors import SerializationError, DeserializationError
24
+ from parsl.serialize.errors import DeserializationError, SerializationError
25
+ from parsl.utils import RepresentationMixin
26
+
27
+ from .rpex_resources import CLIENT, MPI, ResourceConfig
28
28
 
29
29
  try:
30
30
  import radical.pilot as rp
@@ -59,7 +59,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
59
59
  ``rp.PilotManager`` and ``rp.TaskManager``.
60
60
  2. "translate": Unwrap, identify, and parse Parsl ``apps`` into ``rp.TaskDescription``.
61
61
  3. "submit": Submit Parsl apps to ``rp.TaskManager``.
62
- 4. "shut_down": Shut down the RADICAL-Pilot runtime and all associated components.
62
+ 4. "shutdown": Shut down the RADICAL-Pilot runtime and all associated components.
63
63
 
64
64
  Here is a diagram
65
65
 
@@ -133,24 +133,32 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
133
133
  self.resource = resource
134
134
  self._uid = RPEX.lower()
135
135
  self.bulk_mode = bulk_mode
136
+ self._terminate = mt.Event()
136
137
  self.working_dir = working_dir
137
138
  self.pilot_kwargs = rpex_pilot_kwargs
138
139
  self.future_tasks: Dict[str, Future] = {}
139
140
 
140
141
  if rpex_cfg:
141
- self.rpex_cfg = rpex_cfg
142
+ self.rpex_cfg = rpex_cfg.get_config()
142
143
  elif not rpex_cfg and 'local' in resource:
143
- self.rpex_cfg = ResourceConfig()
144
+ self.rpex_cfg = ResourceConfig().get_config()
144
145
  else:
145
- raise ValueError('Resource config file must be '
146
- 'specified for a non-local execution')
146
+ raise ValueError('Resource config must be '
147
+ 'specified for a non-local resources')
147
148
 
148
149
  def task_state_cb(self, task, state):
149
150
  """
150
151
  Update the state of Parsl Future apps
151
152
  Based on RP task state callbacks.
152
153
  """
153
- if not task.uid.startswith('master'):
154
+ # check the Master/Worker state
155
+ if task.mode in [rp.RAPTOR_MASTER, rp.RAPTOR_WORKER]:
156
+ if state == rp.FAILED:
157
+ exception = RuntimeError(f'{task.uid} failed with internal error: {task.stderr}')
158
+ self._fail_all_tasks(exception)
159
+
160
+ # check all other tasks state
161
+ else:
154
162
  parsl_task = self.future_tasks[task.uid]
155
163
 
156
164
  if state == rp.DONE:
@@ -186,6 +194,23 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
186
194
  else:
187
195
  parsl_task.set_exception('Task failed for an unknown reason')
188
196
 
197
+ def _fail_all_tasks(self, exception):
198
+ """
199
+ Fail all outstanding tasks with the given exception.
200
+
201
+ This method iterates through all outstanding tasks in the
202
+ `future_tasks` dictionary, which have not yet completed,
203
+ and sets the provided exception as their result, indicating
204
+ a failure.
205
+
206
+ Parameters:
207
+ - exception: The exception to be set as the result for all
208
+ outstanding tasks.
209
+ """
210
+ for fut_task in self.future_tasks.values():
211
+ if not fut_task.done():
212
+ fut_task.set_exception(exception)
213
+
189
214
  def start(self):
190
215
  """Create the Pilot component and pass it.
191
216
  """
@@ -202,63 +227,62 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
202
227
  'resource': self.resource}
203
228
 
204
229
  if not self.resource or 'local' in self.resource:
205
- # move the agent sandbox to the working dir mainly
206
- # for debugging purposes. This will allow parsl
207
- # to include the agent sandbox with the ci artifacts.
208
- if os.environ.get("LOCAL_SANDBOX"):
209
- pd_init['sandbox'] = self.run_dir
210
- os.environ["RADICAL_LOG_LVL"] = "DEBUG"
211
-
212
- logger.info("RPEX will be running in the local mode")
230
+ os.environ["RADICAL_LOG_LVL"] = "DEBUG"
231
+ logger.info("RPEX will be running in local mode")
213
232
 
214
233
  pd = rp.PilotDescription(pd_init)
215
234
  pd.verify()
216
235
 
217
- self.rpex_cfg = self.rpex_cfg._get_cfg_file(path=self.run_dir)
218
- cfg = ru.Config(cfg=ru.read_json(self.rpex_cfg))
236
+ # start RP's main components TMGR, PMGR and Pilot
237
+ self.tmgr = rp.TaskManager(session=self.session)
238
+ self.pmgr = rp.PilotManager(session=self.session)
239
+ self.pilot = self.pmgr.submit_pilots(pd)
219
240
 
220
- self.master = cfg.master_descr
221
- self.n_masters = cfg.n_masters
241
+ if not self.pilot.description.get('cores') or not self.pilot.description.get('nodes'):
242
+ logger.warning('no "cores/nodes" per pilot were set, using default resources')
243
+
244
+ self.tmgr.add_pilots(self.pilot)
245
+ self.tmgr.register_callback(self.task_state_cb)
222
246
 
223
- tds = list()
224
- master_path = '{0}/rpex_master.py'.format(PWD)
225
247
  worker_path = '{0}/rpex_worker.py'.format(PWD)
226
248
 
227
- for i in range(self.n_masters):
228
- td = rp.TaskDescription(self.master)
229
- td.mode = rp.RAPTOR_MASTER
230
- td.uid = ru.generate_id('master.%(item_counter)06d', ru.ID_CUSTOM,
249
+ self.masters = []
250
+
251
+ logger.info(f'Starting {self.rpex_cfg.n_masters} masters and {self.rpex_cfg.n_workers} workers for each master')
252
+
253
+ # create N masters
254
+ for _ in range(self.rpex_cfg.n_masters):
255
+ md = rp.TaskDescription(self.rpex_cfg.master_descr)
256
+ md.uid = ru.generate_id('rpex.master.%(item_counter)06d', ru.ID_CUSTOM,
231
257
  ns=self.session.uid)
232
- td.ranks = 1
233
- td.cores_per_rank = 1
234
- td.arguments = [self.rpex_cfg, i]
235
- td.input_staging = self._stage_files([File(master_path),
236
- File(worker_path),
237
- File(self.rpex_cfg)], mode='in')
238
- tds.append(td)
239
258
 
240
- self.pmgr = rp.PilotManager(session=self.session)
241
- self.tmgr = rp.TaskManager(session=self.session)
259
+ # submit the master to the TMGR
260
+ master = self.tmgr.submit_raptors(md)[0]
261
+ self.masters.append(master)
262
+
263
+ workers = []
264
+ # create N workers for each master and submit them to the TMGR
265
+ for _ in range(self.rpex_cfg.n_workers):
266
+ wd = rp.TaskDescription(self.rpex_cfg.worker_descr)
267
+ wd.uid = ru.generate_id('rpex.worker.%(item_counter)06d', ru.ID_CUSTOM,
268
+ ns=self.session.uid)
269
+ wd.raptor_id = master.uid
270
+ wd.input_staging = self._stage_files([File(worker_path)], mode='in')
271
+ workers.append(wd)
242
272
 
243
- # submit pilot(s)
244
- pilot = self.pmgr.submit_pilots(pd)
245
- if not pilot.description.get('cores'):
246
- logger.warning('no "cores" per pilot was set, using default resources {0}'.format(pilot.resources))
273
+ self.tmgr.submit_workers(workers)
247
274
 
248
- self.tmgr.submit_tasks(tds)
275
+ self.select_master = self._cyclic_master_selector()
249
276
 
250
277
  # prepare or use the current env for the agent/pilot side environment
251
- if cfg.pilot_env_mode != 'client':
252
- logger.info("creating {0} environment for the executor".format(cfg.pilot_env.name))
253
- pilot.prepare_env(env_name=cfg.pilot_env.name,
254
- env_spec=cfg.pilot_env.as_dict())
278
+ if self.rpex_cfg.pilot_env_mode != CLIENT:
279
+ logger.info("creating {0} environment for the executor".format(self.rpex_cfg.pilot_env.name))
280
+ self.pilot.prepare_env(env_name=self.rpex_cfg.pilot_env.name,
281
+ env_spec=self.rpex_cfg.pilot_env.as_dict())
255
282
  else:
256
283
  client_env = sys.prefix
257
284
  logger.info("reusing ({0}) environment for the executor".format(client_env))
258
285
 
259
- self.tmgr.add_pilots(pilot)
260
- self.tmgr.register_callback(self.task_state_cb)
261
-
262
286
  # create a bulking thread to run the actual task submission
263
287
  # to RP in bulks
264
288
  if self.bulk_mode:
@@ -272,8 +296,21 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
272
296
  self._bulk_thread.daemon = True
273
297
  self._bulk_thread.start()
274
298
 
299
+ logger.info('bulk mode is on, submitting tasks in bulks')
300
+
275
301
  return True
276
302
 
303
+ def _cyclic_master_selector(self):
304
+ """
305
+ Balance tasks submission across N masters and N workers
306
+ """
307
+ current_master = 0
308
+ masters_uids = [m.uid for m in self.masters]
309
+
310
+ while True:
311
+ yield masters_uids[current_master]
312
+ current_master = (current_master + 1) % len(self.masters)
313
+
277
314
  def unwrap(self, func, args):
278
315
  """
279
316
  Unwrap a Parsl app and its args for further processing.
@@ -364,22 +401,25 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
364
401
 
365
402
  # This is the default mode where the bash_app will be executed as
366
403
  # a single core process by RP. For cores > 1 the user must use
367
- # above or use MPI functions if their code is Python.
404
+ # task.mode=rp.TASK_EXECUTABLE (above) or use MPI functions if their
405
+ # code is Python.
368
406
  else:
369
407
  task.mode = rp.TASK_PROC
370
- task.raptor_id = 'master.%06d' % (tid % self.n_masters)
408
+ task.raptor_id = next(self.select_master)
371
409
  task.executable = self._pack_and_apply_message(func, args, kwargs)
372
410
 
373
411
  elif PYTHON in task_type or not task_type:
374
412
  task.mode = rp.TASK_FUNCTION
375
- task.raptor_id = 'master.%06d' % (tid % self.n_masters)
413
+ task.raptor_id = next(self.select_master)
376
414
  if kwargs.get('walltime'):
377
415
  func = timeout(func, kwargs['walltime'])
378
416
 
379
- # we process MPI function differently
380
- if 'comm' in kwargs:
417
+ # Check how to serialize the function object
418
+ if MPI in self.rpex_cfg.worker_type.lower():
419
+ task.use_mpi = True
381
420
  task.function = rp.PythonTask(func, *args, **kwargs)
382
421
  else:
422
+ task.use_mpi = False
383
423
  task.function = self._pack_and_apply_message(func, args, kwargs)
384
424
 
385
425
  task.input_staging = self._stage_files(kwargs.get("inputs", []),
@@ -394,7 +434,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
394
434
  try:
395
435
  task.verify()
396
436
  except ru.typeddict.TDKeyError as e:
397
- raise Exception(f'{e}. Please check Radical.Pilot TaskDescription documentation')
437
+ raise Exception(f'{e}. Please check: https://radicalpilot.readthedocs.io/en/stable/ documentation')
398
438
 
399
439
  return task
400
440
 
@@ -413,7 +453,11 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
413
453
 
414
454
  def _unpack_and_set_parsl_exception(self, parsl_task, exception):
415
455
  try:
416
- s = rp.utils.deserialize_bson(exception)
456
+ try:
457
+ s = rp.utils.deserialize_bson(exception)
458
+ except Exception:
459
+ s = exception
460
+
417
461
  if isinstance(s, RemoteExceptionWrapper):
418
462
  try:
419
463
  s.reraise()
@@ -421,6 +465,8 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
421
465
  parsl_task.set_exception(e)
422
466
  elif isinstance(s, Exception):
423
467
  parsl_task.set_exception(s)
468
+ elif isinstance(s, str):
469
+ parsl_task.set_exception(eval(s))
424
470
  else:
425
471
  raise ValueError("Unknown exception-like type received: {}".format(type(s)))
426
472
  except Exception as e:
@@ -440,16 +486,10 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
440
486
  elif isinstance(k_val, PosixPath):
441
487
  k_val = k_val.__str__()
442
488
 
443
- # if the stderr/out has no path
444
- # then we consider it local and
445
- # we just set the path to the cwd
446
- if '/' not in k_val:
447
- k_val = CWD + '/' + k_val
448
-
449
- # finally set the stderr/out to
450
- # the desired name by the user
489
+ # set the stderr/out to the desired
490
+ # name by the user
451
491
  setattr(task, k, k_val)
452
- task.sandbox = Path(k_val).parent.__str__()
492
+ task.sandbox = CWD
453
493
 
454
494
  def _stage_files(self, files, mode):
455
495
  """
@@ -477,7 +517,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
477
517
  # this indicates that the user
478
518
  # did not provide a specific
479
519
  # output file and RP will stage out
480
- # the task.output from pilot://task_folder
520
+ # the task.stdout from pilot://task_folder
481
521
  # to the CWD or file.url
482
522
  if '/' not in file.url:
483
523
  f = {'source': file.filename,
@@ -493,7 +533,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
493
533
 
494
534
  bulk = list()
495
535
 
496
- while True:
536
+ while not self._terminate.is_set():
497
537
 
498
538
  now = time.time() # time of last submission
499
539
 
@@ -513,6 +553,9 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
513
553
  if len(bulk) >= self._max_bulk_size:
514
554
  break
515
555
 
556
+ if self._terminate.is_set():
557
+ break
558
+
516
559
  if bulk:
517
560
  logger.debug('submit bulk: %d', len(bulk))
518
561
  self.tmgr.submit_tasks(bulk)
@@ -548,7 +591,15 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
548
591
 
549
592
  def shutdown(self, hub=True, targets='all', block=False):
550
593
  """Shutdown the executor, including all RADICAL-Pilot components."""
551
- logger.info("RadicalPilotExecutor shutdown")
594
+ logger.info("RadicalPilotExecutor is terminating...")
595
+
596
+ self._terminate.set()
597
+
598
+ # ensure we are in the bulk submission mode
599
+ if self.bulk_mode:
600
+ self._bulk_thread.join()
601
+
552
602
  self.session.close(download=True)
603
+ logger.info("RadicalPilotExecutor is terminated.")
553
604
 
554
605
  return True
@@ -1,17 +1,13 @@
1
- import sys
2
1
  import json
3
-
2
+ import sys
4
3
  from typing import List
5
4
 
6
- _setup_paths: List[str]
5
+ _setup_paths: List[str] = []
7
6
  try:
8
7
  import radical.pilot as rp
9
8
  import radical.utils as ru
10
9
  except ImportError:
11
- _setup_paths = []
12
- else:
13
- _setup_paths = [rp.sdist_path,
14
- ru.sdist_path]
10
+ pass
15
11
 
16
12
 
17
13
  MPI = "mpi"
@@ -77,7 +73,7 @@ class ResourceConfig:
77
73
 
78
74
  pilot_env_setup : list
79
75
  List of setup commands/packages for the pilot environment.
80
- Default setup includes "parsl", rp.sdist_path, and ru.sdist_path.
76
+ Default is an empty list.
81
77
 
82
78
  python_v : str
83
79
  The Python version to be used in the pilot environment.
@@ -108,7 +104,7 @@ class ResourceConfig:
108
104
  python_v: str = f'{sys.version_info[0]}.{sys.version_info[1]}'
109
105
  worker_type: str = DEFAULT_WORKER
110
106
 
111
- def _get_cfg_file(cls, path=None):
107
+ def get_config(cls, path=None):
112
108
 
113
109
  # Default ENV mode for RP is to reuse
114
110
  # the client side. If this is not the case,
@@ -126,6 +122,7 @@ class ResourceConfig:
126
122
  cfg = {
127
123
  'n_masters': cls.masters,
128
124
  'n_workers': cls.workers,
125
+ 'worker_type': cls.worker_type,
129
126
  'gpus_per_node': cls.worker_gpus_per_node,
130
127
  'cores_per_node': cls.worker_cores_per_node,
131
128
  'cores_per_master': cls.cores_per_master,
@@ -143,9 +140,10 @@ class ResourceConfig:
143
140
  'pilot_env_mode': cls.pilot_env_mode,
144
141
 
145
142
  'master_descr': {
143
+ "ranks": 1,
144
+ "cores_per_rank": 1,
146
145
  "mode": rp.RAPTOR_MASTER,
147
146
  "named_env": cls.pilot_env_name,
148
- "executable": "python3 rpex_master.py",
149
147
  },
150
148
 
151
149
  'worker_descr': {
@@ -154,12 +152,16 @@ class ResourceConfig:
154
152
  "raptor_file": "./rpex_worker.py",
155
153
  "raptor_class": cls.worker_type if
156
154
  cls.worker_type.lower() != MPI else MPI_WORKER,
155
+ "ranks": cls.nodes_per_worker * cls.worker_cores_per_node,
156
+ "gpus_per_rank": cls.nodes_per_worker * cls.worker_gpus_per_node,
157
157
  }}
158
158
 
159
- # Convert the class instance to a cfg file.
160
- config_path = 'rpex.cfg'
159
+ # Convert the class instance to a Json file or a Config dict.
161
160
  if path:
161
+ config_path = 'rpex.cfg'
162
162
  config_path = path + '/' + config_path
163
- with open(config_path, 'w') as f:
164
- json.dump(cfg, f, indent=4)
165
- return config_path
163
+ with open(config_path, 'w') as f:
164
+ json.dump(cfg, f, indent=4)
165
+ else:
166
+ config_obj = ru.Config(from_dict=cfg)
167
+ return config_obj
@@ -1,10 +1,11 @@
1
1
  import sys
2
+
2
3
  import radical.pilot as rp
3
4
 
4
5
  import parsl.app.errors as pe
5
6
  from parsl.app.bash import remote_side_bash_executor
6
- from parsl.serialize import unpack_res_spec_apply_message, serialize
7
- from parsl.executors.high_throughput.process_worker_pool import execute_task
7
+ from parsl.executors.execute_task import execute_task
8
+ from parsl.serialize import serialize, unpack_res_spec_apply_message
8
9
 
9
10
 
10
11
  class ParslWorker:
@@ -32,7 +33,7 @@ class ParslWorker:
32
33
 
33
34
  try:
34
35
  buffer = rp.utils.deserialize_bson(task['description']['executable'])
35
- func, args, kwargs, _resource_spec = unpack_res_spec_apply_message(buffer, {}, copy=False)
36
+ func, args, kwargs, _resource_spec = unpack_res_spec_apply_message(buffer)
36
37
  ret = remote_side_bash_executor(func, *args, **kwargs)
37
38
  exc = (None, None)
38
39
  val = None