parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +26 -6
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +53 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +259 -223
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +307 -285
  57. parsl/executors/high_throughput/interchange.py +137 -168
  58. parsl/executors/high_throughput/manager_record.py +4 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +77 -75
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +38 -55
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +17 -13
  78. parsl/executors/taskvine/manager_config.py +7 -2
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +28 -112
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +0 -6
  91. parsl/log_utils.py +1 -2
  92. parsl/monitoring/db_manager.py +55 -93
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +85 -311
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -9
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +3 -9
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +0 -7
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +11 -10
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +5 -5
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +137 -4
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.18.dist-info/METADATA +0 -98
  360. parsl-2024.3.18.dist-info/RECORD +0 -449
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
parsl/dataflow/dflow.py CHANGED
@@ -1,50 +1,54 @@
 from __future__ import annotations
+
 import atexit
+import concurrent.futures as cf
+import datetime
+import inspect
 import logging
 import os
-import pathlib
 import pickle
 import random
-import time
-import typeguard
-import inspect
-import threading
 import sys
-import datetime
+import threading
+import time
+from concurrent.futures import Future
+from functools import partial
 from getpass import getuser
-from typeguard import typechecked
+from socket import gethostname
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from uuid import uuid4
-from socket import gethostname
-from concurrent.futures import Future
-from functools import partial
+
+import typeguard
+from typeguard import typechecked

 import parsl
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.app.futures import DataFuture
-from parsl.channels import Channel
 from parsl.config import Config
 from parsl.data_provider.data_manager import DataManager
 from parsl.data_provider.files import File
+from parsl.dataflow.dependency_resolvers import SHALLOW_DEPENDENCY_RESOLVER
 from parsl.dataflow.errors import BadCheckpoint, DependencyError, JoinError
 from parsl.dataflow.futures import AppFuture
 from parsl.dataflow.memoization import Memoizer
 from parsl.dataflow.rundirs import make_rundir
-from parsl.dataflow.states import States, FINAL_STATES, FINAL_FAILURE_STATES
+from parsl.dataflow.states import FINAL_FAILURE_STATES, FINAL_STATES, States
 from parsl.dataflow.taskrecord import TaskRecord
-from parsl.errors import ConfigurationError, InternalConsistencyError, NoDataFlowKernelError
-from parsl.jobs.job_status_poller import JobStatusPoller
-from parsl.jobs.states import JobStatus, JobState
-from parsl.usage_tracking.usage import UsageTracker
+from parsl.errors import (
+    ConfigurationError,
+    InternalConsistencyError,
+    NoDataFlowKernelError,
+)
 from parsl.executors.base import ParslExecutor
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.executors.threads import ThreadPoolExecutor
+from parsl.jobs.job_status_poller import JobStatusPoller
 from parsl.monitoring import MonitoringHub
-from parsl.process_loggers import wrap_with_logs
-from parsl.providers.base import ExecutionProvider
-from parsl.utils import get_version, get_std_fname_mode, get_all_checkpoints, Timer
-
 from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.remote import monitor_wrapper
+from parsl.process_loggers import wrap_with_logs
+from parsl.usage_tracking.usage import UsageTracker
+from parsl.utils import Timer, get_all_checkpoints, get_std_fname_mode, get_version

 logger = logging.getLogger(__name__)

@@ -106,14 +110,8 @@ class DataFlowKernel:
         self.monitoring: Optional[MonitoringHub]
         self.monitoring = config.monitoring

-        # hub address and port for interchange to connect
-        self.hub_address = None  # type: Optional[str]
-        self.hub_interchange_port = None  # type: Optional[int]
         if self.monitoring:
-            if self.monitoring.logdir is None:
-                self.monitoring.logdir = self.run_dir
-            self.hub_address = self.monitoring.hub_address
-            self.hub_interchange_port = self.monitoring.start(self.run_id, self.run_dir, self.config.run_dir)
+            self.monitoring.start(self.run_dir, self.config.run_dir)

         self.time_began = datetime.datetime.now()
         self.time_completed: Optional[datetime.datetime] = None
@@ -159,8 +157,8 @@ class DataFlowKernel:
         }

         if self.monitoring:
-            self.monitoring.send(MessageType.WORKFLOW_INFO,
-                                 workflow_info)
+            self.monitoring.send((MessageType.WORKFLOW_INFO,
+                                  workflow_info))

         if config.checkpoint_files is not None:
             checkpoints = self.load_checkpoints(config.checkpoint_files)
@@ -179,8 +177,7 @@ class DataFlowKernel:
         # job_status_poller.add_executors.
         self.job_status_poller = JobStatusPoller(strategy=self.config.strategy,
                                                  strategy_period=self.config.strategy_period,
-                                                 max_idletime=self.config.max_idletime,
-                                                 dfk=self)
+                                                 max_idletime=self.config.max_idletime)

         self.executors: Dict[str, ParslExecutor] = {}

@@ -204,21 +201,52 @@ class DataFlowKernel:
         self.tasks: Dict[int, TaskRecord] = {}
         self.submitter_lock = threading.Lock()

+        self.dependency_launch_pool = cf.ThreadPoolExecutor(max_workers=1, thread_name_prefix="Dependency-Launch")
+
+        self.dependency_resolver = self.config.dependency_resolver if self.config.dependency_resolver is not None \
+            else SHALLOW_DEPENDENCY_RESOLVER
+
         atexit.register(self.atexit_cleanup)

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        mode = self.config.exit_mode
+        logger.debug("Exiting context manager, with exit mode '%s'", mode)
+        if mode == "cleanup":
+            logger.info("Calling cleanup for DFK")
+            self.cleanup()
+        elif mode == "skip":
+            logger.info("Skipping all cleanup handling")
+        elif mode == "wait":
+            if exc_type is None:
+                logger.info("Waiting for all tasks to complete")
+                self.wait_for_current_tasks()
+                self.cleanup()
+            else:
+                logger.info("There was an exception - cleaning up without waiting for task completion")
+                self.cleanup()
+        else:
+            raise InternalConsistencyError(f"Exit case for {mode} should be unreachable, validated by typeguard on Config()")
+
     def _send_task_log_info(self, task_record: TaskRecord) -> None:
         if self.monitoring:
             task_log_info = self._create_task_log_info(task_record)
-            self.monitoring.send(MessageType.TASK_INFO, task_log_info)
+            self.monitoring.send((MessageType.TASK_INFO, task_log_info))

-    def _create_task_log_info(self, task_record):
+    def _create_task_log_info(self, task_record: TaskRecord) -> Dict[str, Any]:
         """
         Create the dictionary that will be included in the log.
         """
         info_to_monitor = ['func_name', 'memoize', 'hashsum', 'fail_count', 'fail_cost', 'status',
                            'id', 'time_invoked', 'try_time_launched', 'time_returned', 'try_time_returned', 'executor']

-        task_log_info = {"task_" + k: task_record[k] for k in info_to_monitor}
+        # mypy cannot verify that these task_record[k] references are valid:
+        # They are valid if all entries in info_to_monitor are declared in the definition of TaskRecord
+        # This type: ignore[literal-required] asserts that fact.
+        task_log_info = {"task_" + k: task_record[k] for k in info_to_monitor}  # type: ignore[literal-required]
+
         task_log_info['run_id'] = self.run_id
         task_log_info['try_id'] = task_record['try_id']
         task_log_info['timestamp'] = datetime.datetime.now()
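
The __enter__/__exit__ pair added above makes the DataFlowKernel usable as a context manager, with shutdown behaviour chosen by the new exit_mode configuration option ("cleanup", "skip" or "wait", validated by typeguard on Config() per the comment in the code). A minimal usage sketch, assuming Config accepts an exit_mode keyword and that parsl.load() returns this DataFlowKernel; some_app is a hypothetical app:

    import parsl
    from parsl.config import Config
    from parsl.executors.threads import ThreadPoolExecutor

    config = Config(executors=[ThreadPoolExecutor()], exit_mode="wait")

    # "wait": on a clean exit, block until outstanding tasks finish, then
    # run cleanup(); if the body raised, clean up without waiting.
    with parsl.load(config):
        future = some_app()  # hypothetical @python_app
        print(future.result())
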
@@ -230,20 +258,28 @@ class DataFlowKernel:
         task_log_info['task_inputs'] = str(task_record['kwargs'].get('inputs', None))
         task_log_info['task_outputs'] = str(task_record['kwargs'].get('outputs', None))
         task_log_info['task_stdin'] = task_record['kwargs'].get('stdin', None)
-        stdout_spec = task_record['kwargs'].get('stdout', None)
-        stderr_spec = task_record['kwargs'].get('stderr', None)
-        try:
-            stdout_name, _ = get_std_fname_mode('stdout', stdout_spec)
-        except Exception as e:
-            logger.warning("Incorrect stdout format {} for Task {}".format(stdout_spec, task_record['id']))
-            stdout_name = str(e)
-        try:
-            stderr_name, _ = get_std_fname_mode('stderr', stderr_spec)
-        except Exception as e:
-            logger.warning("Incorrect stderr format {} for Task {}".format(stderr_spec, task_record['id']))
-            stderr_name = str(e)
-        task_log_info['task_stdout'] = stdout_name
-        task_log_info['task_stderr'] = stderr_name
+
+        def std_spec_to_name(name, spec):
+            if spec is None:
+                name = ""
+            elif isinstance(spec, File):
+                name = spec.url
+            else:
+                # fallthrough case is various str, os.PathLike, tuple modes that
+                # can be interpreted by get_std_fname_mode.
+                try:
+                    name, _ = get_std_fname_mode(name, spec)
+                except Exception:
+                    logger.exception(f"Could not parse {name} specification {spec} for task {task_record['id']}")
+                    name = ""
+            return name
+
+        stdout_spec = task_record['kwargs'].get('stdout')
+        task_log_info['task_stdout'] = std_spec_to_name('stdout', stdout_spec)
+
+        stderr_spec = task_record['kwargs'].get('stderr')
+        task_log_info['task_stderr'] = std_spec_to_name('stderr', stderr_spec)
+
         task_log_info['task_fail_history'] = ",".join(task_record['fail_history'])
         task_log_info['task_depends'] = None
         if task_record['depends'] is not None:
@@ -584,9 +620,9 @@ class DataFlowKernel:
         return kwargs.get('_parsl_staging_inhibit', False)

     def launch_if_ready(self, task_record: TaskRecord) -> None:
-        """
-        launch_if_ready will launch the specified task, if it is ready
-        to run (for example, without dependencies, and in pending state).
+        """Schedules a task record for re-inspection to see if it is ready
+        for launch and for launch if it is ready. The call will return
+        immediately.

         This should be called by any piece of the DataFlowKernel that
         thinks a task may have become ready to run.
@@ -595,13 +631,17 @@ class DataFlowKernel:
         ready to run - launch_if_ready will not incorrectly launch that
         task.

-        It is also not an error to call launch_if_ready on a task that has
-        already been launched - launch_if_ready will not re-launch that
-        task.
-
         launch_if_ready is thread safe, so may be called from any thread
         or callback.
         """
+        self.dependency_launch_pool.submit(self._launch_if_ready_async, task_record)
+
+    @wrap_with_logs
+    def _launch_if_ready_async(self, task_record: TaskRecord) -> None:
+        """
+        _launch_if_ready will launch the specified task, if it is ready
+        to run (for example, without dependencies, and in pending state).
+        """
         exec_fu = None

         task_id = task_record['id']
@@ -667,14 +707,6 @@ class DataFlowKernel:
     def launch_task(self, task_record: TaskRecord) -> Future:
         """Handle the actual submission of the task to the executor layer.

-        If the app task has the executors attributes not set (default=='all')
-        the task is launched on a randomly selected executor from the
-        list of executors. This behavior could later be updated to support
-        binding to executors based on user specified criteria.
-
-        If the app task specifies a particular set of executors, it will be
-        targeted at those specific executors.
-
         Args:
             task_record : The task record

@@ -707,14 +739,18 @@ class DataFlowKernel:

         if self.monitoring is not None and self.monitoring.resource_monitoring_enabled:
             wrapper_logging_level = logging.DEBUG if self.monitoring.monitoring_debug else logging.INFO
-            (function, args, kwargs) = self.monitoring.monitor_wrapper(function, args, kwargs, try_id, task_id,
-                                                                       self.monitoring.monitoring_hub_url,
-                                                                       self.run_id,
-                                                                       wrapper_logging_level,
-                                                                       self.monitoring.resource_monitoring_interval,
-                                                                       executor.radio_mode,
-                                                                       executor.monitor_resources(),
-                                                                       self.run_dir)
+            (function, args, kwargs) = monitor_wrapper(f=function,
+                                                       args=args,
+                                                       kwargs=kwargs,
+                                                       x_try_id=try_id,
+                                                       x_task_id=task_id,
+                                                       monitoring_hub_url=self.monitoring.monitoring_hub_url,
+                                                       run_id=self.run_id,
+                                                       logging_level=wrapper_logging_level,
+                                                       sleep_dur=self.monitoring.resource_monitoring_interval,
+                                                       radio_mode=executor.radio_mode,
+                                                       monitor_resources=executor.monitor_resources(),
+                                                       run_dir=self.run_dir)

         with self.submitter_lock:
             exec_fu = executor.submit(function, task_record['resource_specification'], *args, **kwargs)
@@ -757,6 +793,10 @@ class DataFlowKernel:
                 (inputs[idx], func) = self.data_manager.optionally_stage_in(f, func, executor)

         for kwarg, f in kwargs.items():
+            # stdout and stderr files should not be staging in (they will be staged *out*
+            # in _add_output_deps)
+            if kwarg in ['stdout', 'stderr']:
+                continue
             (kwargs[kwarg], func) = self.data_manager.optionally_stage_in(f, func, executor)

         newargs = list(args)
@@ -769,33 +809,55 @@ class DataFlowKernel:
         logger.debug("Adding output dependencies")
         outputs = kwargs.get('outputs', [])
         app_fut._outputs = []
-        for idx, f in enumerate(outputs):
-            if isinstance(f, File) and not self.check_staging_inhibited(kwargs):
+
+        # Pass over all possible outputs: the outputs kwarg, stdout and stderr
+        # and for each of those, perform possible stage-out. This can result in:
+        # a DataFuture to be exposed in app_fut to represent the completion of
+        # that stageout (sometimes backed by a new sub-workflow for separate-task
+        # stageout), a replacement for the function to be executed (intended to
+        # be the original function wrapped with an in-task stageout wrapper), a
+        # rewritten File object to be passed to task to be executed
+
+        def stageout_one_file(file: File, rewritable_func: Callable):
+            if not self.check_staging_inhibited(kwargs):
                 # replace a File with a DataFuture - either completing when the stageout
                 # future completes, or if no stage out future is returned, then when the
                 # app itself completes.

                 # The staging code will get a clean copy which it is allowed to mutate,
                 # while the DataFuture-contained original will not be modified by any staging.
-                f_copy = f.cleancopy()
-                outputs[idx] = f_copy
+                f_copy = file.cleancopy()

-                logger.debug("Submitting stage out for output file {}".format(repr(f)))
+                logger.debug("Submitting stage out for output file {}".format(repr(file)))
                 stageout_fut = self.data_manager.stage_out(f_copy, executor, app_fut)
                 if stageout_fut:
-                    logger.debug("Adding a dependency on stageout future for {}".format(repr(f)))
-                    app_fut._outputs.append(DataFuture(stageout_fut, f, tid=app_fut.tid))
+                    logger.debug("Adding a dependency on stageout future for {}".format(repr(file)))
+                    df = DataFuture(stageout_fut, file, tid=app_fut.tid)
                 else:
-                    logger.debug("No stageout dependency for {}".format(repr(f)))
-                    app_fut._outputs.append(DataFuture(app_fut, f, tid=app_fut.tid))
+                    logger.debug("No stageout dependency for {}".format(repr(file)))
+                    df = DataFuture(app_fut, file, tid=app_fut.tid)

                 # this is a hook for post-task stageout
                 # note that nothing depends on the output - which is maybe a bug
                 # in the not-very-tested stageout system?
-                func = self.data_manager.replace_task_stage_out(f_copy, func, executor)
+                rewritable_func = self.data_manager.replace_task_stage_out(f_copy, rewritable_func, executor)
+                return rewritable_func, f_copy, df
             else:
-                logger.debug("Not performing output staging for: {}".format(repr(f)))
-                app_fut._outputs.append(DataFuture(app_fut, f, tid=app_fut.tid))
+                logger.debug("Not performing output staging for: {}".format(repr(file)))
+                return rewritable_func, file, DataFuture(app_fut, file, tid=app_fut.tid)
+
+        for idx, file in enumerate(outputs):
+            func, outputs[idx], o = stageout_one_file(file, func)
+            app_fut._outputs.append(o)
+
+        file = kwargs.get('stdout')
+        if isinstance(file, File):
+            func, kwargs['stdout'], app_fut._stdout_future = stageout_one_file(file, func)
+
+        file = kwargs.get('stderr')
+        if isinstance(file, File):
+            func, kwargs['stderr'], app_fut._stderr_future = stageout_one_file(file, func)
+
         return func

     def _gather_all_deps(self, args: Sequence[Any], kwargs: Dict[str, Any]) -> List[Future]:
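
With stage-out factored into stageout_one_file and applied to stdout and stderr as well as the outputs kwarg, an app's standard streams can now be File objects handled by the staging system, with the AppFuture carrying DataFutures for them (_stdout_future/_stderr_future). A sketch, assuming a bash app and the built-in no-op staging provider for the file scheme; the path is illustrative:

    from parsl import bash_app
    from parsl.data_provider.files import File

    @bash_app
    def hello(stdout=None, stderr=None):
        return "echo hello"

    # Passing a File rather than a plain path routes stdout through
    # stage-out; the AppFuture's stdout is then backed by a DataFuture.
    fut = hello(stdout=File("file:///tmp/hello.stdout"))
    fut.result()
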
@@ -812,8 +874,11 @@ class DataFlowKernel:
         depends: List[Future] = []

         def check_dep(d: Any) -> None:
-            if isinstance(d, Future):
-                depends.extend([d])
+            try:
+                depends.extend(self.dependency_resolver.traverse_to_gather(d))
+            except Exception:
+                logger.exception("Exception in dependency_resolver.traverse_to_gather")
+                raise

         # Check the positional args
         for dep in args:
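
check_dep now delegates to the dependency resolver selected at configuration time (parsl/dataflow/dependency_resolvers.py, new in this release), making dependency gathering and unwrapping pluggable. A sketch, assuming the module also exports a DEEP_DEPENDENCY_RESOLVER that traverses futures nested inside structures such as lists, tuples and dicts:

    from parsl.config import Config
    from parsl.dataflow.dependency_resolvers import DEEP_DEPENDENCY_RESOLVER  # assumed export

    # Futures hidden inside container arguments are gathered as dependencies
    # and unwrapped to their results before the task launches.
    config = Config(dependency_resolver=DEEP_DEPENDENCY_RESOLVER)
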
@@ -830,7 +895,8 @@ class DataFlowKernel:

         return depends

-    def _unwrap_futures(self, args, kwargs):
+    def _unwrap_futures(self, args: Sequence[Any], kwargs: Dict[str, Any]) \
+            -> Tuple[Sequence[Any], Dict[str, Any], Sequence[Tuple[Exception, str]]]:
         """This function should be called when all dependencies have completed.

         It will rewrite the arguments for that task, replacing each Future
@@ -851,53 +917,40 @@ class DataFlowKernel:
         """
         dep_failures = []

+        def append_failure(e: Exception, dep: Future) -> None:
+            # If this Future is associated with a task inside this DFK,
+            # then refer to the task ID.
+            # Otherwise make a repr of the Future object.
+            if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
+                tid = "task " + repr(dep.task_record['id'])
+            else:
+                tid = repr(dep)
+            dep_failures.extend([(e, tid)])
+
         # Replace item in args
         new_args = []
         for dep in args:
-            if isinstance(dep, Future):
-                try:
-                    new_args.extend([dep.result()])
-                except Exception as e:
-                    # If this Future is associated with a task inside this DFK,
-                    # then refer to the task ID.
-                    # Otherwise make a repr of the Future object.
-                    if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
-                        tid = "task " + repr(dep.task_record['id'])
-                    else:
-                        tid = repr(dep)
-                    dep_failures.extend([(e, tid)])
-            else:
-                new_args.extend([dep])
+            try:
+                new_args.extend([self.dependency_resolver.traverse_to_unwrap(dep)])
+            except Exception as e:
+                append_failure(e, dep)

         # Check for explicit kwargs ex, fu_1=<fut>
         for key in kwargs:
             dep = kwargs[key]
-            if isinstance(dep, Future):
-                try:
-                    kwargs[key] = dep.result()
-                except Exception as e:
-                    if hasattr(dep, 'task_record'):
-                        tid = dep.task_record['id']
-                    else:
-                        tid = None
-                    dep_failures.extend([(e, tid)])
+            try:
+                kwargs[key] = self.dependency_resolver.traverse_to_unwrap(dep)
+            except Exception as e:
+                append_failure(e, dep)

         # Check for futures in inputs=[<fut>...]
         if 'inputs' in kwargs:
             new_inputs = []
             for dep in kwargs['inputs']:
-                if isinstance(dep, Future):
-                    try:
-                        new_inputs.extend([dep.result()])
-                    except Exception as e:
-                        if hasattr(dep, 'task_record'):
-                            tid = dep.task_record['id']
-                        else:
-                            tid = None
-                        dep_failures.extend([(e, tid)])
-
-                else:
-                    new_inputs.extend([dep])
+                try:
+                    new_inputs.extend([self.dependency_resolver.traverse_to_unwrap(dep)])
+                except Exception as e:
+                    append_failure(e, dep)
             kwargs['inputs'] = new_inputs

         return new_args, kwargs, dep_failures
@@ -929,7 +982,7 @@ class DataFlowKernel:
         - app_kwargs (dict) : Rest of the kwargs to the fn passed as dict.

         Returns:
-               (AppFuture) [DataFutures,]
+               AppFuture

         """

@@ -953,32 +1006,16 @@ class DataFlowKernel:
             executor = random.choice(choices)
         logger.debug("Task {} will be sent to executor {}".format(task_id, executor))

-        # The below uses func.__name__ before it has been wrapped by any staging code.
-
-        label = app_kwargs.get('label')
-        for kw in ['stdout', 'stderr']:
-            if kw in app_kwargs:
-                if app_kwargs[kw] == parsl.AUTO_LOGNAME:
-                    if kw not in ignore_for_cache:
-                        ignore_for_cache += [kw]
-                    app_kwargs[kw] = os.path.join(
-                        self.run_dir,
-                        'task_logs',
-                        str(int(task_id / 10000)).zfill(4),  # limit logs to 10k entries per directory
-                        'task_{}_{}{}.{}'.format(
-                            str(task_id).zfill(4),
-                            func.__name__,
-                            '' if label is None else '_{}'.format(label),
-                            kw)
-                    )
-
         resource_specification = app_kwargs.get('parsl_resource_specification', {})

         task_record: TaskRecord
-        task_record = {'depends': [],
+        task_record = {'args': app_args,
+                       'depends': [],
                        'dfk': self,
                        'executor': executor,
+                       'func': func,
                        'func_name': func.__name__,
+                       'kwargs': app_kwargs,
                        'memoize': cache,
                        'hashsum': None,
                        'exec_fu': None,
@@ -1000,25 +1037,41 @@ class DataFlowKernel:

         self.update_task_state(task_record, States.unsched)

+        for kw in ['stdout', 'stderr']:
+            if kw in app_kwargs:
+                if app_kwargs[kw] == parsl.AUTO_LOGNAME:
+                    if kw not in ignore_for_cache:
+                        ignore_for_cache += [kw]
+                    if self.config.std_autopath is None:
+                        app_kwargs[kw] = self.default_std_autopath(task_record, kw)
+                    else:
+                        app_kwargs[kw] = self.config.std_autopath(task_record, kw)
+
         app_fu = AppFuture(task_record)
+        task_record['app_fu'] = app_fu

         # Transform remote input files to data futures
         app_args, app_kwargs, func = self._add_input_deps(executor, app_args, app_kwargs, func)

         func = self._add_output_deps(executor, app_args, app_kwargs, app_fu, func)

+        logger.debug("Added output dependencies")
+
+        # Replace the function invocation in the TaskRecord with whatever file-staging
+        # substitutions have been made.
         task_record.update({
                     'args': app_args,
                     'func': func,
-                    'kwargs': app_kwargs,
-                    'app_fu': app_fu})
+                    'kwargs': app_kwargs})

         assert task_id not in self.tasks

         self.tasks[task_id] = task_record

+        logger.debug("Gathering dependencies")
         # Get the list of dependencies for the task
         depends = self._gather_all_deps(app_args, app_kwargs)
+        logger.debug("Gathered dependencies")
         task_record['depends'] = depends

         depend_descs = []
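
Because AUTO_LOGNAME paths are now generated after the TaskRecord exists, they can be computed by a user-supplied std_autopath callable on the Config, with default_std_autopath (defined near the end of this file) as the fallback. A sketch of a custom autopath, assuming std_autopath is a Config keyword receiving the task record and the stream name:

    import os
    from parsl.config import Config

    def flat_autopath(task_record, kw):
        # kw is "stdout" or "stderr"; place every task log in one directory.
        return os.path.join("/tmp/parsl_logs", "task_{}.{}".format(task_record['id'], kw))

    config = Config(std_autopath=flat_autopath)
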
@@ -1085,73 +1138,28 @@ class DataFlowKernel:

         logger.info("End of summary")

-    def _create_remote_dirs_over_channel(self, provider: ExecutionProvider, channel: Channel) -> None:
-        """Create script directories across a channel
-
-        Parameters
-        ----------
-        provider: Provider obj
-            Provider for which scripts dirs are being created
-        channel: Channel obj
-            Channel over which the remote dirs are to be created
-        """
-        run_dir = self.run_dir
-        if channel.script_dir is None:
-
-            # This case will be detected as unreachable by mypy, because of
-            # the type of script_dir, which is str, not Optional[str].
-            # The type system doesn't represent the initialized/uninitialized
-            # state of a channel so cannot represent that a channel needs
-            # its script directory set or not.
-
-            channel.script_dir = os.path.join(run_dir, 'submit_scripts')  # type: ignore[unreachable]
-
-            # Only create dirs if we aren't on a shared-fs
-            if not channel.isdir(run_dir):
-                parent, child = pathlib.Path(run_dir).parts[-2:]
-                remote_run_dir = os.path.join(parent, child)
-                channel.script_dir = os.path.join(remote_run_dir, 'remote_submit_scripts')
-                provider.script_dir = os.path.join(run_dir, 'local_submit_scripts')
-
-        channel.makedirs(channel.script_dir, exist_ok=True)
-
-    def add_executors(self, executors):
+    def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
         for executor in executors:
             executor.run_id = self.run_id
             executor.run_dir = self.run_dir
-            executor.hub_address = self.hub_address
-            executor.hub_port = self.hub_interchange_port
+            if self.monitoring:
+                executor.hub_address = self.monitoring.hub_address
+                executor.hub_zmq_port = self.monitoring.hub_zmq_port
+                executor.submit_monitoring_radio = self.monitoring.radio
             if hasattr(executor, 'provider'):
                 if hasattr(executor.provider, 'script_dir'):
                     executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
                     os.makedirs(executor.provider.script_dir, exist_ok=True)

-                    if hasattr(executor.provider, 'channels'):
-                        logger.debug("Creating script_dir across multiple channels")
-                        for channel in executor.provider.channels:
-                            self._create_remote_dirs_over_channel(executor.provider, channel)
-                    else:
-                        self._create_remote_dirs_over_channel(executor.provider, executor.provider.channel)
-
             self.executors[executor.label] = executor
-            block_ids = executor.start()
-            if self.monitoring and block_ids:
-                new_status = {}
-                for bid in block_ids:
-                    new_status[bid] = JobStatus(JobState.PENDING)
-                msg = executor.create_monitoring_info(new_status)
-                logger.debug("Sending monitoring message {} to hub from DFK".format(msg))
-                self.monitoring.send(MessageType.BLOCK_INFO, msg)
+            executor.start()
         block_executors = [e for e in executors if isinstance(e, BlockProviderExecutor)]
         self.job_status_poller.add_executors(block_executors)

     def atexit_cleanup(self) -> None:
-        if not self.cleanup_called:
-            logger.warning("Python is exiting with a DFK still running. "
-                           "You should call parsl.dfk().cleanup() before "
-                           "exiting to release any resources")
-        else:
-            logger.info("python process is exiting, but DFK has already been cleaned up")
+        logger.warning("Python is exiting with a DFK still running. "
+                       "You should call parsl.dfk().cleanup() before "
+                       "exiting to release any resources")

     def wait_for_current_tasks(self) -> None:
         """Waits for all tasks in the task list to be completed, by waiting for their
@@ -1207,31 +1215,18 @@ class DataFlowKernel:
             self._checkpoint_timer.close()

         # Send final stats
+        logger.info("Sending end message for usage tracking")
         self.usage_tracker.send_end_message()
         self.usage_tracker.close()
+        logger.info("Closed usage tracking")

         logger.info("Closing job status poller")
         self.job_status_poller.close()
         logger.info("Terminated job status poller")

-        logger.info("Scaling in and shutting down executors")
+        logger.info("Shutting down executors")

         for executor in self.executors.values():
-            if isinstance(executor, BlockProviderExecutor):
-                if not executor.bad_state_is_set:
-                    logger.info(f"Scaling in executor {executor.label}")
-                    if executor.provider:
-                        job_ids = executor.provider.resources.keys()
-                        block_ids = executor.scale_in(len(job_ids))
-                        if self.monitoring and block_ids:
-                            new_status = {}
-                            for bid in block_ids:
-                                new_status[bid] = JobStatus(JobState.CANCELLED)
-                            msg = executor.create_monitoring_info(new_status)
-                            logger.debug("Sending message {} to hub from DFK".format(msg))
-                            self.monitoring.send(MessageType.BLOCK_INFO, msg)
-                else:  # and bad_state_is_set
-                    logger.warning(f"Not shutting down executor {executor.label} because it is in bad state")
             logger.info(f"Shutting down executor {executor.label}")
             executor.shutdown()
             logger.info(f"Shut down executor {executor.label}")
@@ -1241,18 +1236,32 @@ class DataFlowKernel:

         if self.monitoring:
             logger.info("Sending final monitoring message")
-            self.monitoring.send(MessageType.WORKFLOW_INFO,
+            self.monitoring.send((MessageType.WORKFLOW_INFO,
                                  {'tasks_failed_count': self.task_state_counts[States.failed],
                                   'tasks_completed_count': self.task_state_counts[States.exec_done],
                                   "time_began": self.time_began,
                                   'time_completed': self.time_completed,
-                                  'run_id': self.run_id, 'rundir': self.run_dir,
-                                  'exit_now': True})
+                                  'run_id': self.run_id, 'rundir': self.run_dir}))

             logger.info("Terminating monitoring")
             self.monitoring.close()
             logger.info("Terminated monitoring")

+        logger.info("Terminating dependency launch pool")
+        self.dependency_launch_pool.shutdown()
+        logger.info("Terminated dependency launch pool")
+
+        logger.info("Unregistering atexit hook")
+        atexit.unregister(self.atexit_cleanup)
+        logger.info("Unregistered atexit hook")
+
+        if DataFlowKernelLoader._dfk is self:
+            logger.info("Unregistering default DFK")
+            parsl.clear()
+            logger.info("Unregistered default DFK")
+        else:
+            logger.debug("Cleaning up non-default DFK - not unregistering")
+
         logger.info("DFK cleanup complete")

     def checkpoint(self, tasks: Optional[Sequence[TaskRecord]] = None) -> str:
@@ -1388,8 +1397,6 @@ class DataFlowKernel:
         Returns:
             - dict containing, hashed -> future mappings
         """
-        self.memo_lookup_table = None
-
         if checkpointDirs:
             return self._load_checkpoints(checkpointDirs)
         else:
@@ -1397,10 +1404,39 @@ class DataFlowKernel:

     @staticmethod
     def _log_std_streams(task_record: TaskRecord) -> None:
-        if task_record['app_fu'].stdout is not None:
-            logger.info("Standard output for task {} available at {}".format(task_record['id'], task_record['app_fu'].stdout))
-        if task_record['app_fu'].stderr is not None:
-            logger.info("Standard error for task {} available at {}".format(task_record['id'], task_record['app_fu'].stderr))
+        tid = task_record['id']
+
+        def log_std_stream(name: str, target) -> None:
+            if target is None:
+                logger.info(f"{name} for task {tid} will not be redirected.")
+            elif isinstance(target, str):
+                logger.info(f"{name} for task {tid} will be redirected to {target}")
+            elif isinstance(target, os.PathLike):
+                logger.info(f"{name} for task {tid} will be redirected to {os.fspath(target)}")
+            elif isinstance(target, tuple) and len(target) == 2 and isinstance(target[0], str):
+                logger.info(f"{name} for task {tid} will be redirected to {target[0]} with mode {target[1]}")
+            elif isinstance(target, tuple) and len(target) == 2 and isinstance(target[0], os.PathLike):
+                logger.info(f"{name} for task {tid} will be redirected to {os.fspath(target[0])} with mode {target[1]}")
+            elif isinstance(target, DataFuture):
+                logger.info(f"{name} for task {tid} will staged to {target.file_obj.url}")
+            else:
+                logger.error(f"{name} for task {tid} has unknown specification: {target!r}")
+
+        log_std_stream("Standard out", task_record['app_fu'].stdout)
+        log_std_stream("Standard error", task_record['app_fu'].stderr)
+
+    def default_std_autopath(self, taskrecord, kw):
+        label = taskrecord['kwargs'].get('label')
+        task_id = taskrecord['id']
+        return os.path.join(
+            self.run_dir,
+            'task_logs',
+            str(int(task_id / 10000)).zfill(4),  # limit logs to 10k entries per directory
+            'task_{}_{}{}.{}'.format(
+                str(task_id).zfill(4),
+                taskrecord['func_name'],
+                '' if label is None else '_{}'.format(label),
+                kw))


 class DataFlowKernelLoader: