parsl 2024.3.11__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +29 -7
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +57 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +262 -224
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +316 -282
  57. parsl/executors/high_throughput/interchange.py +158 -167
  58. parsl/executors/high_throughput/manager_record.py +5 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +115 -77
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +41 -57
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +18 -13
  78. parsl/executors/taskvine/manager_config.py +9 -5
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +30 -113
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +6 -12
  91. parsl/log_utils.py +9 -6
  92. parsl/monitoring/db_manager.py +59 -95
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +87 -356
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +11 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -8
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +4 -12
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +2 -8
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +79 -0
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +86 -0
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +6 -18
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +139 -6
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +115 -77
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.11.dist-info/METADATA +0 -98
  360. parsl-2024.3.11.dist-info/RECORD +0 -447
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
parsl/dataflow/dflow.py CHANGED
@@ -1,50 +1,54 @@
 from __future__ import annotations
+
 import atexit
+import concurrent.futures as cf
+import datetime
+import inspect
 import logging
 import os
-import pathlib
 import pickle
 import random
-import time
-import typeguard
-import inspect
-import threading
 import sys
-import datetime
+import threading
+import time
+from concurrent.futures import Future
+from functools import partial
 from getpass import getuser
-from typeguard import typechecked
+from socket import gethostname
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from uuid import uuid4
-from socket import gethostname
-from concurrent.futures import Future
-from functools import partial
+
+import typeguard
+from typeguard import typechecked
 
 import parsl
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.app.futures import DataFuture
-from parsl.channels import Channel
 from parsl.config import Config
 from parsl.data_provider.data_manager import DataManager
 from parsl.data_provider.files import File
+from parsl.dataflow.dependency_resolvers import SHALLOW_DEPENDENCY_RESOLVER
 from parsl.dataflow.errors import BadCheckpoint, DependencyError, JoinError
 from parsl.dataflow.futures import AppFuture
 from parsl.dataflow.memoization import Memoizer
 from parsl.dataflow.rundirs import make_rundir
-from parsl.dataflow.states import States, FINAL_STATES, FINAL_FAILURE_STATES
+from parsl.dataflow.states import FINAL_FAILURE_STATES, FINAL_STATES, States
 from parsl.dataflow.taskrecord import TaskRecord
-from parsl.errors import ConfigurationError, InternalConsistencyError, NoDataFlowKernelError
-from parsl.jobs.job_status_poller import JobStatusPoller
-from parsl.jobs.states import JobStatus, JobState
-from parsl.usage_tracking.usage import UsageTracker
+from parsl.errors import (
+    ConfigurationError,
+    InternalConsistencyError,
+    NoDataFlowKernelError,
+)
 from parsl.executors.base import ParslExecutor
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.executors.threads import ThreadPoolExecutor
+from parsl.jobs.job_status_poller import JobStatusPoller
 from parsl.monitoring import MonitoringHub
-from parsl.process_loggers import wrap_with_logs
-from parsl.providers.base import ExecutionProvider
-from parsl.utils import get_version, get_std_fname_mode, get_all_checkpoints, Timer
-
 from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.remote import monitor_wrapper
+from parsl.process_loggers import wrap_with_logs
+from parsl.usage_tracking.usage import UsageTracker
+from parsl.utils import Timer, get_all_checkpoints, get_std_fname_mode, get_version
 
 logger = logging.getLogger(__name__)
 
@@ -106,14 +110,8 @@ class DataFlowKernel:
         self.monitoring: Optional[MonitoringHub]
         self.monitoring = config.monitoring
 
-        # hub address and port for interchange to connect
-        self.hub_address = None  # type: Optional[str]
-        self.hub_interchange_port = None  # type: Optional[int]
         if self.monitoring:
-            if self.monitoring.logdir is None:
-                self.monitoring.logdir = self.run_dir
-            self.hub_address = self.monitoring.hub_address
-            self.hub_interchange_port = self.monitoring.start(self.run_id, self.run_dir, self.config.run_dir)
+            self.monitoring.start(self.run_dir, self.config.run_dir)
 
         self.time_began = datetime.datetime.now()
         self.time_completed: Optional[datetime.datetime] = None
@@ -159,8 +157,8 @@ class DataFlowKernel:
         }
 
         if self.monitoring:
-            self.monitoring.send(MessageType.WORKFLOW_INFO,
-                                 workflow_info)
+            self.monitoring.send((MessageType.WORKFLOW_INFO,
+                                  workflow_info))
 
         if config.checkpoint_files is not None:
             checkpoints = self.load_checkpoints(config.checkpoint_files)
@@ -178,8 +176,8 @@ class DataFlowKernel:
         # this must be set before executors are added since add_executors calls
         # job_status_poller.add_executors.
         self.job_status_poller = JobStatusPoller(strategy=self.config.strategy,
-                                                 max_idletime=self.config.max_idletime,
-                                                 dfk=self)
+                                                 strategy_period=self.config.strategy_period,
+                                                 max_idletime=self.config.max_idletime)
 
         self.executors: Dict[str, ParslExecutor] = {}
 
@@ -203,21 +201,52 @@ class DataFlowKernel:
         self.tasks: Dict[int, TaskRecord] = {}
         self.submitter_lock = threading.Lock()
 
+        self.dependency_launch_pool = cf.ThreadPoolExecutor(max_workers=1, thread_name_prefix="Dependency-Launch")
+
+        self.dependency_resolver = self.config.dependency_resolver if self.config.dependency_resolver is not None \
+            else SHALLOW_DEPENDENCY_RESOLVER
+
         atexit.register(self.atexit_cleanup)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        mode = self.config.exit_mode
+        logger.debug("Exiting context manager, with exit mode '%s'", mode)
+        if mode == "cleanup":
+            logger.info("Calling cleanup for DFK")
+            self.cleanup()
+        elif mode == "skip":
+            logger.info("Skipping all cleanup handling")
+        elif mode == "wait":
+            if exc_type is None:
+                logger.info("Waiting for all tasks to complete")
+                self.wait_for_current_tasks()
+                self.cleanup()
+            else:
+                logger.info("There was an exception - cleaning up without waiting for task completion")
+                self.cleanup()
+        else:
+            raise InternalConsistencyError(f"Exit case for {mode} should be unreachable, validated by typeguard on Config()")
+
     def _send_task_log_info(self, task_record: TaskRecord) -> None:
         if self.monitoring:
             task_log_info = self._create_task_log_info(task_record)
-            self.monitoring.send(MessageType.TASK_INFO, task_log_info)
+            self.monitoring.send((MessageType.TASK_INFO, task_log_info))
 
-    def _create_task_log_info(self, task_record):
+    def _create_task_log_info(self, task_record: TaskRecord) -> Dict[str, Any]:
         """
         Create the dictionary that will be included in the log.
         """
         info_to_monitor = ['func_name', 'memoize', 'hashsum', 'fail_count', 'fail_cost', 'status',
                            'id', 'time_invoked', 'try_time_launched', 'time_returned', 'try_time_returned', 'executor']
 
-        task_log_info = {"task_" + k: task_record[k] for k in info_to_monitor}
+        # mypy cannot verify that these task_record[k] references are valid:
+        # They are valid if all entries in info_to_monitor are declared in the definition of TaskRecord
+        # This type: ignore[literal-required] asserts that fact.
+        task_log_info = {"task_" + k: task_record[k] for k in info_to_monitor}  # type: ignore[literal-required]
+
         task_log_info['run_id'] = self.run_id
         task_log_info['try_id'] = task_record['try_id']
         task_log_info['timestamp'] = datetime.datetime.now()
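The __enter__/__exit__ pair added above makes the DataFlowKernel usable as a context manager, with shutdown behaviour selected by the exit_mode value that __exit__ reads from the configuration ("cleanup", "skip" or "wait"). A minimal usage sketch, assuming parsl.load returns the DFK and that Config accepts an exit_mode keyword as the typeguard comment above implies:

import parsl
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor


@parsl.python_app
def double(x):
    return 2 * x


# exit_mode="wait" asks __exit__ to wait for outstanding tasks before cleanup;
# if the block exits with an exception it cleans up immediately instead.
config = Config(executors=[ThreadPoolExecutor()], exit_mode="wait")

with parsl.load(config):
    print([double(i).result() for i in range(4)])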
@@ -229,20 +258,28 @@ class DataFlowKernel:
         task_log_info['task_inputs'] = str(task_record['kwargs'].get('inputs', None))
         task_log_info['task_outputs'] = str(task_record['kwargs'].get('outputs', None))
         task_log_info['task_stdin'] = task_record['kwargs'].get('stdin', None)
-        stdout_spec = task_record['kwargs'].get('stdout', None)
-        stderr_spec = task_record['kwargs'].get('stderr', None)
-        try:
-            stdout_name, _ = get_std_fname_mode('stdout', stdout_spec)
-        except Exception as e:
-            logger.warning("Incorrect stdout format {} for Task {}".format(stdout_spec, task_record['id']))
-            stdout_name = str(e)
-        try:
-            stderr_name, _ = get_std_fname_mode('stderr', stderr_spec)
-        except Exception as e:
-            logger.warning("Incorrect stderr format {} for Task {}".format(stderr_spec, task_record['id']))
-            stderr_name = str(e)
-        task_log_info['task_stdout'] = stdout_name
-        task_log_info['task_stderr'] = stderr_name
+
+        def std_spec_to_name(name, spec):
+            if spec is None:
+                name = ""
+            elif isinstance(spec, File):
+                name = spec.url
+            else:
+                # fallthrough case is various str, os.PathLike, tuple modes that
+                # can be interpreted by get_std_fname_mode.
+                try:
+                    name, _ = get_std_fname_mode(name, spec)
+                except Exception:
+                    logger.exception(f"Could not parse {name} specification {spec} for task {task_record['id']}")
+                    name = ""
+            return name
+
+        stdout_spec = task_record['kwargs'].get('stdout')
+        task_log_info['task_stdout'] = std_spec_to_name('stdout', stdout_spec)
+
+        stderr_spec = task_record['kwargs'].get('stderr')
+        task_log_info['task_stderr'] = std_spec_to_name('stderr', stderr_spec)
+
         task_log_info['task_fail_history'] = ",".join(task_record['fail_history'])
         task_log_info['task_depends'] = None
         if task_record['depends'] is not None:
@@ -583,9 +620,9 @@ class DataFlowKernel:
         return kwargs.get('_parsl_staging_inhibit', False)
 
     def launch_if_ready(self, task_record: TaskRecord) -> None:
-        """
-        launch_if_ready will launch the specified task, if it is ready
-        to run (for example, without dependencies, and in pending state).
+        """Schedules a task record for re-inspection to see if it is ready
+        for launch and for launch if it is ready. The call will return
+        immediately.
 
         This should be called by any piece of the DataFlowKernel that
         thinks a task may have become ready to run.
@@ -594,13 +631,17 @@ class DataFlowKernel:
         ready to run - launch_if_ready will not incorrectly launch that
         task.
 
-        It is also not an error to call launch_if_ready on a task that has
-        already been launched - launch_if_ready will not re-launch that
-        task.
-
         launch_if_ready is thread safe, so may be called from any thread
         or callback.
         """
+        self.dependency_launch_pool.submit(self._launch_if_ready_async, task_record)
+
+    @wrap_with_logs
+    def _launch_if_ready_async(self, task_record: TaskRecord) -> None:
+        """
+        _launch_if_ready will launch the specified task, if it is ready
+        to run (for example, without dependencies, and in pending state).
+        """
         exec_fu = None
 
         task_id = task_record['id']
@@ -666,14 +707,6 @@ class DataFlowKernel:
     def launch_task(self, task_record: TaskRecord) -> Future:
         """Handle the actual submission of the task to the executor layer.
 
-        If the app task has the executors attributes not set (default=='all')
-        the task is launched on a randomly selected executor from the
-        list of executors. This behavior could later be updated to support
-        binding to executors based on user specified criteria.
-
-        If the app task specifies a particular set of executors, it will be
-        targeted at those specific executors.
-
         Args:
             task_record : The task record
 
@@ -706,14 +739,18 @@ class DataFlowKernel:
 
         if self.monitoring is not None and self.monitoring.resource_monitoring_enabled:
             wrapper_logging_level = logging.DEBUG if self.monitoring.monitoring_debug else logging.INFO
-            (function, args, kwargs) = self.monitoring.monitor_wrapper(function, args, kwargs, try_id, task_id,
-                                                                       self.monitoring.monitoring_hub_url,
-                                                                       self.run_id,
-                                                                       wrapper_logging_level,
-                                                                       self.monitoring.resource_monitoring_interval,
-                                                                       executor.radio_mode,
-                                                                       executor.monitor_resources(),
-                                                                       self.run_dir)
+            (function, args, kwargs) = monitor_wrapper(f=function,
+                                                       args=args,
+                                                       kwargs=kwargs,
+                                                       x_try_id=try_id,
+                                                       x_task_id=task_id,
+                                                       monitoring_hub_url=self.monitoring.monitoring_hub_url,
+                                                       run_id=self.run_id,
+                                                       logging_level=wrapper_logging_level,
+                                                       sleep_dur=self.monitoring.resource_monitoring_interval,
+                                                       radio_mode=executor.radio_mode,
+                                                       monitor_resources=executor.monitor_resources(),
+                                                       run_dir=self.run_dir)
 
         with self.submitter_lock:
             exec_fu = executor.submit(function, task_record['resource_specification'], *args, **kwargs)
@@ -756,6 +793,10 @@ class DataFlowKernel:
             (inputs[idx], func) = self.data_manager.optionally_stage_in(f, func, executor)
 
         for kwarg, f in kwargs.items():
+            # stdout and stderr files should not be staging in (they will be staged *out*
+            # in _add_output_deps)
+            if kwarg in ['stdout', 'stderr']:
+                continue
             (kwargs[kwarg], func) = self.data_manager.optionally_stage_in(f, func, executor)
 
         newargs = list(args)
@@ -768,33 +809,55 @@ class DataFlowKernel:
         logger.debug("Adding output dependencies")
         outputs = kwargs.get('outputs', [])
         app_fut._outputs = []
-        for idx, f in enumerate(outputs):
-            if isinstance(f, File) and not self.check_staging_inhibited(kwargs):
+
+        # Pass over all possible outputs: the outputs kwarg, stdout and stderr
+        # and for each of those, perform possible stage-out. This can result in:
+        # a DataFuture to be exposed in app_fut to represent the completion of
+        # that stageout (sometimes backed by a new sub-workflow for separate-task
+        # stageout), a replacement for the function to be executed (intended to
+        # be the original function wrapped with an in-task stageout wrapper), a
+        # rewritten File object to be passed to task to be executed
+
+        def stageout_one_file(file: File, rewritable_func: Callable):
+            if not self.check_staging_inhibited(kwargs):
                 # replace a File with a DataFuture - either completing when the stageout
                 # future completes, or if no stage out future is returned, then when the
                 # app itself completes.
 
                 # The staging code will get a clean copy which it is allowed to mutate,
                 # while the DataFuture-contained original will not be modified by any staging.
-                f_copy = f.cleancopy()
-                outputs[idx] = f_copy
+                f_copy = file.cleancopy()
 
-                logger.debug("Submitting stage out for output file {}".format(repr(f)))
+                logger.debug("Submitting stage out for output file {}".format(repr(file)))
                 stageout_fut = self.data_manager.stage_out(f_copy, executor, app_fut)
                 if stageout_fut:
-                    logger.debug("Adding a dependency on stageout future for {}".format(repr(f)))
-                    app_fut._outputs.append(DataFuture(stageout_fut, f, tid=app_fut.tid))
+                    logger.debug("Adding a dependency on stageout future for {}".format(repr(file)))
+                    df = DataFuture(stageout_fut, file, tid=app_fut.tid)
                 else:
-                    logger.debug("No stageout dependency for {}".format(repr(f)))
-                    app_fut._outputs.append(DataFuture(app_fut, f, tid=app_fut.tid))
+                    logger.debug("No stageout dependency for {}".format(repr(file)))
+                    df = DataFuture(app_fut, file, tid=app_fut.tid)
 
                 # this is a hook for post-task stageout
                 # note that nothing depends on the output - which is maybe a bug
                 # in the not-very-tested stageout system?
-                func = self.data_manager.replace_task_stage_out(f_copy, func, executor)
+                rewritable_func = self.data_manager.replace_task_stage_out(f_copy, rewritable_func, executor)
+                return rewritable_func, f_copy, df
             else:
-                logger.debug("Not performing output staging for: {}".format(repr(f)))
-                app_fut._outputs.append(DataFuture(app_fut, f, tid=app_fut.tid))
+                logger.debug("Not performing output staging for: {}".format(repr(file)))
+                return rewritable_func, file, DataFuture(app_fut, file, tid=app_fut.tid)
+
+        for idx, file in enumerate(outputs):
+            func, outputs[idx], o = stageout_one_file(file, func)
+            app_fut._outputs.append(o)
+
+        file = kwargs.get('stdout')
+        if isinstance(file, File):
+            func, kwargs['stdout'], app_fut._stdout_future = stageout_one_file(file, func)
+
+        file = kwargs.get('stderr')
+        if isinstance(file, File):
+            func, kwargs['stderr'], app_fut._stderr_future = stageout_one_file(file, func)
+
         return func
 
     def _gather_all_deps(self, args: Sequence[Any], kwargs: Dict[str, Any]) -> List[Future]:
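With the stdout/stderr handling added to _add_output_deps above, a File passed as a bash app's stdout or stderr is now routed through the same stage-out machinery as entries in outputs, and the resulting DataFuture is attached to the app future. A rough sketch of what that looks like from user code, assuming a configuration whose staging providers accept the chosen URL scheme:

import parsl
from parsl import File, bash_app

parsl.load()  # default configuration; real runs would pass an explicit Config


@bash_app
def hello(stdout=None, stderr=None):
    return "echo hello"


# Passing a File instead of a plain path marks the stream for stage-out.
fut = hello(stdout=File("file:///tmp/hello.stdout"))
fut.result()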
@@ -811,8 +874,11 @@ class DataFlowKernel:
         depends: List[Future] = []
 
         def check_dep(d: Any) -> None:
-            if isinstance(d, Future):
-                depends.extend([d])
+            try:
+                depends.extend(self.dependency_resolver.traverse_to_gather(d))
+            except Exception:
+                logger.exception("Exception in dependency_resolver.traverse_to_gather")
+                raise
 
         # Check the positional args
         for dep in args:
@@ -829,7 +895,8 @@ class DataFlowKernel:
 
         return depends
 
-    def _unwrap_futures(self, args, kwargs):
+    def _unwrap_futures(self, args: Sequence[Any], kwargs: Dict[str, Any]) \
+            -> Tuple[Sequence[Any], Dict[str, Any], Sequence[Tuple[Exception, str]]]:
         """This function should be called when all dependencies have completed.
 
         It will rewrite the arguments for that task, replacing each Future
@@ -850,53 +917,40 @@ class DataFlowKernel:
         """
         dep_failures = []
 
+        def append_failure(e: Exception, dep: Future) -> None:
+            # If this Future is associated with a task inside this DFK,
+            # then refer to the task ID.
+            # Otherwise make a repr of the Future object.
+            if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
+                tid = "task " + repr(dep.task_record['id'])
+            else:
+                tid = repr(dep)
+            dep_failures.extend([(e, tid)])
+
         # Replace item in args
         new_args = []
         for dep in args:
-            if isinstance(dep, Future):
-                try:
-                    new_args.extend([dep.result()])
-                except Exception as e:
-                    # If this Future is associated with a task inside this DFK,
-                    # then refer to the task ID.
-                    # Otherwise make a repr of the Future object.
-                    if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
-                        tid = "task " + repr(dep.task_record['id'])
-                    else:
-                        tid = repr(dep)
-                    dep_failures.extend([(e, tid)])
-            else:
-                new_args.extend([dep])
+            try:
+                new_args.extend([self.dependency_resolver.traverse_to_unwrap(dep)])
+            except Exception as e:
+                append_failure(e, dep)
 
         # Check for explicit kwargs ex, fu_1=<fut>
         for key in kwargs:
             dep = kwargs[key]
-            if isinstance(dep, Future):
-                try:
-                    kwargs[key] = dep.result()
-                except Exception as e:
-                    if hasattr(dep, 'task_record'):
-                        tid = dep.task_record['id']
-                    else:
-                        tid = None
-                    dep_failures.extend([(e, tid)])
+            try:
+                kwargs[key] = self.dependency_resolver.traverse_to_unwrap(dep)
+            except Exception as e:
+                append_failure(e, dep)
 
         # Check for futures in inputs=[<fut>...]
         if 'inputs' in kwargs:
             new_inputs = []
             for dep in kwargs['inputs']:
-                if isinstance(dep, Future):
-                    try:
-                        new_inputs.extend([dep.result()])
-                    except Exception as e:
-                        if hasattr(dep, 'task_record'):
-                            tid = dep.task_record['id']
-                        else:
-                            tid = None
-                        dep_failures.extend([(e, tid)])
-
-                else:
-                    new_inputs.extend([dep])
+                try:
+                    new_inputs.extend([self.dependency_resolver.traverse_to_unwrap(dep)])
+                except Exception as e:
+                    append_failure(e, dep)
             kwargs['inputs'] = new_inputs
 
         return new_args, kwargs, dep_failures
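Both _gather_all_deps and _unwrap_futures now delegate Future traversal to the configured dependency resolver, defaulting to the SHALLOW_DEPENDENCY_RESOLVER imported at the top of this file. A hedged sketch of plugging in a custom resolver, assuming parsl.dataflow.dependency_resolvers exposes a DependencyResolver container whose two hooks line up with the traverse_to_gather and traverse_to_unwrap calls above:

from concurrent.futures import Future

from parsl.config import Config
from parsl.dataflow.dependency_resolvers import DependencyResolver


# Illustrative resolver that also finds Futures nested inside lists.
def gather(d):
    if isinstance(d, Future):
        return [d]
    if isinstance(d, list):
        return [f for e in d for f in gather(e)]
    return []


def unwrap(d):
    if isinstance(d, Future):
        return d.result()
    if isinstance(d, list):
        return [unwrap(e) for e in d]
    return d


list_aware_resolver = DependencyResolver(traverse_to_gather=gather,
                                         traverse_to_unwrap=unwrap)
config = Config(dependency_resolver=list_aware_resolver)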
@@ -928,7 +982,7 @@ class DataFlowKernel:
             - app_kwargs (dict) : Rest of the kwargs to the fn passed as dict.
 
         Returns:
-               (AppFuture) [DataFutures,]
+               AppFuture
 
         """
 
@@ -952,32 +1006,16 @@ class DataFlowKernel:
         executor = random.choice(choices)
         logger.debug("Task {} will be sent to executor {}".format(task_id, executor))
 
-        # The below uses func.__name__ before it has been wrapped by any staging code.
-
-        label = app_kwargs.get('label')
-        for kw in ['stdout', 'stderr']:
-            if kw in app_kwargs:
-                if app_kwargs[kw] == parsl.AUTO_LOGNAME:
-                    if kw not in ignore_for_cache:
-                        ignore_for_cache += [kw]
-                    app_kwargs[kw] = os.path.join(
-                            self.run_dir,
-                            'task_logs',
-                            str(int(task_id / 10000)).zfill(4),  # limit logs to 10k entries per directory
-                            'task_{}_{}{}.{}'.format(
-                                str(task_id).zfill(4),
-                                func.__name__,
-                                '' if label is None else '_{}'.format(label),
-                                kw)
-                    )
-
         resource_specification = app_kwargs.get('parsl_resource_specification', {})
 
         task_record: TaskRecord
-        task_record = {'depends': [],
+        task_record = {'args': app_args,
+                       'depends': [],
                        'dfk': self,
                        'executor': executor,
+                       'func': func,
                        'func_name': func.__name__,
+                       'kwargs': app_kwargs,
                        'memoize': cache,
                        'hashsum': None,
                        'exec_fu': None,
@@ -999,25 +1037,41 @@ class DataFlowKernel:
 
         self.update_task_state(task_record, States.unsched)
 
+        for kw in ['stdout', 'stderr']:
+            if kw in app_kwargs:
+                if app_kwargs[kw] == parsl.AUTO_LOGNAME:
+                    if kw not in ignore_for_cache:
+                        ignore_for_cache += [kw]
+                    if self.config.std_autopath is None:
+                        app_kwargs[kw] = self.default_std_autopath(task_record, kw)
+                    else:
+                        app_kwargs[kw] = self.config.std_autopath(task_record, kw)
+
         app_fu = AppFuture(task_record)
+        task_record['app_fu'] = app_fu
 
         # Transform remote input files to data futures
         app_args, app_kwargs, func = self._add_input_deps(executor, app_args, app_kwargs, func)
 
         func = self._add_output_deps(executor, app_args, app_kwargs, app_fu, func)
 
+        logger.debug("Added output dependencies")
+
+        # Replace the function invocation in the TaskRecord with whatever file-staging
+        # substitutions have been made.
         task_record.update({
                     'args': app_args,
                     'func': func,
-                    'kwargs': app_kwargs,
-                    'app_fu': app_fu})
+                    'kwargs': app_kwargs})
 
         assert task_id not in self.tasks
 
         self.tasks[task_id] = task_record
 
+        logger.debug("Gathering dependencies")
         # Get the list of dependencies for the task
         depends = self._gather_all_deps(app_args, app_kwargs)
+        logger.debug("Gathered dependencies")
         task_record['depends'] = depends
 
         depend_descs = []
@@ -1084,73 +1138,28 @@ class DataFlowKernel:
 
         logger.info("End of summary")
 
-    def _create_remote_dirs_over_channel(self, provider: ExecutionProvider, channel: Channel) -> None:
-        """Create script directories across a channel
-
-        Parameters
-        ----------
-        provider: Provider obj
-            Provider for which scripts dirs are being created
-        channel: Channel obj
-            Channel over which the remote dirs are to be created
-        """
-        run_dir = self.run_dir
-        if channel.script_dir is None:
-
-            # This case will be detected as unreachable by mypy, because of
-            # the type of script_dir, which is str, not Optional[str].
-            # The type system doesn't represent the initialized/uninitialized
-            # state of a channel so cannot represent that a channel needs
-            # its script directory set or not.
-
-            channel.script_dir = os.path.join(run_dir, 'submit_scripts')  # type: ignore[unreachable]
-
-            # Only create dirs if we aren't on a shared-fs
-            if not channel.isdir(run_dir):
-                parent, child = pathlib.Path(run_dir).parts[-2:]
-                remote_run_dir = os.path.join(parent, child)
-                channel.script_dir = os.path.join(remote_run_dir, 'remote_submit_scripts')
-                provider.script_dir = os.path.join(run_dir, 'local_submit_scripts')
-
-        channel.makedirs(channel.script_dir, exist_ok=True)
-
-    def add_executors(self, executors):
+    def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
         for executor in executors:
             executor.run_id = self.run_id
             executor.run_dir = self.run_dir
-            executor.hub_address = self.hub_address
-            executor.hub_port = self.hub_interchange_port
+            if self.monitoring:
+                executor.hub_address = self.monitoring.hub_address
+                executor.hub_zmq_port = self.monitoring.hub_zmq_port
+                executor.submit_monitoring_radio = self.monitoring.radio
             if hasattr(executor, 'provider'):
                 if hasattr(executor.provider, 'script_dir'):
                     executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
                     os.makedirs(executor.provider.script_dir, exist_ok=True)
 
-                if hasattr(executor.provider, 'channels'):
-                    logger.debug("Creating script_dir across multiple channels")
-                    for channel in executor.provider.channels:
-                        self._create_remote_dirs_over_channel(executor.provider, channel)
-                else:
-                    self._create_remote_dirs_over_channel(executor.provider, executor.provider.channel)
-
             self.executors[executor.label] = executor
-            block_ids = executor.start()
-            if self.monitoring and block_ids:
-                new_status = {}
-                for bid in block_ids:
-                    new_status[bid] = JobStatus(JobState.PENDING)
-                msg = executor.create_monitoring_info(new_status)
-                logger.debug("Sending monitoring message {} to hub from DFK".format(msg))
-                self.monitoring.send(MessageType.BLOCK_INFO, msg)
+            executor.start()
         block_executors = [e for e in executors if isinstance(e, BlockProviderExecutor)]
         self.job_status_poller.add_executors(block_executors)
 
     def atexit_cleanup(self) -> None:
-        if not self.cleanup_called:
-            logger.warning("Python is exiting with a DFK still running. "
-                           "You should call parsl.dfk().cleanup() before "
-                           "exiting to release any resources")
-        else:
-            logger.info("python process is exiting, but DFK has already been cleaned up")
+        logger.warning("Python is exiting with a DFK still running. "
+                       "You should call parsl.dfk().cleanup() before "
+                       "exiting to release any resources")
 
     def wait_for_current_tasks(self) -> None:
         """Waits for all tasks in the task list to be completed, by waiting for their
@@ -1170,7 +1179,8 @@ class DataFlowKernel:
             fut = task_record['app_fu']
             if not fut.done():
                 fut.exception()
-            # now app future is done, poll until DFK state is final: a DFK state being final and the app future being done do not imply each other.
+            # now app future is done, poll until DFK state is final: a
+            # DFK state being final and the app future being done do not imply each other.
             while task_record['status'] not in FINAL_STATES:
                 time.sleep(0.1)
 
@@ -1205,31 +1215,18 @@ class DataFlowKernel:
             self._checkpoint_timer.close()
 
         # Send final stats
+        logger.info("Sending end message for usage tracking")
         self.usage_tracker.send_end_message()
         self.usage_tracker.close()
+        logger.info("Closed usage tracking")
 
         logger.info("Closing job status poller")
         self.job_status_poller.close()
         logger.info("Terminated job status poller")
 
-        logger.info("Scaling in and shutting down executors")
+        logger.info("Shutting down executors")
 
         for executor in self.executors.values():
-            if isinstance(executor, BlockProviderExecutor):
-                if not executor.bad_state_is_set:
-                    logger.info(f"Scaling in executor {executor.label}")
-                    if executor.provider:
-                        job_ids = executor.provider.resources.keys()
-                        block_ids = executor.scale_in(len(job_ids))
-                    if self.monitoring and block_ids:
-                        new_status = {}
-                        for bid in block_ids:
-                            new_status[bid] = JobStatus(JobState.CANCELLED)
-                        msg = executor.create_monitoring_info(new_status)
-                        logger.debug("Sending message {} to hub from DFK".format(msg))
-                        self.monitoring.send(MessageType.BLOCK_INFO, msg)
-                else:  # and bad_state_is_set
-                    logger.warning(f"Not shutting down executor {executor.label} because it is in bad state")
             logger.info(f"Shutting down executor {executor.label}")
             executor.shutdown()
             logger.info(f"Shut down executor {executor.label}")
@@ -1239,18 +1236,32 @@ class DataFlowKernel:
 
         if self.monitoring:
             logger.info("Sending final monitoring message")
-            self.monitoring.send(MessageType.WORKFLOW_INFO,
+            self.monitoring.send((MessageType.WORKFLOW_INFO,
                                  {'tasks_failed_count': self.task_state_counts[States.failed],
                                   'tasks_completed_count': self.task_state_counts[States.exec_done],
                                   "time_began": self.time_began,
                                   'time_completed': self.time_completed,
-                                  'run_id': self.run_id, 'rundir': self.run_dir,
-                                  'exit_now': True})
+                                  'run_id': self.run_id, 'rundir': self.run_dir}))
 
             logger.info("Terminating monitoring")
             self.monitoring.close()
             logger.info("Terminated monitoring")
 
+        logger.info("Terminating dependency launch pool")
+        self.dependency_launch_pool.shutdown()
+        logger.info("Terminated dependency launch pool")
+
+        logger.info("Unregistering atexit hook")
+        atexit.unregister(self.atexit_cleanup)
+        logger.info("Unregistered atexit hook")
+
+        if DataFlowKernelLoader._dfk is self:
+            logger.info("Unregistering default DFK")
+            parsl.clear()
+            logger.info("Unregistered default DFK")
+        else:
+            logger.debug("Cleaning up non-default DFK - not unregistering")
+
         logger.info("DFK cleanup complete")
 
     def checkpoint(self, tasks: Optional[Sequence[TaskRecord]] = None) -> str:
@@ -1386,8 +1397,6 @@ class DataFlowKernel:
         Returns:
              - dict containing, hashed -> future mappings
         """
-        self.memo_lookup_table = None
-
         if checkpointDirs:
             return self._load_checkpoints(checkpointDirs)
         else:
@@ -1395,10 +1404,39 @@ class DataFlowKernel:
 
     @staticmethod
     def _log_std_streams(task_record: TaskRecord) -> None:
-        if task_record['app_fu'].stdout is not None:
-            logger.info("Standard output for task {} available at {}".format(task_record['id'], task_record['app_fu'].stdout))
-        if task_record['app_fu'].stderr is not None:
-            logger.info("Standard error for task {} available at {}".format(task_record['id'], task_record['app_fu'].stderr))
+        tid = task_record['id']
+
+        def log_std_stream(name: str, target) -> None:
+            if target is None:
+                logger.info(f"{name} for task {tid} will not be redirected.")
+            elif isinstance(target, str):
+                logger.info(f"{name} for task {tid} will be redirected to {target}")
+            elif isinstance(target, os.PathLike):
+                logger.info(f"{name} for task {tid} will be redirected to {os.fspath(target)}")
+            elif isinstance(target, tuple) and len(target) == 2 and isinstance(target[0], str):
+                logger.info(f"{name} for task {tid} will be redirected to {target[0]} with mode {target[1]}")
+            elif isinstance(target, tuple) and len(target) == 2 and isinstance(target[0], os.PathLike):
+                logger.info(f"{name} for task {tid} will be redirected to {os.fspath(target[0])} with mode {target[1]}")
+            elif isinstance(target, DataFuture):
+                logger.info(f"{name} for task {tid} will staged to {target.file_obj.url}")
+            else:
+                logger.error(f"{name} for task {tid} has unknown specification: {target!r}")
+
+        log_std_stream("Standard out", task_record['app_fu'].stdout)
+        log_std_stream("Standard error", task_record['app_fu'].stderr)
+
+    def default_std_autopath(self, taskrecord, kw):
+        label = taskrecord['kwargs'].get('label')
+        task_id = taskrecord['id']
+        return os.path.join(
+                self.run_dir,
+                'task_logs',
+                str(int(task_id / 10000)).zfill(4),  # limit logs to 10k entries per directory
+                'task_{}_{}{}.{}'.format(
+                    str(task_id).zfill(4),
+                    taskrecord['func_name'],
+                    '' if label is None else '_{}'.format(label),
+                    kw))
 
 
 class DataFlowKernelLoader:
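default_std_autopath above is only consulted when Config.std_autopath is left unset (see the AUTO_LOGNAME handling in submit earlier in this diff). A sketch of a user-supplied autopath callable with the same (task_record, kw) signature, returning whatever path AUTO_LOGNAME should expand to; the flat_autopath name and layout here are illustrative only:

import os

from parsl.config import Config


def flat_autopath(task_record, kw):
    # kw is 'stdout' or 'stderr', exactly as passed by the DFK above.
    return os.path.join("my_logs",
                        "task_{}_{}.{}".format(task_record['id'],
                                               task_record['func_name'], kw))


config = Config(std_autopath=flat_autopath)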