parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +26 -6
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +53 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +259 -223
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +307 -285
  57. parsl/executors/high_throughput/interchange.py +137 -168
  58. parsl/executors/high_throughput/manager_record.py +4 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +77 -75
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +38 -55
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +17 -13
  78. parsl/executors/taskvine/manager_config.py +7 -2
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +28 -112
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +0 -6
  91. parsl/log_utils.py +1 -2
  92. parsl/monitoring/db_manager.py +55 -93
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +85 -311
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -9
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +3 -9
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +0 -7
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +11 -10
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +5 -5
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +137 -4
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.18.dist-info/METADATA +0 -98
  360. parsl-2024.3.18.dist-info/RECORD +0 -449
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -1,31 +1,31 @@
1
1
  #!/usr/bin/env python
2
- import multiprocessing
3
- import zmq
4
- import os
5
- import sys
6
- import platform
7
- import random
8
- import time
9
2
  import datetime
10
- import pickle
11
- import signal
3
+ import json
12
4
  import logging
5
+ import os
6
+ import pickle
7
+ import platform
13
8
  import queue
9
+ import sys
14
10
  import threading
15
- import json
11
+ import time
12
+ from typing import Any, Dict, List, NoReturn, Optional, Sequence, Set, Tuple, cast
16
13
 
17
- from typing import cast, Any, Dict, NoReturn, Sequence, Set, Optional, Tuple, List
14
+ import zmq
18
15
 
19
16
  from parsl import curvezmq
20
- from parsl.utils import setproctitle
21
- from parsl.version import VERSION as PARSL_VERSION
22
- from parsl.serialize import serialize as serialize_object
23
-
17
+ from parsl.addresses import tcp_url
24
18
  from parsl.app.errors import RemoteExceptionWrapper
19
+ from parsl.executors.high_throughput.errors import ManagerLost, VersionMismatch
25
20
  from parsl.executors.high_throughput.manager_record import ManagerRecord
21
+ from parsl.executors.high_throughput.manager_selector import ManagerSelector
26
22
  from parsl.monitoring.message_type import MessageType
23
+ from parsl.monitoring.radios.base import MonitoringRadioSender
24
+ from parsl.monitoring.radios.zmq import ZMQRadioSender
27
25
  from parsl.process_loggers import wrap_with_logs
28
-
26
+ from parsl.serialize import serialize as serialize_object
27
+ from parsl.utils import setproctitle
28
+ from parsl.version import VERSION as PARSL_VERSION
29
29
 
30
30
  PKL_HEARTBEAT_CODE = pickle.dumps((2 ** 32) - 1)
31
31
  PKL_DRAINED_CODE = pickle.dumps((2 ** 32) - 2)
@@ -34,32 +34,6 @@ LOGGER_NAME = "interchange"
34
34
  logger = logging.getLogger(LOGGER_NAME)
35
35
 
36
36
 
37
- class ManagerLost(Exception):
38
- ''' Task lost due to manager loss. Manager is considered lost when multiple heartbeats
39
- have been missed.
40
- '''
41
- def __init__(self, manager_id: bytes, hostname: str) -> None:
42
- self.manager_id = manager_id
43
- self.tstamp = time.time()
44
- self.hostname = hostname
45
-
46
- def __str__(self) -> str:
47
- return "Task failure due to loss of manager {} on host {}".format(self.manager_id.decode(), self.hostname)
48
-
49
-
50
- class VersionMismatch(Exception):
51
- ''' Manager and Interchange versions do not match
52
- '''
53
- def __init__(self, interchange_version: str, manager_version: str):
54
- self.interchange_version = interchange_version
55
- self.manager_version = manager_version
56
-
57
- def __str__(self) -> str:
58
- return "Manager version info {} does not match interchange version info {}, causing a critical failure".format(
59
- self.manager_version,
60
- self.interchange_version)
61
-
62
-
63
37
  class Interchange:
64
38
  """ Interchange is a task orchestrator for distributed systems.
65
39
 
@@ -68,18 +42,21 @@ class Interchange:
68
42
  3. Detect workers that have failed using heartbeats
69
43
  """
70
44
  def __init__(self,
71
- client_address: str = "127.0.0.1",
72
- interchange_address: Optional[str] = None,
73
- client_ports: Tuple[int, int, int] = (50055, 50056, 50057),
74
- worker_ports: Optional[Tuple[int, int]] = None,
75
- worker_port_range: Tuple[int, int] = (54000, 55000),
76
- hub_address: Optional[str] = None,
77
- hub_port: Optional[int] = None,
78
- heartbeat_threshold: int = 60,
79
- logdir: str = ".",
80
- logging_level: int = logging.INFO,
81
- poll_period: int = 10,
82
- cert_dir: Optional[str] = None,
45
+ *,
46
+ client_address: str,
47
+ interchange_address: Optional[str],
48
+ client_ports: Tuple[int, int, int],
49
+ worker_ports: Optional[Tuple[int, int]],
50
+ worker_port_range: Tuple[int, int],
51
+ hub_address: Optional[str],
52
+ hub_zmq_port: Optional[int],
53
+ heartbeat_threshold: int,
54
+ logdir: str,
55
+ logging_level: int,
56
+ poll_period: int,
57
+ cert_dir: Optional[str],
58
+ manager_selector: ManagerSelector,
59
+ run_id: str,
83
60
  ) -> None:
84
61
  """
85
62
  Parameters
@@ -91,45 +68,44 @@ class Interchange:
91
68
  If specified the interchange will only listen on this address for connections from workers
92
69
  else, it binds to all addresses.
93
70
 
94
- client_ports : triple(int, int, int)
71
+ client_ports : tuple(int, int, int)
95
72
  The ports at which the client can be reached
96
73
 
97
74
  worker_ports : tuple(int, int)
98
- The specific two ports at which workers will connect to the Interchange. Default: None
75
+ The specific two ports at which workers will connect to the Interchange.
99
76
 
100
77
  worker_port_range : tuple(int, int)
101
78
  The interchange picks ports at random from the range which will be used by workers.
102
- This is overridden when the worker_ports option is set. Default: (54000, 55000)
79
+ This is overridden when the worker_ports option is set.
103
80
 
104
81
  hub_address : str
105
82
  The IP address at which the interchange can send info about managers to when monitoring is enabled.
106
- Default: None (meaning monitoring disabled)
83
+ When None, monitoring is disabled.
107
84
 
108
- hub_port : str
85
+ hub_zmq_port : str
109
86
  The port at which the interchange can send info about managers to when monitoring is enabled.
110
- Default: None (meaning monitoring disabled)
87
+ When None, monitoring is disabled.
111
88
 
112
89
  heartbeat_threshold : int
113
90
  Number of seconds since the last heartbeat after which worker is considered lost.
114
91
 
115
92
  logdir : str
116
- Parsl log directory paths. Logs and temp files go here. Default: '.'
93
+ Parsl log directory paths. Logs and temp files go here.
117
94
 
118
95
  logging_level : int
119
- Logging level as defined in the logging module. Default: logging.INFO
96
+ Logging level as defined in the logging module.
120
97
 
121
98
  poll_period : int
122
- The main thread polling period, in milliseconds. Default: 10ms
99
+ The main thread polling period, in milliseconds.
123
100
 
124
101
  cert_dir : str | None
125
- Path to the certificate directory. Default: None
102
+ Path to the certificate directory.
126
103
  """
127
104
  self.cert_dir = cert_dir
128
105
  self.logdir = logdir
129
106
  os.makedirs(self.logdir, exist_ok=True)
130
107
 
131
108
  start_file_logger("{}/interchange.log".format(self.logdir), level=logging_level)
132
- logger.propagate = False
133
109
  logger.debug("Initializing Interchange process")
134
110
 
135
111
  self.client_address = client_address
@@ -141,17 +117,19 @@ class Interchange:
141
117
  self.zmq_context = curvezmq.ServerContext(self.cert_dir)
142
118
  self.task_incoming = self.zmq_context.socket(zmq.DEALER)
143
119
  self.task_incoming.set_hwm(0)
144
- self.task_incoming.connect("tcp://{}:{}".format(client_address, client_ports[0]))
120
+ self.task_incoming.connect(tcp_url(client_address, client_ports[0]))
145
121
  self.results_outgoing = self.zmq_context.socket(zmq.DEALER)
146
122
  self.results_outgoing.set_hwm(0)
147
- self.results_outgoing.connect("tcp://{}:{}".format(client_address, client_ports[1]))
123
+ self.results_outgoing.connect(tcp_url(client_address, client_ports[1]))
148
124
 
149
125
  self.command_channel = self.zmq_context.socket(zmq.REP)
150
- self.command_channel.connect("tcp://{}:{}".format(client_address, client_ports[2]))
126
+ self.command_channel.connect(tcp_url(client_address, client_ports[2]))
151
127
  logger.info("Connected to client")
152
128
 
129
+ self.run_id = run_id
130
+
153
131
  self.hub_address = hub_address
154
- self.hub_port = hub_port
132
+ self.hub_zmq_port = hub_zmq_port
155
133
 
156
134
  self.pending_task_queue: queue.Queue[Any] = queue.Queue(maxsize=10 ** 6)
157
135
  self.count = 0
@@ -168,14 +146,14 @@ class Interchange:
168
146
  self.worker_task_port = self.worker_ports[0]
169
147
  self.worker_result_port = self.worker_ports[1]
170
148
 
171
- self.task_outgoing.bind(f"tcp://{self.interchange_address}:{self.worker_task_port}")
172
- self.results_incoming.bind(f"tcp://{self.interchange_address}:{self.worker_result_port}")
149
+ self.task_outgoing.bind(tcp_url(self.interchange_address, self.worker_task_port))
150
+ self.results_incoming.bind(tcp_url(self.interchange_address, self.worker_result_port))
173
151
 
174
152
  else:
175
- self.worker_task_port = self.task_outgoing.bind_to_random_port(f"tcp://{self.interchange_address}",
153
+ self.worker_task_port = self.task_outgoing.bind_to_random_port(tcp_url(self.interchange_address),
176
154
  min_port=worker_port_range[0],
177
155
  max_port=worker_port_range[1], max_tries=100)
178
- self.worker_result_port = self.results_incoming.bind_to_random_port(f"tcp://{self.interchange_address}",
156
+ self.worker_result_port = self.results_incoming.bind_to_random_port(tcp_url(self.interchange_address),
179
157
  min_port=worker_port_range[0],
180
158
  max_port=worker_port_range[1], max_tries=100)
181
159
 
@@ -187,6 +165,8 @@ class Interchange:
187
165
 
188
166
  self.heartbeat_threshold = heartbeat_threshold
189
167
 
168
+ self.manager_selector = manager_selector
169
+
190
170
  self.current_platform = {'parsl_v': PARSL_VERSION,
191
171
  'python_v': "{}.{}.{}".format(sys.version_info.major,
192
172
  sys.version_info.minor,
@@ -243,27 +223,16 @@ class Interchange:
243
223
  task_counter += 1
244
224
  logger.debug(f"Fetched {task_counter} tasks so far")
245
225
 
246
- def _create_monitoring_channel(self) -> Optional[zmq.Socket]:
247
- if self.hub_address and self.hub_port:
248
- logger.info("Connecting to MonitoringHub")
249
- # This is a one-off because monitoring is unencrypted
250
- hub_channel = zmq.Context().socket(zmq.DEALER)
251
- hub_channel.set_hwm(0)
252
- hub_channel.connect("tcp://{}:{}".format(self.hub_address, self.hub_port))
253
- logger.info("Connected to MonitoringHub")
254
- return hub_channel
255
- else:
256
- return None
257
-
258
- def _send_monitoring_info(self, hub_channel: Optional[zmq.Socket], manager: ManagerRecord) -> None:
259
- if hub_channel:
226
+ def _send_monitoring_info(self, monitoring_radio: Optional[MonitoringRadioSender], manager: ManagerRecord) -> None:
227
+ if monitoring_radio:
260
228
  logger.info("Sending message {} to MonitoringHub".format(manager))
261
229
 
262
230
  d: Dict = cast(Dict, manager.copy())
263
231
  d['timestamp'] = datetime.datetime.now()
264
232
  d['last_heartbeat'] = datetime.datetime.fromtimestamp(d['last_heartbeat'])
233
+ d['run_id'] = self.run_id
265
234
 
266
- hub_channel.send_pyobj((MessageType.NODE_INFO, d))
235
+ monitoring_radio.send((MessageType.NODE_INFO, d))
267
236
 
268
237
  @wrap_with_logs(target="interchange")
269
238
  def _command_server(self) -> NoReturn:
@@ -271,8 +240,11 @@ class Interchange:
271
240
  """
272
241
  logger.debug("Command Server Starting")
273
242
 
274
- # Need to create a new ZMQ socket for command server thread
275
- hub_channel = self._create_monitoring_channel()
243
+ if self.hub_address is not None and self.hub_zmq_port is not None:
244
+ logger.debug("Creating monitoring radio to %s:%s", self.hub_address, self.hub_zmq_port)
245
+ monitoring_radio = ZMQRadioSender(self.hub_address, self.hub_zmq_port)
246
+ else:
247
+ monitoring_radio = None
276
248
 
277
249
  reply: Any # the type of reply depends on the command_req received (aka this needs dependent types...)
278
250
 
@@ -280,13 +252,7 @@ class Interchange:
280
252
  try:
281
253
  command_req = self.command_channel.recv_pyobj()
282
254
  logger.debug("Received command request: {}".format(command_req))
283
- if command_req == "OUTSTANDING_C":
284
- outstanding = self.pending_task_queue.qsize()
285
- for manager in self._ready_managers.values():
286
- outstanding += len(manager['tasks'])
287
- reply = outstanding
288
-
289
- elif command_req == "CONNECTED_BLOCKS":
255
+ if command_req == "CONNECTED_BLOCKS":
290
256
  reply = self.connected_block_history
291
257
 
292
258
  elif command_req == "WORKERS":
@@ -310,6 +276,8 @@ class Interchange:
310
276
  'tasks': len(m['tasks']),
311
277
  'idle_duration': idle_duration,
312
278
  'active': m['active'],
279
+ 'parsl_version': m['parsl_version'],
280
+ 'python_version': m['python_version'],
313
281
  'draining': m['draining']}
314
282
  reply.append(resp)
315
283
 
@@ -320,13 +288,17 @@ class Interchange:
320
288
  if manager_id in self._ready_managers:
321
289
  m = self._ready_managers[manager_id]
322
290
  m['active'] = False
323
- self._send_monitoring_info(hub_channel, m)
291
+ self._send_monitoring_info(monitoring_radio, m)
324
292
  else:
325
293
  logger.warning("Worker to hold was not in ready managers list")
326
294
 
327
295
  reply = None
328
296
 
297
+ elif command_req == "WORKER_PORTS":
298
+ reply = (self.worker_task_port, self.worker_result_port)
299
+
329
300
  else:
301
+ logger.error(f"Received unknown command: {command_req}")
330
302
  reply = None
331
303
 
332
304
  logger.debug("Reply: {}".format(reply))
@@ -341,19 +313,14 @@ class Interchange:
341
313
  """ Start the interchange
342
314
  """
343
315
 
344
- # If a user workflow has set its own signal handler for sigterm, that
345
- # handler will be inherited by the interchange process because it is
346
- # launched as a multiprocessing fork process.
347
- # That can interfere with the interchange shutdown mechanism, which is
348
- # to receive a SIGTERM and exit immediately.
349
- # See Parsl issue #2343 (Threads and multiprocessing cannot be
350
- # intermingled without deadlocks) which talks about other fork-related
351
- # parent-process-inheritance problems.
352
- signal.signal(signal.SIGTERM, signal.SIG_DFL)
353
-
354
- logger.info("Incoming ports bound")
316
+ logger.info("Starting main interchange method")
355
317
 
356
- hub_channel = self._create_monitoring_channel()
318
+ if self.hub_address is not None and self.hub_zmq_port is not None:
319
+ logger.debug("Creating monitoring radio to %s:%s", self.hub_address, self.hub_zmq_port)
320
+ monitoring_radio = ZMQRadioSender(self.hub_address, self.hub_zmq_port)
321
+ logger.debug("Created monitoring radio")
322
+ else:
323
+ monitoring_radio = None
357
324
 
358
325
  poll_period = self.poll_period
359
326
 
@@ -384,21 +351,21 @@ class Interchange:
384
351
  while not kill_event.is_set():
385
352
  self.socks = dict(poller.poll(timeout=poll_period))
386
353
 
387
- self.process_task_outgoing_incoming(interesting_managers, hub_channel, kill_event)
388
- self.process_results_incoming(interesting_managers, hub_channel)
389
- self.expire_bad_managers(interesting_managers, hub_channel)
390
- self.expire_drained_managers(interesting_managers, hub_channel)
354
+ self.process_task_outgoing_incoming(interesting_managers, monitoring_radio, kill_event)
355
+ self.process_results_incoming(interesting_managers, monitoring_radio)
356
+ self.expire_bad_managers(interesting_managers, monitoring_radio)
357
+ self.expire_drained_managers(interesting_managers, monitoring_radio)
391
358
  self.process_tasks_to_send(interesting_managers)
392
359
 
393
360
  self.zmq_context.destroy()
394
361
  delta = time.time() - start
395
- logger.info("Processed {} tasks in {} seconds".format(self.count, delta))
362
+ logger.info(f"Processed {self.count} tasks in {delta} seconds")
396
363
  logger.warning("Exiting")
397
364
 
398
365
  def process_task_outgoing_incoming(
399
366
  self,
400
367
  interesting_managers: Set[bytes],
401
- hub_channel: Optional[zmq.Socket],
368
+ monitoring_radio: Optional[MonitoringRadioSender],
402
369
  kill_event: threading.Event
403
370
  ) -> None:
404
371
  """Process one message from manager on the task_outgoing channel.
@@ -413,9 +380,8 @@ class Interchange:
413
380
  try:
414
381
  msg = json.loads(message[1].decode('utf-8'))
415
382
  except Exception:
416
- logger.warning("Got Exception reading message from manager: {!r}".format(
417
- manager_id), exc_info=True)
418
- logger.debug("Message: \n{!r}\n".format(message[1]))
383
+ logger.warning(f"Got Exception reading message from manager: {manager_id!r}", exc_info=True)
384
+ logger.debug("Message:\n %r\n", message[1])
419
385
  return
420
386
 
421
387
  # perform a bit of validation on the structure of the deserialized
@@ -423,7 +389,7 @@ class Interchange:
423
389
  # in obviously malformed cases
424
390
  if not isinstance(msg, dict) or 'type' not in msg:
425
391
  logger.error(f"JSON message was not correctly formatted from manager: {manager_id!r}")
426
- logger.debug("Message: \n{!r}\n".format(message[1]))
392
+ logger.debug("Message:\n %r\n", message[1])
427
393
  return
428
394
 
429
395
  if msg['type'] == 'registration':
@@ -431,15 +397,18 @@ class Interchange:
431
397
  self._ready_managers[manager_id] = {'last_heartbeat': time.time(),
432
398
  'idle_since': time.time(),
433
399
  'block_id': None,
400
+ 'start_time': msg['start_time'],
434
401
  'max_capacity': 0,
435
402
  'worker_count': 0,
436
403
  'active': True,
437
404
  'draining': False,
405
+ 'parsl_version': msg['parsl_v'],
406
+ 'python_version': msg['python_v'],
438
407
  'tasks': []}
439
408
  self.connected_block_history.append(msg['block_id'])
440
409
 
441
410
  interesting_managers.add(manager_id)
442
- logger.info("Adding manager: {!r} to ready queue".format(manager_id))
411
+ logger.info(f"Adding manager: {manager_id!r} to ready queue")
443
412
  m = self._ready_managers[manager_id]
444
413
 
445
414
  # m is a ManagerRecord, but msg is a dict[Any,Any] and so can
@@ -448,12 +417,12 @@ class Interchange:
448
417
  # later.
449
418
  m.update(msg) # type: ignore[typeddict-item]
450
419
 
451
- logger.info("Registration info for manager {!r}: {}".format(manager_id, msg))
452
- self._send_monitoring_info(hub_channel, m)
420
+ logger.info(f"Registration info for manager {manager_id!r}: {msg}")
421
+ self._send_monitoring_info(monitoring_radio, m)
453
422
 
454
423
  if (msg['python_v'].rsplit(".", 1)[0] != self.current_platform['python_v'].rsplit(".", 1)[0] or
455
424
  msg['parsl_v'] != self.current_platform['parsl_v']):
456
- logger.error("Manager {!r} has incompatible version info with the interchange".format(manager_id))
425
+ logger.error(f"Manager {manager_id!r} has incompatible version info with the interchange")
457
426
  logger.debug("Setting kill event")
458
427
  kill_event.set()
459
428
  e = VersionMismatch("py.v={} parsl.v={}".format(self.current_platform['python_v'].rsplit(".", 1)[0],
@@ -466,21 +435,24 @@ class Interchange:
466
435
  self.results_outgoing.send(pkl_package)
467
436
  logger.error("Sent failure reports, shutting down interchange")
468
437
  else:
469
- logger.info("Manager {!r} has compatible Parsl version {}".format(manager_id, msg['parsl_v']))
470
- logger.info("Manager {!r} has compatible Python version {}".format(manager_id,
471
- msg['python_v'].rsplit(".", 1)[0]))
438
+ logger.info(f"Manager {manager_id!r} has compatible Parsl version {msg['parsl_v']}")
439
+ logger.info(f"Manager {manager_id!r} has compatible Python version {msg['python_v'].rsplit('.', 1)[0]}")
472
440
  elif msg['type'] == 'heartbeat':
473
- self._ready_managers[manager_id]['last_heartbeat'] = time.time()
474
- logger.debug("Manager {!r} sent heartbeat via tasks connection".format(manager_id))
475
- self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
441
+ manager = self._ready_managers.get(manager_id)
442
+ if manager:
443
+ manager['last_heartbeat'] = time.time()
444
+ logger.debug("Manager %r sent heartbeat via tasks connection", manager_id)
445
+ self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
446
+ else:
447
+ logger.warning("Received heartbeat via tasks connection for not-registered manager %r", manager_id)
476
448
  elif msg['type'] == 'drain':
477
449
  self._ready_managers[manager_id]['draining'] = True
478
- logger.debug(f"Manager {manager_id!r} requested drain")
450
+ logger.debug("Manager %r requested drain", manager_id)
479
451
  else:
480
452
  logger.error(f"Unexpected message type received from manager: {msg['type']}")
481
453
  logger.debug("leaving task_outgoing section")
482
454
 
483
- def expire_drained_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
455
+ def expire_drained_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
484
456
 
485
457
  for manager_id in list(interesting_managers):
486
458
  # is it always true that a draining manager will be in interesting managers?
@@ -493,18 +465,19 @@ class Interchange:
493
465
  self._ready_managers.pop(manager_id)
494
466
 
495
467
  m['active'] = False
496
- self._send_monitoring_info(hub_channel, m)
468
+ self._send_monitoring_info(monitoring_radio, m)
497
469
 
498
470
  def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
499
471
  # Check if there are tasks that could be sent to managers
500
472
 
501
- logger.debug("Managers count (interesting/total): {interesting}/{total}".format(
502
- total=len(self._ready_managers),
503
- interesting=len(interesting_managers)))
473
+ logger.debug(
474
+ "Managers count (interesting/total): %d/%d",
475
+ len(interesting_managers),
476
+ len(self._ready_managers)
477
+ )
504
478
 
505
479
  if interesting_managers and not self.pending_task_queue.empty():
506
- shuffled_managers = list(interesting_managers)
507
- random.shuffle(shuffled_managers)
480
+ shuffled_managers = self.manager_selector.sort_managers(self._ready_managers, interesting_managers)
508
481
 
509
482
  while shuffled_managers and not self.pending_task_queue.empty(): # cf. the if statement above...
510
483
  manager_id = shuffled_managers.pop()
@@ -512,7 +485,7 @@ class Interchange:
512
485
  tasks_inflight = len(m['tasks'])
513
486
  real_capacity = m['max_capacity'] - tasks_inflight
514
487
 
515
- if (real_capacity and m['active'] and not m['draining']):
488
+ if real_capacity and m["active"] and not m["draining"]:
516
489
  tasks = self.get_tasks(real_capacity)
517
490
  if tasks:
518
491
  self.task_outgoing.send_multipart([manager_id, b'', pickle.dumps(tasks)])
@@ -521,31 +494,31 @@ class Interchange:
521
494
  tids = [t['task_id'] for t in tasks]
522
495
  m['tasks'].extend(tids)
523
496
  m['idle_since'] = None
524
- logger.debug("Sent tasks: {} to manager {!r}".format(tids, manager_id))
497
+ logger.debug("Sent tasks: %s to manager %r", tids, manager_id)
525
498
  # recompute real_capacity after sending tasks
526
499
  real_capacity = m['max_capacity'] - tasks_inflight
527
500
  if real_capacity > 0:
528
- logger.debug("Manager {!r} has free capacity {}".format(manager_id, real_capacity))
501
+ logger.debug("Manager %r has free capacity %s", manager_id, real_capacity)
529
502
  # ... so keep it in the interesting_managers list
530
503
  else:
531
- logger.debug("Manager {!r} is now saturated".format(manager_id))
504
+ logger.debug("Manager %r is now saturated", manager_id)
532
505
  interesting_managers.remove(manager_id)
533
506
  else:
534
507
  interesting_managers.remove(manager_id)
535
508
  # logger.debug("Nothing to send to manager {}".format(manager_id))
536
- logger.debug("leaving _ready_managers section, with {} managers still interesting".format(len(interesting_managers)))
509
+ logger.debug("leaving _ready_managers section, with %s managers still interesting", len(interesting_managers))
537
510
  else:
538
511
  logger.debug("either no interesting managers or no tasks, so skipping manager pass")
539
512
 
540
- def process_results_incoming(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
513
+ def process_results_incoming(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
541
514
  # Receive any results and forward to client
542
515
  if self.results_incoming in self.socks and self.socks[self.results_incoming] == zmq.POLLIN:
543
516
  logger.debug("entering results_incoming section")
544
517
  manager_id, *all_messages = self.results_incoming.recv_multipart()
545
518
  if manager_id not in self._ready_managers:
546
- logger.warning("Received a result from a un-registered manager: {!r}".format(manager_id))
519
+ logger.warning(f"Received a result from a un-registered manager: {manager_id!r}")
547
520
  else:
548
- logger.debug(f"Got {len(all_messages)} result items in batch from manager {manager_id!r}")
521
+ logger.debug("Got %s result items in batch from manager %r", len(all_messages), manager_id)
549
522
 
550
523
  b_messages = []
551
524
 
@@ -557,16 +530,15 @@ class Interchange:
557
530
  elif r['type'] == 'monitoring':
558
531
  # the monitoring code makes the assumption that no
559
532
  # monitoring messages will be received if monitoring
560
- # is not configured, and that hub_channel will only
533
+ # is not configured, and that monitoring_radio will only
561
534
  # be None when monitoring is not configurated.
562
- assert hub_channel is not None
535
+ assert monitoring_radio is not None
563
536
 
564
- hub_channel.send_pyobj(r['payload'])
537
+ monitoring_radio.send(r['payload'])
565
538
  elif r['type'] == 'heartbeat':
566
- logger.debug(f"Manager {manager_id!r} sent heartbeat via results connection")
567
- b_messages.append((p_message, r))
539
+ logger.debug("Manager %r sent heartbeat via results connection", manager_id)
568
540
  else:
569
- logger.error("Interchange discarding result_queue message of unknown type: {}".format(r['type']))
541
+ logger.error("Interchange discarding result_queue message of unknown type: %s", r["type"])
570
542
 
571
543
  got_result = False
572
544
  m = self._ready_managers[manager_id]
@@ -575,14 +547,16 @@ class Interchange:
575
547
  if r['type'] == 'result':
576
548
  got_result = True
577
549
  try:
578
- logger.debug(f"Removing task {r['task_id']} from manager record {manager_id!r}")
550
+ logger.debug("Removing task %s from manager record %r", r["task_id"], manager_id)
579
551
  m['tasks'].remove(r['task_id'])
580
552
  except Exception:
581
553
  # If we reach here, there's something very wrong.
582
- logger.exception("Ignoring exception removing task_id {} for manager {!r} with task list {}".format(
554
+ logger.exception(
555
+ "Ignoring exception removing task_id %s for manager %r with task list %s",
583
556
  r['task_id'],
584
557
  manager_id,
585
- m['tasks']))
558
+ m["tasks"]
559
+ )
586
560
 
587
561
  b_messages_to_send = []
588
562
  for (b_message, _) in b_messages:
@@ -593,7 +567,7 @@ class Interchange:
593
567
  self.results_outgoing.send_multipart(b_messages_to_send)
594
568
  logger.debug("Sent messages on results_outgoing")
595
569
 
596
- logger.debug(f"Current tasks on manager {manager_id!r}: {m['tasks']}")
570
+ logger.debug("Current tasks on manager %r: %s", manager_id, m["tasks"])
597
571
  if len(m['tasks']) == 0 and m['idle_since'] is None:
598
572
  m['idle_since'] = time.time()
599
573
 
@@ -605,7 +579,7 @@ class Interchange:
605
579
  interesting_managers.add(manager_id)
606
580
  logger.debug("leaving results_incoming section")
607
581
 
608
- def expire_bad_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
582
+ def expire_bad_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
609
583
  bad_managers = [(manager_id, m) for (manager_id, m) in self._ready_managers.items() if
610
584
  time.time() - m['last_heartbeat'] > self.heartbeat_threshold]
611
585
  for (manager_id, m) in bad_managers:
@@ -613,7 +587,7 @@ class Interchange:
613
587
  logger.warning(f"Too many heartbeats missed for manager {manager_id!r} - removing manager")
614
588
  if m['active']:
615
589
  m['active'] = False
616
- self._send_monitoring_info(hub_channel, m)
590
+ self._send_monitoring_info(monitoring_radio, m)
617
591
 
618
592
  logger.warning(f"Cancelling htex tasks {m['tasks']} on removed manager")
619
593
  for tid in m['tasks']:
@@ -666,15 +640,10 @@ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string:
666
640
  logger.addHandler(handler)
667
641
 
668
642
 
669
- @wrap_with_logs(target="interchange")
670
- def starter(comm_q: multiprocessing.Queue, *args: Any, **kwargs: Any) -> None:
671
- """Start the interchange process
672
-
673
- The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
674
- """
643
+ if __name__ == "__main__":
675
644
  setproctitle("parsl: HTEX interchange")
676
- # logger = multiprocessing.get_logger()
677
- ic = Interchange(*args, **kwargs)
678
- comm_q.put((ic.worker_task_port,
679
- ic.worker_result_port))
645
+
646
+ config = pickle.load(sys.stdin.buffer)
647
+
648
+ ic = Interchange(**config)
680
649
  ic.start()
@@ -1,10 +1,12 @@
1
1
  from datetime import datetime
2
2
  from typing import Any, List, Optional
3
+
3
4
  from typing_extensions import TypedDict
4
5
 
5
6
 
6
7
  class ManagerRecord(TypedDict, total=False):
7
8
  block_id: Optional[str]
9
+ start_time: float
8
10
  tasks: List[Any]
9
11
  worker_count: int
10
12
  max_capacity: int
@@ -14,3 +16,5 @@ class ManagerRecord(TypedDict, total=False):
14
16
  last_heartbeat: float
15
17
  idle_since: Optional[float]
16
18
  timestamp: datetime
19
+ parsl_version: str
20
+ python_version: str