parsl 2024.3.11__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +29 -7
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +57 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +262 -224
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +316 -282
  57. parsl/executors/high_throughput/interchange.py +158 -167
  58. parsl/executors/high_throughput/manager_record.py +5 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +115 -77
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +41 -57
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +18 -13
  78. parsl/executors/taskvine/manager_config.py +9 -5
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +30 -113
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +6 -12
  91. parsl/log_utils.py +9 -6
  92. parsl/monitoring/db_manager.py +59 -95
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +87 -356
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +11 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -8
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +4 -12
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +2 -8
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +79 -0
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +86 -0
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +6 -18
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +139 -6
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +115 -77
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.11.dist-info/METADATA +0 -98
  360. parsl-2024.3.11.dist-info/RECORD +0 -447
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,649 @@
+#!python
+import datetime
+import json
+import logging
+import os
+import pickle
+import platform
+import queue
+import sys
+import threading
+import time
+from typing import Any, Dict, List, NoReturn, Optional, Sequence, Set, Tuple, cast
+
+import zmq
+
+from parsl import curvezmq
+from parsl.addresses import tcp_url
+from parsl.app.errors import RemoteExceptionWrapper
+from parsl.executors.high_throughput.errors import ManagerLost, VersionMismatch
+from parsl.executors.high_throughput.manager_record import ManagerRecord
+from parsl.executors.high_throughput.manager_selector import ManagerSelector
+from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios.base import MonitoringRadioSender
+from parsl.monitoring.radios.zmq import ZMQRadioSender
+from parsl.process_loggers import wrap_with_logs
+from parsl.serialize import serialize as serialize_object
+from parsl.utils import setproctitle
+from parsl.version import VERSION as PARSL_VERSION
+
+PKL_HEARTBEAT_CODE = pickle.dumps((2 ** 32) - 1)
+PKL_DRAINED_CODE = pickle.dumps((2 ** 32) - 2)
+
+LOGGER_NAME = "interchange"
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class Interchange:
+    """ Interchange is a task orchestrator for distributed systems.
+
+    1. Asynchronously queue large volume of tasks (>100K)
+    2. Allow for workers to join and leave the union
+    3. Detect workers that have failed using heartbeats
+    """
+    def __init__(self,
+                 *,
+                 client_address: str,
+                 interchange_address: Optional[str],
+                 client_ports: Tuple[int, int, int],
+                 worker_ports: Optional[Tuple[int, int]],
+                 worker_port_range: Tuple[int, int],
+                 hub_address: Optional[str],
+                 hub_zmq_port: Optional[int],
+                 heartbeat_threshold: int,
+                 logdir: str,
+                 logging_level: int,
+                 poll_period: int,
+                 cert_dir: Optional[str],
+                 manager_selector: ManagerSelector,
+                 run_id: str,
+                 ) -> None:
+        """
+        Parameters
+        ----------
+        client_address : str
+             The ip address at which the parsl client can be reached. Default: "127.0.0.1"
+
+        interchange_address : Optional str
+             If specified the interchange will only listen on this address for connections from workers
+             else, it binds to all addresses.
+
+        client_ports : tuple(int, int, int)
+             The ports at which the client can be reached
+
+        worker_ports : tuple(int, int)
+             The specific two ports at which workers will connect to the Interchange.
+
+        worker_port_range : tuple(int, int)
+             The interchange picks ports at random from the range which will be used by workers.
+             This is overridden when the worker_ports option is set.
+
+        hub_address : str
+             The IP address at which the interchange can send info about managers to when monitoring is enabled.
+             When None, monitoring is disabled.
+
+        hub_zmq_port : str
+             The port at which the interchange can send info about managers to when monitoring is enabled.
+             When None, monitoring is disabled.
+
+        heartbeat_threshold : int
+             Number of seconds since the last heartbeat after which worker is considered lost.
+
+        logdir : str
+             Parsl log directory paths. Logs and temp files go here.
+
+        logging_level : int
+             Logging level as defined in the logging module.
+
+        poll_period : int
+             The main thread polling period, in milliseconds.
+
+        cert_dir : str | None
+             Path to the certificate directory.
+        """
+        self.cert_dir = cert_dir
+        self.logdir = logdir
+        os.makedirs(self.logdir, exist_ok=True)
+
+        start_file_logger("{}/interchange.log".format(self.logdir), level=logging_level)
+        logger.debug("Initializing Interchange process")
+
+        self.client_address = client_address
+        self.interchange_address: str = interchange_address or "*"
+        self.poll_period = poll_period
+
+        logger.info("Attempting connection to client at {} on ports: {},{},{}".format(
+            client_address, client_ports[0], client_ports[1], client_ports[2]))
+        self.zmq_context = curvezmq.ServerContext(self.cert_dir)
+        self.task_incoming = self.zmq_context.socket(zmq.DEALER)
+        self.task_incoming.set_hwm(0)
+        self.task_incoming.connect(tcp_url(client_address, client_ports[0]))
+        self.results_outgoing = self.zmq_context.socket(zmq.DEALER)
+        self.results_outgoing.set_hwm(0)
+        self.results_outgoing.connect(tcp_url(client_address, client_ports[1]))
+
+        self.command_channel = self.zmq_context.socket(zmq.REP)
+        self.command_channel.connect(tcp_url(client_address, client_ports[2]))
+        logger.info("Connected to client")
+
+        self.run_id = run_id
+
+        self.hub_address = hub_address
+        self.hub_zmq_port = hub_zmq_port
+
+        self.pending_task_queue: queue.Queue[Any] = queue.Queue(maxsize=10 ** 6)
+        self.count = 0
+
+        self.worker_ports = worker_ports
+        self.worker_port_range = worker_port_range
+
+        self.task_outgoing = self.zmq_context.socket(zmq.ROUTER)
+        self.task_outgoing.set_hwm(0)
+        self.results_incoming = self.zmq_context.socket(zmq.ROUTER)
+        self.results_incoming.set_hwm(0)
+
+        if self.worker_ports:
+            self.worker_task_port = self.worker_ports[0]
+            self.worker_result_port = self.worker_ports[1]
+
+            self.task_outgoing.bind(tcp_url(self.interchange_address, self.worker_task_port))
+            self.results_incoming.bind(tcp_url(self.interchange_address, self.worker_result_port))
+
+        else:
+            self.worker_task_port = self.task_outgoing.bind_to_random_port(tcp_url(self.interchange_address),
+                                                                           min_port=worker_port_range[0],
+                                                                           max_port=worker_port_range[1], max_tries=100)
+            self.worker_result_port = self.results_incoming.bind_to_random_port(tcp_url(self.interchange_address),
+                                                                                min_port=worker_port_range[0],
+                                                                                max_port=worker_port_range[1], max_tries=100)
+
+        logger.info("Bound to ports {},{} for incoming worker connections".format(
+            self.worker_task_port, self.worker_result_port))
+
+        self._ready_managers: Dict[bytes, ManagerRecord] = {}
+        self.connected_block_history: List[str] = []
+
+        self.heartbeat_threshold = heartbeat_threshold
+
+        self.manager_selector = manager_selector
+
+        self.current_platform = {'parsl_v': PARSL_VERSION,
+                                 'python_v': "{}.{}.{}".format(sys.version_info.major,
+                                                               sys.version_info.minor,
+                                                               sys.version_info.micro),
+                                 'os': platform.system(),
+                                 'hostname': platform.node(),
+                                 'dir': os.getcwd()}
+
+        logger.info("Platform info: {}".format(self.current_platform))
+
+    def get_tasks(self, count: int) -> Sequence[dict]:
+        """ Obtains a batch of tasks from the internal pending_task_queue
+
+        Parameters
+        ----------
+        count: int
+            Count of tasks to get from the queue
+
+        Returns
+        -------
+        List of upto count tasks. May return fewer than count down to an empty list
+            eg. [{'task_id':<x>, 'buffer':<buf>} ... ]
+        """
+        tasks = []
+        for _ in range(0, count):
+            try:
+                x = self.pending_task_queue.get(block=False)
+            except queue.Empty:
+                break
+            else:
+                tasks.append(x)
+
+        return tasks
+
+    @wrap_with_logs(target="interchange")
+    def task_puller(self) -> NoReturn:
+        """Pull tasks from the incoming tasks zmq pipe onto the internal
+        pending task queue
+        """
+        logger.info("Starting")
+        task_counter = 0
+
+        while True:
+            logger.debug("launching recv_pyobj")
+            try:
+                msg = self.task_incoming.recv_pyobj()
+            except zmq.Again:
+                # We just timed out while attempting to receive
+                logger.debug("zmq.Again with {} tasks in internal queue".format(self.pending_task_queue.qsize()))
+                continue
+
+            logger.debug("putting message onto pending_task_queue")
+            self.pending_task_queue.put(msg)
+            task_counter += 1
+            logger.debug(f"Fetched {task_counter} tasks so far")
+
+    def _send_monitoring_info(self, monitoring_radio: Optional[MonitoringRadioSender], manager: ManagerRecord) -> None:
+        if monitoring_radio:
+            logger.info("Sending message {} to MonitoringHub".format(manager))
+
+            d: Dict = cast(Dict, manager.copy())
+            d['timestamp'] = datetime.datetime.now()
+            d['last_heartbeat'] = datetime.datetime.fromtimestamp(d['last_heartbeat'])
+            d['run_id'] = self.run_id
+
+            monitoring_radio.send((MessageType.NODE_INFO, d))
+
+    @wrap_with_logs(target="interchange")
+    def _command_server(self) -> NoReturn:
+        """ Command server to run async command to the interchange
+        """
+        logger.debug("Command Server Starting")
+
+        if self.hub_address is not None and self.hub_zmq_port is not None:
+            logger.debug("Creating monitoring radio to %s:%s", self.hub_address, self.hub_zmq_port)
+            monitoring_radio = ZMQRadioSender(self.hub_address, self.hub_zmq_port)
+        else:
+            monitoring_radio = None
+
+        reply: Any  # the type of reply depends on the command_req received (aka this needs dependent types...)
+
+        while True:
+            try:
+                command_req = self.command_channel.recv_pyobj()
+                logger.debug("Received command request: {}".format(command_req))
+                if command_req == "CONNECTED_BLOCKS":
+                    reply = self.connected_block_history
+
+                elif command_req == "WORKERS":
+                    num_workers = 0
+                    for manager in self._ready_managers.values():
+                        num_workers += manager['worker_count']
+                    reply = num_workers
+
+                elif command_req == "MANAGERS":
+                    reply = []
+                    for manager_id in self._ready_managers:
+                        m = self._ready_managers[manager_id]
+                        idle_since = m['idle_since']
+                        if idle_since is not None:
+                            idle_duration = time.time() - idle_since
+                        else:
+                            idle_duration = 0.0
+                        resp = {'manager': manager_id.decode('utf-8'),
+                                'block_id': m['block_id'],
+                                'worker_count': m['worker_count'],
+                                'tasks': len(m['tasks']),
+                                'idle_duration': idle_duration,
+                                'active': m['active'],
+                                'parsl_version': m['parsl_version'],
+                                'python_version': m['python_version'],
+                                'draining': m['draining']}
+                        reply.append(resp)
+
+                elif command_req.startswith("HOLD_WORKER"):
+                    cmd, s_manager = command_req.split(';')
+                    manager_id = s_manager.encode('utf-8')
+                    logger.info("Received HOLD_WORKER for {!r}".format(manager_id))
+                    if manager_id in self._ready_managers:
+                        m = self._ready_managers[manager_id]
+                        m['active'] = False
+                        self._send_monitoring_info(monitoring_radio, m)
+                    else:
+                        logger.warning("Worker to hold was not in ready managers list")
+
+                    reply = None
+
+                elif command_req == "WORKER_PORTS":
+                    reply = (self.worker_task_port, self.worker_result_port)
+
+                else:
+                    logger.error(f"Received unknown command: {command_req}")
+                    reply = None
+
+                logger.debug("Reply: {}".format(reply))
+                self.command_channel.send_pyobj(reply)
+
+            except zmq.Again:
+                logger.debug("Command thread is alive")
+                continue
+
+    @wrap_with_logs
+    def start(self) -> None:
+        """ Start the interchange
+        """
+
+        logger.info("Starting main interchange method")
+
+        if self.hub_address is not None and self.hub_zmq_port is not None:
+            logger.debug("Creating monitoring radio to %s:%s", self.hub_address, self.hub_zmq_port)
+            monitoring_radio = ZMQRadioSender(self.hub_address, self.hub_zmq_port)
+            logger.debug("Created monitoring radio")
+        else:
+            monitoring_radio = None
+
+        poll_period = self.poll_period
+
+        start = time.time()
+
+        self._task_puller_thread = threading.Thread(target=self.task_puller,
+                                                    name="Interchange-Task-Puller",
+                                                    daemon=True)
+        self._task_puller_thread.start()
+
+        self._command_thread = threading.Thread(target=self._command_server,
+                                                name="Interchange-Command",
+                                                daemon=True)
+        self._command_thread.start()
+
+        kill_event = threading.Event()
+
+        poller = zmq.Poller()
+        poller.register(self.task_outgoing, zmq.POLLIN)
+        poller.register(self.results_incoming, zmq.POLLIN)
+
+        # These are managers which we should examine in an iteration
+        # for scheduling a job (or maybe any other attention?).
+        # Anything altering the state of the manager should add it
+        # onto this list.
+        interesting_managers: Set[bytes] = set()
+
+        while not kill_event.is_set():
+            self.socks = dict(poller.poll(timeout=poll_period))
+
+            self.process_task_outgoing_incoming(interesting_managers, monitoring_radio, kill_event)
+            self.process_results_incoming(interesting_managers, monitoring_radio)
+            self.expire_bad_managers(interesting_managers, monitoring_radio)
+            self.expire_drained_managers(interesting_managers, monitoring_radio)
+            self.process_tasks_to_send(interesting_managers)
+
+        self.zmq_context.destroy()
+        delta = time.time() - start
+        logger.info(f"Processed {self.count} tasks in {delta} seconds")
+        logger.warning("Exiting")
+
+    def process_task_outgoing_incoming(
+            self,
+            interesting_managers: Set[bytes],
+            monitoring_radio: Optional[MonitoringRadioSender],
+            kill_event: threading.Event
+    ) -> None:
+        """Process one message from manager on the task_outgoing channel.
+        Note that this message flow is in contradiction to the name of the
+        channel - it is not an outgoing message and it is not a task.
+        """
+        if self.task_outgoing in self.socks and self.socks[self.task_outgoing] == zmq.POLLIN:
+            logger.debug("starting task_outgoing section")
+            message = self.task_outgoing.recv_multipart()
+            manager_id = message[0]
+
+            try:
+                msg = json.loads(message[1].decode('utf-8'))
+            except Exception:
+                logger.warning(f"Got Exception reading message from manager: {manager_id!r}", exc_info=True)
+                logger.debug("Message:\n %r\n", message[1])
+                return
+
+            # perform a bit of validation on the structure of the deserialized
+            # object, at least enough to behave like a deserialization error
+            # in obviously malformed cases
+            if not isinstance(msg, dict) or 'type' not in msg:
+                logger.error(f"JSON message was not correctly formatted from manager: {manager_id!r}")
+                logger.debug("Message:\n %r\n", message[1])
+                return
+
+            if msg['type'] == 'registration':
+                # We set up an entry only if registration works correctly
+                self._ready_managers[manager_id] = {'last_heartbeat': time.time(),
+                                                    'idle_since': time.time(),
+                                                    'block_id': None,
+                                                    'start_time': msg['start_time'],
+                                                    'max_capacity': 0,
+                                                    'worker_count': 0,
+                                                    'active': True,
+                                                    'draining': False,
+                                                    'parsl_version': msg['parsl_v'],
+                                                    'python_version': msg['python_v'],
+                                                    'tasks': []}
+                self.connected_block_history.append(msg['block_id'])
+
+                interesting_managers.add(manager_id)
+                logger.info(f"Adding manager: {manager_id!r} to ready queue")
+                m = self._ready_managers[manager_id]
+
+                # m is a ManagerRecord, but msg is a dict[Any,Any] and so can
+                # contain arbitrary fields beyond those in ManagerRecord (and
+                # indeed does - for example, python_v) which are then ignored
+                # later.
+                m.update(msg)  # type: ignore[typeddict-item]
+
+                logger.info(f"Registration info for manager {manager_id!r}: {msg}")
+                self._send_monitoring_info(monitoring_radio, m)
+
+                if (msg['python_v'].rsplit(".", 1)[0] != self.current_platform['python_v'].rsplit(".", 1)[0] or
+                    msg['parsl_v'] != self.current_platform['parsl_v']):
+                    logger.error(f"Manager {manager_id!r} has incompatible version info with the interchange")
+                    logger.debug("Setting kill event")
+                    kill_event.set()
+                    e = VersionMismatch("py.v={} parsl.v={}".format(self.current_platform['python_v'].rsplit(".", 1)[0],
+                                                                    self.current_platform['parsl_v']),
+                                        "py.v={} parsl.v={}".format(msg['python_v'].rsplit(".", 1)[0],
+                                                                    msg['parsl_v'])
+                                        )
+                    result_package = {'type': 'result', 'task_id': -1, 'exception': serialize_object(e)}
+                    pkl_package = pickle.dumps(result_package)
+                    self.results_outgoing.send(pkl_package)
+                    logger.error("Sent failure reports, shutting down interchange")
+                else:
+                    logger.info(f"Manager {manager_id!r} has compatible Parsl version {msg['parsl_v']}")
+                    logger.info(f"Manager {manager_id!r} has compatible Python version {msg['python_v'].rsplit('.', 1)[0]}")
+            elif msg['type'] == 'heartbeat':
+                manager = self._ready_managers.get(manager_id)
+                if manager:
+                    manager['last_heartbeat'] = time.time()
+                    logger.debug("Manager %r sent heartbeat via tasks connection", manager_id)
+                    self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
+                else:
+                    logger.warning("Received heartbeat via tasks connection for not-registered manager %r", manager_id)
+            elif msg['type'] == 'drain':
+                self._ready_managers[manager_id]['draining'] = True
+                logger.debug("Manager %r requested drain", manager_id)
+            else:
+                logger.error(f"Unexpected message type received from manager: {msg['type']}")
+            logger.debug("leaving task_outgoing section")
+
+    def expire_drained_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
+
+        for manager_id in list(interesting_managers):
+            # is it always true that a draining manager will be in interesting managers?
+            # i think so because it will have outstanding capacity?
+            m = self._ready_managers[manager_id]
+            if m['draining'] and len(m['tasks']) == 0:
+                logger.info(f"Manager {manager_id!r} is drained - sending drained message to manager")
+                self.task_outgoing.send_multipart([manager_id, b'', PKL_DRAINED_CODE])
+                interesting_managers.remove(manager_id)
+                self._ready_managers.pop(manager_id)
+
+                m['active'] = False
+                self._send_monitoring_info(monitoring_radio, m)
+
+    def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
+        # Check if there are tasks that could be sent to managers
+
+        logger.debug(
+            "Managers count (interesting/total): %d/%d",
+            len(interesting_managers),
+            len(self._ready_managers)
+        )
+
+        if interesting_managers and not self.pending_task_queue.empty():
+            shuffled_managers = self.manager_selector.sort_managers(self._ready_managers, interesting_managers)
+
+            while shuffled_managers and not self.pending_task_queue.empty():  # cf. the if statement above...
+                manager_id = shuffled_managers.pop()
+                m = self._ready_managers[manager_id]
+                tasks_inflight = len(m['tasks'])
+                real_capacity = m['max_capacity'] - tasks_inflight
+
+                if real_capacity and m["active"] and not m["draining"]:
+                    tasks = self.get_tasks(real_capacity)
+                    if tasks:
+                        self.task_outgoing.send_multipart([manager_id, b'', pickle.dumps(tasks)])
+                        task_count = len(tasks)
+                        self.count += task_count
+                        tids = [t['task_id'] for t in tasks]
+                        m['tasks'].extend(tids)
+                        m['idle_since'] = None
+                        logger.debug("Sent tasks: %s to manager %r", tids, manager_id)
+                        # recompute real_capacity after sending tasks
+                        real_capacity = m['max_capacity'] - tasks_inflight
+                        if real_capacity > 0:
+                            logger.debug("Manager %r has free capacity %s", manager_id, real_capacity)
+                            # ... so keep it in the interesting_managers list
+                        else:
+                            logger.debug("Manager %r is now saturated", manager_id)
+                            interesting_managers.remove(manager_id)
+                else:
+                    interesting_managers.remove(manager_id)
+                    # logger.debug("Nothing to send to manager {}".format(manager_id))
+            logger.debug("leaving _ready_managers section, with %s managers still interesting", len(interesting_managers))
+        else:
+            logger.debug("either no interesting managers or no tasks, so skipping manager pass")
+
+    def process_results_incoming(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
+        # Receive any results and forward to client
+        if self.results_incoming in self.socks and self.socks[self.results_incoming] == zmq.POLLIN:
+            logger.debug("entering results_incoming section")
+            manager_id, *all_messages = self.results_incoming.recv_multipart()
+            if manager_id not in self._ready_managers:
+                logger.warning(f"Received a result from a un-registered manager: {manager_id!r}")
+            else:
+                logger.debug("Got %s result items in batch from manager %r", len(all_messages), manager_id)
+
+                b_messages = []
+
+                for p_message in all_messages:
+                    r = pickle.loads(p_message)
+                    if r['type'] == 'result':
+                        # process this for task ID and forward to executor
+                        b_messages.append((p_message, r))
+                    elif r['type'] == 'monitoring':
+                        # the monitoring code makes the assumption that no
+                        # monitoring messages will be received if monitoring
+                        # is not configured, and that monitoring_radio will only
+                        # be None when monitoring is not configurated.
+                        assert monitoring_radio is not None
+
+                        monitoring_radio.send(r['payload'])
+                    elif r['type'] == 'heartbeat':
+                        logger.debug("Manager %r sent heartbeat via results connection", manager_id)
+                    else:
+                        logger.error("Interchange discarding result_queue message of unknown type: %s", r["type"])
+
+                got_result = False
+                m = self._ready_managers[manager_id]
+                for (_, r) in b_messages:
+                    assert 'type' in r, f"Message is missing type entry: {r}"
+                    if r['type'] == 'result':
+                        got_result = True
+                        try:
+                            logger.debug("Removing task %s from manager record %r", r["task_id"], manager_id)
+                            m['tasks'].remove(r['task_id'])
+                        except Exception:
+                            # If we reach here, there's something very wrong.
+                            logger.exception(
+                                "Ignoring exception removing task_id %s for manager %r with task list %s",
+                                r['task_id'],
+                                manager_id,
+                                m["tasks"]
+                            )
+
+                b_messages_to_send = []
+                for (b_message, _) in b_messages:
+                    b_messages_to_send.append(b_message)
+
+                if b_messages_to_send:
+                    logger.debug("Sending messages on results_outgoing")
+                    self.results_outgoing.send_multipart(b_messages_to_send)
+                    logger.debug("Sent messages on results_outgoing")
+
+                logger.debug("Current tasks on manager %r: %s", manager_id, m["tasks"])
+                if len(m['tasks']) == 0 and m['idle_since'] is None:
+                    m['idle_since'] = time.time()
+
+                # A manager is only made interesting here if a result was
+                # received, which means there should be capacity for a new
+                # task now. Heartbeats and monitoring messages do not make a
+                # manager become interesting.
+                if got_result:
+                    interesting_managers.add(manager_id)
+            logger.debug("leaving results_incoming section")
+
+    def expire_bad_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
+        bad_managers = [(manager_id, m) for (manager_id, m) in self._ready_managers.items() if
+                        time.time() - m['last_heartbeat'] > self.heartbeat_threshold]
+        for (manager_id, m) in bad_managers:
+            logger.debug("Last: {} Current: {}".format(m['last_heartbeat'], time.time()))
+            logger.warning(f"Too many heartbeats missed for manager {manager_id!r} - removing manager")
+            if m['active']:
+                m['active'] = False
+                self._send_monitoring_info(monitoring_radio, m)
+
+            logger.warning(f"Cancelling htex tasks {m['tasks']} on removed manager")
+            for tid in m['tasks']:
+                try:
+                    raise ManagerLost(manager_id, m['hostname'])
+                except Exception:
+                    result_package = {'type': 'result', 'task_id': tid, 'exception': serialize_object(RemoteExceptionWrapper(*sys.exc_info()))}
+                    pkl_package = pickle.dumps(result_package)
+                    self.results_outgoing.send(pkl_package)
+            logger.warning("Sent failure reports, unregistering manager")
+            self._ready_managers.pop(manager_id, 'None')
+            if manager_id in interesting_managers:
+                interesting_managers.remove(manager_id)
+
+
+def start_file_logger(filename: str, level: int = logging.DEBUG, format_string: Optional[str] = None) -> None:
+    """Add a stream log handler.
+
+    Parameters
+    ---------
+
+    filename: string
+        Name of the file to write logs to. Required.
+    level: logging.LEVEL
+        Set the logging level. Default=logging.DEBUG
+    - format_string (string): Set the format string
+    format_string: string
+        Format string to use.
+
+    Returns
+    -------
+        None.
+    """
+    if format_string is None:
+        format_string = (
+
+            "%(asctime)s.%(msecs)03d %(name)s:%(lineno)d "
+            "%(processName)s(%(process)d) %(threadName)s "
+            "%(funcName)s [%(levelname)s] %(message)s"
+
+        )
+
+    global logger
+    logger = logging.getLogger(LOGGER_NAME)
+    logger.setLevel(level)
+    handler = logging.FileHandler(filename)
+    handler.setLevel(level)
+    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%d %H:%M:%S')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+
+if __name__ == "__main__":
+    setproctitle("parsl: HTEX interchange")
+
+    config = pickle.load(sys.stdin.buffer)
+
+    ic = Interchange(**config)
+    ic.start()