parsl 2024.3.18__py3-none-any.whl → 2025.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. parsl/__init__.py +9 -10
  2. parsl/addresses.py +26 -6
  3. parsl/app/app.py +7 -8
  4. parsl/app/bash.py +15 -8
  5. parsl/app/errors.py +10 -13
  6. parsl/app/futures.py +8 -10
  7. parsl/app/python.py +2 -1
  8. parsl/benchmark/perf.py +2 -1
  9. parsl/concurrent/__init__.py +2 -2
  10. parsl/config.py +53 -10
  11. parsl/configs/ASPIRE1.py +6 -5
  12. parsl/configs/Azure.py +9 -8
  13. parsl/configs/bridges.py +6 -4
  14. parsl/configs/cc_in2p3.py +3 -3
  15. parsl/configs/ec2.py +3 -1
  16. parsl/configs/expanse.py +4 -3
  17. parsl/configs/frontera.py +3 -4
  18. parsl/configs/htex_local.py +3 -4
  19. parsl/configs/illinoiscluster.py +3 -1
  20. parsl/configs/improv.py +34 -0
  21. parsl/configs/kubernetes.py +4 -3
  22. parsl/configs/local_threads.py +5 -1
  23. parsl/configs/midway.py +5 -3
  24. parsl/configs/osg.py +4 -2
  25. parsl/configs/polaris.py +4 -2
  26. parsl/configs/stampede2.py +6 -5
  27. parsl/configs/summit.py +3 -3
  28. parsl/configs/toss3_llnl.py +4 -3
  29. parsl/configs/vineex_local.py +6 -4
  30. parsl/configs/wqex_local.py +5 -3
  31. parsl/curvezmq.py +4 -0
  32. parsl/data_provider/data_manager.py +4 -3
  33. parsl/data_provider/file_noop.py +1 -2
  34. parsl/data_provider/files.py +3 -3
  35. parsl/data_provider/ftp.py +1 -3
  36. parsl/data_provider/globus.py +7 -6
  37. parsl/data_provider/http.py +2 -2
  38. parsl/data_provider/rsync.py +1 -1
  39. parsl/data_provider/staging.py +2 -2
  40. parsl/data_provider/zip.py +135 -0
  41. parsl/dataflow/dependency_resolvers.py +115 -0
  42. parsl/dataflow/dflow.py +259 -223
  43. parsl/dataflow/errors.py +3 -5
  44. parsl/dataflow/futures.py +27 -14
  45. parsl/dataflow/memoization.py +5 -5
  46. parsl/dataflow/rundirs.py +5 -6
  47. parsl/dataflow/taskrecord.py +4 -5
  48. parsl/executors/__init__.py +4 -2
  49. parsl/executors/base.py +45 -15
  50. parsl/executors/errors.py +13 -0
  51. parsl/executors/execute_task.py +37 -0
  52. parsl/executors/flux/execute_parsl_task.py +3 -3
  53. parsl/executors/flux/executor.py +18 -19
  54. parsl/executors/flux/flux_instance_manager.py +26 -27
  55. parsl/executors/high_throughput/errors.py +43 -3
  56. parsl/executors/high_throughput/executor.py +307 -285
  57. parsl/executors/high_throughput/interchange.py +137 -168
  58. parsl/executors/high_throughput/manager_record.py +4 -0
  59. parsl/executors/high_throughput/manager_selector.py +55 -0
  60. parsl/executors/high_throughput/monitoring_info.py +2 -1
  61. parsl/executors/high_throughput/mpi_executor.py +113 -0
  62. parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
  63. parsl/executors/high_throughput/mpi_resource_management.py +6 -17
  64. parsl/executors/high_throughput/probe.py +9 -7
  65. parsl/executors/high_throughput/process_worker_pool.py +77 -75
  66. parsl/executors/high_throughput/zmq_pipes.py +81 -23
  67. parsl/executors/radical/executor.py +130 -79
  68. parsl/executors/radical/rpex_resources.py +17 -15
  69. parsl/executors/radical/rpex_worker.py +4 -3
  70. parsl/executors/status_handling.py +157 -51
  71. parsl/executors/taskvine/__init__.py +1 -1
  72. parsl/executors/taskvine/errors.py +1 -1
  73. parsl/executors/taskvine/exec_parsl_function.py +2 -2
  74. parsl/executors/taskvine/executor.py +38 -55
  75. parsl/executors/taskvine/factory.py +1 -1
  76. parsl/executors/taskvine/factory_config.py +1 -1
  77. parsl/executors/taskvine/manager.py +17 -13
  78. parsl/executors/taskvine/manager_config.py +7 -2
  79. parsl/executors/threads.py +6 -6
  80. parsl/executors/workqueue/errors.py +1 -1
  81. parsl/executors/workqueue/exec_parsl_function.py +6 -5
  82. parsl/executors/workqueue/executor.py +64 -63
  83. parsl/executors/workqueue/parsl_coprocess.py +1 -1
  84. parsl/jobs/error_handlers.py +2 -2
  85. parsl/jobs/job_status_poller.py +28 -112
  86. parsl/jobs/states.py +7 -2
  87. parsl/jobs/strategy.py +43 -31
  88. parsl/launchers/__init__.py +12 -3
  89. parsl/launchers/errors.py +1 -1
  90. parsl/launchers/launchers.py +0 -6
  91. parsl/log_utils.py +1 -2
  92. parsl/monitoring/db_manager.py +55 -93
  93. parsl/monitoring/errors.py +6 -0
  94. parsl/monitoring/monitoring.py +85 -311
  95. parsl/monitoring/queries/pandas.py +1 -2
  96. parsl/monitoring/radios/base.py +13 -0
  97. parsl/monitoring/radios/filesystem.py +52 -0
  98. parsl/monitoring/radios/htex.py +57 -0
  99. parsl/monitoring/radios/multiprocessing.py +17 -0
  100. parsl/monitoring/radios/udp.py +56 -0
  101. parsl/monitoring/radios/zmq.py +17 -0
  102. parsl/monitoring/remote.py +33 -37
  103. parsl/monitoring/router.py +212 -0
  104. parsl/monitoring/types.py +5 -6
  105. parsl/monitoring/visualization/app.py +4 -2
  106. parsl/monitoring/visualization/models.py +0 -1
  107. parsl/monitoring/visualization/plots/default/workflow_plots.py +8 -4
  108. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
  109. parsl/monitoring/visualization/utils.py +0 -1
  110. parsl/monitoring/visualization/views.py +16 -9
  111. parsl/multiprocessing.py +0 -1
  112. parsl/process_loggers.py +1 -2
  113. parsl/providers/__init__.py +8 -17
  114. parsl/providers/aws/aws.py +2 -3
  115. parsl/providers/azure/azure.py +4 -5
  116. parsl/providers/base.py +2 -18
  117. parsl/providers/cluster_provider.py +3 -9
  118. parsl/providers/condor/condor.py +7 -17
  119. parsl/providers/errors.py +2 -2
  120. parsl/providers/googlecloud/googlecloud.py +2 -1
  121. parsl/providers/grid_engine/grid_engine.py +5 -14
  122. parsl/providers/kubernetes/kube.py +80 -40
  123. parsl/providers/local/local.py +13 -26
  124. parsl/providers/lsf/lsf.py +5 -23
  125. parsl/providers/pbspro/pbspro.py +5 -17
  126. parsl/providers/slurm/slurm.py +81 -39
  127. parsl/providers/torque/torque.py +3 -14
  128. parsl/serialize/__init__.py +8 -3
  129. parsl/serialize/base.py +1 -2
  130. parsl/serialize/concretes.py +5 -4
  131. parsl/serialize/facade.py +3 -3
  132. parsl/serialize/proxystore.py +3 -2
  133. parsl/tests/__init__.py +1 -1
  134. parsl/tests/configs/azure_single_node.py +4 -5
  135. parsl/tests/configs/bridges.py +3 -2
  136. parsl/tests/configs/cc_in2p3.py +1 -3
  137. parsl/tests/configs/comet.py +2 -1
  138. parsl/tests/configs/ec2_single_node.py +1 -2
  139. parsl/tests/configs/ec2_spot.py +1 -2
  140. parsl/tests/configs/flux_local.py +11 -0
  141. parsl/tests/configs/frontera.py +2 -3
  142. parsl/tests/configs/htex_local.py +3 -5
  143. parsl/tests/configs/htex_local_alternate.py +11 -15
  144. parsl/tests/configs/htex_local_intask_staging.py +5 -9
  145. parsl/tests/configs/htex_local_rsync_staging.py +4 -8
  146. parsl/tests/configs/local_radical.py +1 -3
  147. parsl/tests/configs/local_radical_mpi.py +2 -2
  148. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  149. parsl/tests/configs/local_threads_monitoring.py +0 -1
  150. parsl/tests/configs/midway.py +2 -2
  151. parsl/tests/configs/nscc_singapore.py +3 -3
  152. parsl/tests/configs/osg_htex.py +1 -1
  153. parsl/tests/configs/petrelkube.py +3 -2
  154. parsl/tests/configs/slurm_local.py +24 -0
  155. parsl/tests/configs/summit.py +1 -0
  156. parsl/tests/configs/taskvine_ex.py +4 -7
  157. parsl/tests/configs/user_opts.py +0 -7
  158. parsl/tests/configs/workqueue_ex.py +4 -6
  159. parsl/tests/conftest.py +27 -13
  160. parsl/tests/integration/test_stress/test_python_simple.py +3 -4
  161. parsl/tests/integration/test_stress/test_python_threads.py +3 -5
  162. parsl/tests/manual_tests/htex_local.py +4 -6
  163. parsl/tests/manual_tests/test_basic.py +1 -0
  164. parsl/tests/manual_tests/test_log_filter.py +3 -1
  165. parsl/tests/manual_tests/test_memory_limits.py +6 -8
  166. parsl/tests/manual_tests/test_regression_220.py +2 -1
  167. parsl/tests/manual_tests/test_udp_simple.py +4 -4
  168. parsl/tests/manual_tests/test_worker_count.py +3 -2
  169. parsl/tests/scaling_tests/htex_local.py +2 -4
  170. parsl/tests/scaling_tests/test_scale.py +0 -9
  171. parsl/tests/scaling_tests/vineex_condor.py +1 -2
  172. parsl/tests/scaling_tests/vineex_local.py +1 -2
  173. parsl/tests/site_tests/site_config_selector.py +1 -6
  174. parsl/tests/site_tests/test_provider.py +4 -2
  175. parsl/tests/site_tests/test_site.py +2 -0
  176. parsl/tests/sites/test_affinity.py +7 -7
  177. parsl/tests/sites/test_dynamic_executor.py +3 -4
  178. parsl/tests/sites/test_ec2.py +3 -2
  179. parsl/tests/sites/test_worker_info.py +4 -5
  180. parsl/tests/test_aalst_patterns.py +0 -1
  181. parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
  182. parsl/tests/test_bash_apps/test_basic.py +10 -4
  183. parsl/tests/test_bash_apps/test_error_codes.py +5 -7
  184. parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
  185. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
  186. parsl/tests/test_bash_apps/test_memoize.py +2 -8
  187. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
  188. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
  189. parsl/tests/test_bash_apps/test_multiline.py +1 -1
  190. parsl/tests/test_bash_apps/test_pipeline.py +1 -1
  191. parsl/tests/test_bash_apps/test_std_uri.py +123 -0
  192. parsl/tests/test_bash_apps/test_stdout.py +33 -8
  193. parsl/tests/test_callables.py +2 -2
  194. parsl/tests/test_checkpointing/test_periodic.py +21 -39
  195. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
  196. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
  197. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  198. parsl/tests/test_checkpointing/test_regression_239.py +1 -1
  199. parsl/tests/test_checkpointing/test_task_exit.py +2 -3
  200. parsl/tests/test_docs/test_from_slides.py +5 -2
  201. parsl/tests/test_docs/test_kwargs.py +4 -1
  202. parsl/tests/test_docs/test_tutorial_1.py +1 -2
  203. parsl/tests/test_docs/test_workflow1.py +2 -2
  204. parsl/tests/test_docs/test_workflow2.py +0 -1
  205. parsl/tests/test_error_handling/test_rand_fail.py +2 -2
  206. parsl/tests/test_error_handling/test_resource_spec.py +10 -12
  207. parsl/tests/test_error_handling/test_retries.py +6 -16
  208. parsl/tests/test_error_handling/test_retry_handler.py +1 -0
  209. parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
  210. parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
  211. parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
  212. parsl/tests/test_execute_task.py +29 -0
  213. parsl/tests/test_flux.py +1 -1
  214. parsl/tests/test_htex/test_basic.py +2 -3
  215. parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
  216. parsl/tests/test_htex/test_command_client_timeout.py +66 -0
  217. parsl/tests/test_htex/test_connected_blocks.py +3 -2
  218. parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
  219. parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
  220. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  221. parsl/tests/test_htex/test_drain.py +11 -10
  222. parsl/tests/test_htex/test_htex.py +51 -25
  223. parsl/tests/test_htex/test_manager_failure.py +0 -1
  224. parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
  225. parsl/tests/test_htex/test_managers_command.py +36 -0
  226. parsl/tests/test_htex/test_missing_worker.py +2 -12
  227. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
  228. parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
  229. parsl/tests/test_htex/test_zmq_binding.py +29 -8
  230. parsl/tests/test_monitoring/test_app_names.py +5 -5
  231. parsl/tests/test_monitoring/test_basic.py +73 -25
  232. parsl/tests/test_monitoring/test_db_locks.py +6 -4
  233. parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
  234. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
  235. parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
  236. parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
  237. parsl/tests/test_monitoring/test_stdouterr.py +134 -0
  238. parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
  239. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
  240. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
  241. parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
  242. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
  243. parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
  244. parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
  245. parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
  246. parsl/tests/test_providers/test_local_provider.py +3 -132
  247. parsl/tests/test_providers/test_pbspro_template.py +2 -3
  248. parsl/tests/test_providers/test_slurm_template.py +2 -3
  249. parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
  250. parsl/tests/test_python_apps/test_context_manager.py +128 -0
  251. parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
  252. parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
  253. parsl/tests/test_python_apps/test_fail.py +0 -25
  254. parsl/tests/test_python_apps/test_futures.py +2 -1
  255. parsl/tests/test_python_apps/test_inputs_default.py +22 -0
  256. parsl/tests/test_python_apps/test_join.py +0 -1
  257. parsl/tests/test_python_apps/test_lifted.py +11 -7
  258. parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
  259. parsl/tests/test_python_apps/test_outputs.py +1 -1
  260. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  261. parsl/tests/test_radical/test_mpi_funcs.py +1 -2
  262. parsl/tests/test_regression/test_1480.py +2 -1
  263. parsl/tests/test_regression/test_1653.py +2 -1
  264. parsl/tests/test_regression/test_226.py +1 -0
  265. parsl/tests/test_regression/test_2652.py +1 -0
  266. parsl/tests/test_regression/test_69a.py +0 -1
  267. parsl/tests/test_regression/test_854.py +4 -2
  268. parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
  269. parsl/tests/test_regression/test_98.py +0 -1
  270. parsl/tests/test_scaling/test_block_error_handler.py +9 -4
  271. parsl/tests/test_scaling/test_regression_1621.py +11 -15
  272. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
  273. parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
  274. parsl/tests/test_scaling/test_scale_down.py +2 -5
  275. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +5 -8
  276. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
  277. parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
  278. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
  279. parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
  280. parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
  281. parsl/tests/test_serialization/test_basic.py +2 -1
  282. parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
  283. parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
  284. parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
  285. parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
  286. parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
  287. parsl/tests/test_staging/staging_provider.py +2 -2
  288. parsl/tests/test_staging/test_1316.py +3 -4
  289. parsl/tests/test_staging/test_docs_1.py +2 -1
  290. parsl/tests/test_staging/test_docs_2.py +2 -1
  291. parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
  292. parsl/tests/{test_data → test_staging}/test_file.py +6 -6
  293. parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
  294. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  295. parsl/tests/test_staging/test_staging_https.py +5 -2
  296. parsl/tests/test_staging/test_staging_stdout.py +64 -0
  297. parsl/tests/test_staging/test_zip_in.py +39 -0
  298. parsl/tests/test_staging/test_zip_out.py +110 -0
  299. parsl/tests/test_staging/test_zip_to_zip.py +41 -0
  300. parsl/tests/test_summary.py +2 -2
  301. parsl/tests/test_thread_parallelism.py +0 -1
  302. parsl/tests/test_threads/test_configs.py +1 -2
  303. parsl/tests/test_threads/test_lazy_errors.py +2 -2
  304. parsl/tests/test_utils/test_execute_wait.py +35 -0
  305. parsl/tests/test_utils/test_sanitize_dns.py +76 -0
  306. parsl/tests/unit/test_address.py +20 -0
  307. parsl/tests/unit/test_file.py +99 -0
  308. parsl/tests/unit/test_usage_tracking.py +66 -0
  309. parsl/usage_tracking/api.py +65 -0
  310. parsl/usage_tracking/levels.py +6 -0
  311. parsl/usage_tracking/usage.py +104 -62
  312. parsl/utils.py +137 -4
  313. parsl/version.py +1 -1
  314. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
  315. parsl-2025.1.13.data/scripts/interchange.py +649 -0
  316. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +77 -75
  317. parsl-2025.1.13.dist-info/METADATA +96 -0
  318. parsl-2025.1.13.dist-info/RECORD +462 -0
  319. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
  320. parsl/channels/__init__.py +0 -7
  321. parsl/channels/base.py +0 -141
  322. parsl/channels/errors.py +0 -113
  323. parsl/channels/local/local.py +0 -164
  324. parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
  325. parsl/channels/ssh/ssh.py +0 -276
  326. parsl/channels/ssh_il/__init__.py +0 -0
  327. parsl/channels/ssh_il/ssh_il.py +0 -74
  328. parsl/configs/ad_hoc.py +0 -35
  329. parsl/executors/radical/rpex_master.py +0 -42
  330. parsl/monitoring/radios.py +0 -175
  331. parsl/providers/ad_hoc/__init__.py +0 -0
  332. parsl/providers/ad_hoc/ad_hoc.py +0 -248
  333. parsl/providers/cobalt/__init__.py +0 -0
  334. parsl/providers/cobalt/cobalt.py +0 -236
  335. parsl/providers/cobalt/template.py +0 -17
  336. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  337. parsl/tests/configs/cooley_htex.py +0 -37
  338. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
  339. parsl/tests/configs/local_adhoc.py +0 -18
  340. parsl/tests/configs/swan_htex.py +0 -43
  341. parsl/tests/configs/theta.py +0 -37
  342. parsl/tests/integration/test_channels/__init__.py +0 -0
  343. parsl/tests/integration/test_channels/test_channels.py +0 -17
  344. parsl/tests/integration/test_channels/test_local_channel.py +0 -42
  345. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  346. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  347. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  348. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  349. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  350. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
  351. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  352. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  353. parsl/tests/sites/test_local_adhoc.py +0 -61
  354. parsl/tests/test_channels/__init__.py +0 -0
  355. parsl/tests/test_channels/test_large_output.py +0 -22
  356. parsl/tests/test_data/__init__.py +0 -0
  357. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
  358. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
  359. parsl-2024.3.18.dist-info/METADATA +0 -98
  360. parsl-2024.3.18.dist-info/RECORD +0 -449
  361. parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
  362. parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
  363. parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
  364. parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
  365. parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
  366. {parsl-2024.3.18.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
  367. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
  368. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
  369. {parsl-2024.3.18.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
@@ -3,50 +3,49 @@ Cooperative Computing Lab (CCL) at Notre Dame to provide a fault-tolerant,
3
3
  high-throughput system for delegating Parsl tasks to thousands of remote machines
4
4
  """
5
5
 
6
- import threading
7
- import multiprocessing
8
- import logging
9
- from concurrent.futures import Future
10
- from ctypes import c_bool
11
-
12
- import tempfile
13
6
  import hashlib
14
- import subprocess
7
+ import inspect
8
+ import itertools
9
+ import logging
10
+ import multiprocessing
15
11
  import os
16
- import socket
17
- import time
18
12
  import pickle
19
13
  import queue
20
- import inspect
21
14
  import shutil
22
- import itertools
15
+ import socket
16
+ import subprocess
17
+ import tempfile
18
+ import threading
19
+ import time
20
+ from collections import namedtuple
21
+ from concurrent.futures import Future
22
+ from ctypes import c_bool
23
+ from typing import Dict, List, Optional, Set, Union
24
+
25
+ import typeguard
23
26
 
24
- from parsl.serialize import pack_apply_message, deserialize
25
27
  import parsl.utils as putils
26
- from parsl.executors.errors import ExecutorError
27
28
  from parsl.data_provider.files import File
29
+ from parsl.data_provider.staging import Staging
28
30
  from parsl.errors import OptionalModuleMissing
31
+ from parsl.executors.errors import ExecutorError, InvalidResourceSpecification
29
32
  from parsl.executors.status_handling import BlockProviderExecutor
30
- from parsl.providers.base import ExecutionProvider
31
- from parsl.providers import LocalProvider, CondorProvider
32
33
  from parsl.executors.workqueue import exec_parsl_function
33
34
  from parsl.process_loggers import wrap_with_logs
35
+ from parsl.providers import CondorProvider, LocalProvider
36
+ from parsl.providers.base import ExecutionProvider
37
+ from parsl.serialize import deserialize, pack_apply_message
34
38
  from parsl.utils import setproctitle
35
39
 
36
- import typeguard
37
- from typing import Dict, List, Optional, Set, Union
38
- from parsl.data_provider.staging import Staging
39
-
40
- from .errors import WorkQueueTaskFailure
41
- from .errors import WorkQueueFailure
42
-
43
- from collections import namedtuple
40
+ from .errors import WorkQueueFailure, WorkQueueTaskFailure
44
41
 
45
42
  try:
46
43
  import work_queue as wq
47
- from work_queue import WorkQueue
48
- from work_queue import WORK_QUEUE_DEFAULT_PORT
49
- from work_queue import WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT
44
+ from work_queue import (
45
+ WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT,
46
+ WORK_QUEUE_DEFAULT_PORT,
47
+ WorkQueue,
48
+ )
50
49
  except ImportError:
51
50
  _work_queue_enabled = False
52
51
  WORK_QUEUE_DEFAULT_PORT = 0
@@ -216,6 +215,13 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
216
215
  This requires a version of Work Queue / cctools after commit
217
216
  874df524516441da531b694afc9d591e8b134b73 (release 7.5.0 is too early).
218
217
  Default is False.
218
+
219
+ scaling_cores_per_worker: int
220
+ When using Parsl scaling, this specifies the number of cores that a
221
+ worker is expected to have available for computation. Default 1. This
222
+ parameter can be ignored when using a fixed number of blocks, or when
223
+ using one task per worker (by omitting a ``cores`` resource
224
+ specifiation for each task).
219
225
  """
220
226
 
221
227
  radio_mode = "filesystem"
@@ -245,16 +251,17 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
245
251
  full_debug: bool = True,
246
252
  worker_executable: str = 'work_queue_worker',
247
253
  function_dir: Optional[str] = None,
248
- coprocess: bool = False):
254
+ coprocess: bool = False,
255
+ scaling_cores_per_worker: int = 1):
249
256
  BlockProviderExecutor.__init__(self, provider=provider,
250
257
  block_error_handler=True)
251
258
  if not _work_queue_enabled:
252
259
  raise OptionalModuleMissing(['work_queue'], "WorkQueueExecutor requires the work_queue module.")
253
260
 
261
+ self.scaling_cores_per_worker = scaling_cores_per_worker
254
262
  self.label = label
255
263
  self.task_queue = multiprocessing.Queue() # type: multiprocessing.Queue
256
264
  self.collector_queue = multiprocessing.Queue() # type: multiprocessing.Queue
257
- self.blocks = {} # type: Dict[str, str]
258
265
  self.address = address
259
266
  self.port = port
260
267
  self.executor_task_counter = -1
@@ -412,7 +419,7 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
412
419
  message = "Task resource specification only accepts these types of resources: {}".format(
413
420
  ', '.join(acceptable_fields))
414
421
  logger.error(message)
415
- raise ExecutorError(self, message)
422
+ raise InvalidResourceSpecification(keys, message)
416
423
 
417
424
  # this checks that either all of the required resource types are specified, or
418
425
  # that none of them are: the `required_resource_types` are not actually required,
@@ -423,9 +430,10 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
423
430
  logger.error("Running with `autolabel=False`. In this mode, "
424
431
  "task resource specification requires "
425
432
  "three resources to be specified simultaneously: cores, memory, and disk")
426
- raise ExecutorError(self, "Task resource specification requires "
427
- "three resources to be specified simultaneously: cores, memory, and disk. "
428
- "Try setting autolabel=True if you are unsure of the resource usage")
433
+ raise InvalidResourceSpecification(keys,
434
+ "Task resource specification requires "
435
+ "three resources to be specified simultaneously: cores, memory, and disk. "
436
+ "Try setting autolabel=True if you are unsure of the resource usage")
429
437
 
430
438
  for k in keys:
431
439
  if k == 'cores':
@@ -471,6 +479,8 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
471
479
  # Create a Future object and have it be mapped from the task ID in the tasks dictionary
472
480
  fu = Future()
473
481
  fu.parsl_executor_task_id = executor_task_id
482
+ assert isinstance(resource_specification, dict)
483
+ fu.resource_specification = resource_specification
474
484
  logger.debug("Getting tasks_lock to set WQ-level task entry")
475
485
  with self.tasks_lock:
476
486
  logger.debug("Got tasks_lock to set WQ-level task entry")
@@ -654,42 +664,31 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
654
664
  self.worker_command = self._construct_worker_command()
655
665
  self._patch_providers()
656
666
 
657
- if hasattr(self.provider, 'init_blocks'):
658
- try:
659
- self.scale_out(blocks=self.provider.init_blocks)
660
- except Exception as e:
661
- logger.error("Initial block scaling out failed: {}".format(e))
662
- raise e
663
-
664
667
  @property
665
668
  def outstanding(self) -> int:
666
- """Count the number of outstanding tasks. This is inefficiently
669
+ """Count the number of outstanding slots required. This is inefficiently
667
670
  implemented and probably could be replaced with a counter.
668
671
  """
672
+ logger.debug("Calculating outstanding task slot load")
669
673
  outstanding = 0
674
+ tasks = 0 # only for log message...
670
675
  with self.tasks_lock:
671
676
  for fut in self.tasks.values():
672
677
  if not fut.done():
673
- outstanding += 1
674
- logger.debug(f"Counted {outstanding} outstanding tasks")
678
+ # if a task does not specify a core count, Work Queue will allocate an entire
679
+ # worker node to that task. That's approximated here by saying that it uses
680
+ # scaling_cores_per_worker.
681
+ resource_spec = getattr(fut, 'resource_specification', {})
682
+ cores = resource_spec.get('cores', self.scaling_cores_per_worker)
683
+
684
+ outstanding += cores
685
+ tasks += 1
686
+ logger.debug(f"Counted {tasks} outstanding tasks with {outstanding} outstanding slots")
675
687
  return outstanding
676
688
 
677
689
  @property
678
690
  def workers_per_node(self) -> Union[int, float]:
679
- return 1
680
-
681
- def scale_in(self, count):
682
- """Scale in method.
683
- """
684
- # Obtain list of blocks to kill
685
- to_kill = list(self.blocks.keys())[:count]
686
- kill_ids = [self.blocks[block] for block in to_kill]
687
-
688
- # Cancel the blocks provisioned
689
- if self.provider:
690
- self.provider.cancel(kill_ids)
691
- else:
692
- logger.error("No execution provider available to scale")
691
+ return self.scaling_cores_per_worker
693
692
 
694
693
  def shutdown(self, *args, **kwargs):
695
694
  """Shutdown the executor. Sets flag to cancel the submit process and
@@ -698,17 +697,19 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
698
697
  logger.debug("Work Queue shutdown started")
699
698
  self.should_stop.value = True
700
699
 
701
- # Remove the workers that are still going
702
- kill_ids = [self.blocks[block] for block in self.blocks.keys()]
703
- if self.provider:
704
- logger.debug("Cancelling blocks")
705
- self.provider.cancel(kill_ids)
706
-
707
700
  logger.debug("Joining on submit process")
708
701
  self.submit_process.join()
702
+ self.submit_process.close()
703
+
709
704
  logger.debug("Joining on collector thread")
710
705
  self.collector_thread.join()
711
706
 
707
+ logger.debug("Closing multiprocessing queues")
708
+ self.task_queue.close()
709
+ self.task_queue.join_thread()
710
+ self.collector_queue.close()
711
+ self.collector_queue.join_thread()
712
+
712
713
  logger.debug("Work Queue shutdown completed")
713
714
 
714
715
  @wrap_with_logs
@@ -1,8 +1,8 @@
1
1
  #! /usr/bin/env python3
2
2
 
3
- import socket
4
3
  import json
5
4
  import os
5
+ import socket
6
6
  import sys
7
7
 
8
8
  # If enabled, coprocess will print to stdout
@@ -3,8 +3,8 @@ from __future__ import annotations
3
3
  from typing import Dict, Tuple
4
4
 
5
5
  import parsl.executors.status_handling as status_handling
6
- from parsl.jobs.states import JobStatus, JobState
7
6
  from parsl.jobs.errors import TooManyJobFailuresError
7
+ from parsl.jobs.states import JobState, JobStatus
8
8
 
9
9
 
10
10
  def noop_error_handler(executor: status_handling.BlockProviderExecutor, status: Dict[str, JobStatus], threshold: int = 3) -> None:
@@ -20,7 +20,7 @@ def simple_error_handler(executor: status_handling.BlockProviderExecutor, status
20
20
  executor.set_bad_state_and_fail_all(_get_error(status))
21
21
 
22
22
 
23
- def windowed_error_handler(executor: status_handling.BlockProviderExecutor, status: Dict[str, JobStatus], threshold: int = 3):
23
+ def windowed_error_handler(executor: status_handling.BlockProviderExecutor, status: Dict[str, JobStatus], threshold: int = 3) -> None:
24
24
  sorted_status = [(key, status[key]) for key in sorted(status, key=lambda x: int(x))]
25
25
  current_window = dict(sorted_status[-threshold:])
26
26
  total, failed = _count_jobs(current_window)
@@ -1,137 +1,53 @@
1
1
  import logging
2
- import parsl
3
- import time
4
- import zmq
5
- from typing import Dict, List, Sequence, Optional, Union
2
+ from typing import List, Optional, Sequence, Union
6
3
 
7
- from parsl.jobs.states import JobStatus, JobState
8
- from parsl.jobs.strategy import Strategy
9
4
  from parsl.executors.status_handling import BlockProviderExecutor
10
- from parsl.monitoring.message_type import MessageType
11
-
12
-
5
+ from parsl.jobs.strategy import Strategy
13
6
  from parsl.utils import Timer
14
7
 
15
-
16
8
  logger = logging.getLogger(__name__)
17
9
 
18
10
 
19
- class PollItem:
20
- def __init__(self, executor: BlockProviderExecutor, dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None):
21
- self._executor = executor
22
- self._dfk = dfk
23
- self._interval = executor.status_polling_interval
24
- self._last_poll_time = 0.0
25
- self._status = {} # type: Dict[str, JobStatus]
26
-
27
- # Create a ZMQ channel to send poll status to monitoring
28
- self.monitoring_enabled = False
29
- if self._dfk and self._dfk.monitoring is not None:
30
- self.monitoring_enabled = True
31
- hub_address = self._dfk.hub_address
32
- hub_port = self._dfk.hub_interchange_port
33
- context = zmq.Context()
34
- self.hub_channel = context.socket(zmq.DEALER)
35
- self.hub_channel.set_hwm(0)
36
- self.hub_channel.connect("tcp://{}:{}".format(hub_address, hub_port))
37
- logger.info("Monitoring enabled on job status poller")
38
-
39
- def _should_poll(self, now: float) -> bool:
40
- return now >= self._last_poll_time + self._interval
41
-
42
- def poll(self, now: float) -> None:
43
- if self._should_poll(now):
44
- previous_status = self._status
45
- self._status = self._executor.status()
46
- self._last_poll_time = now
47
- delta_status = {}
48
- for block_id in self._status:
49
- if block_id not in previous_status \
50
- or previous_status[block_id].state != self._status[block_id].state:
51
- delta_status[block_id] = self._status[block_id]
52
-
53
- if delta_status:
54
- self.send_monitoring_info(delta_status)
55
-
56
- def send_monitoring_info(self, status: Dict) -> None:
57
- # Send monitoring info for HTEX when monitoring enabled
58
- if self.monitoring_enabled:
59
- msg = self._executor.create_monitoring_info(status)
60
- logger.debug("Sending message {} to hub from job status poller".format(msg))
61
- self.hub_channel.send_pyobj((MessageType.BLOCK_INFO, msg))
62
-
63
- @property
64
- def status(self) -> Dict[str, JobStatus]:
65
- """Return the status of all jobs/blocks of the executor of this poller.
66
-
67
- :return: a dictionary mapping block ids (in string) to job status
68
- """
69
- return self._status
70
-
71
- @property
72
- def executor(self) -> BlockProviderExecutor:
73
- return self._executor
74
-
75
- def scale_in(self, n, max_idletime=None):
76
-
77
- if max_idletime is None:
78
- block_ids = self._executor.scale_in(n)
79
- else:
80
- # This is a HighThroughputExecutor-specific interface violation.
81
- # This code hopes, through pan-codebase reasoning, that this
82
- # scale_in method really does come from HighThroughputExecutor,
83
- # and so does have an extra max_idletime parameter not present
84
- # in the executor interface.
85
- block_ids = self._executor.scale_in(n, max_idletime=max_idletime)
86
- if block_ids is not None:
87
- new_status = {}
88
- for block_id in block_ids:
89
- new_status[block_id] = JobStatus(JobState.CANCELLED)
90
- del self._status[block_id]
91
- self.send_monitoring_info(new_status)
92
- return block_ids
93
-
94
- def scale_out(self, n):
95
- block_ids = self._executor.scale_out(n)
96
- if block_ids is not None:
97
- new_status = {}
98
- for block_id in block_ids:
99
- new_status[block_id] = JobStatus(JobState.PENDING)
100
- self.send_monitoring_info(new_status)
101
- self._status.update(new_status)
102
- return block_ids
103
-
104
- def __repr__(self) -> str:
105
- return self._status.__repr__()
106
-
107
-
108
11
  class JobStatusPoller(Timer):
109
12
  def __init__(self, *, strategy: Optional[str], max_idletime: float,
110
- strategy_period: Union[float, int],
111
- dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None) -> None:
112
- self._poll_items = [] # type: List[PollItem]
113
- self.dfk = dfk
13
+ strategy_period: Union[float, int]) -> None:
14
+ self._executors = [] # type: List[BlockProviderExecutor]
114
15
  self._strategy = Strategy(strategy=strategy,
115
16
  max_idletime=max_idletime)
116
17
  super().__init__(self.poll, interval=strategy_period, name="JobStatusPoller")
117
18
 
118
19
  def poll(self) -> None:
119
20
  self._update_state()
120
- self._run_error_handlers(self._poll_items)
121
- self._strategy.strategize(self._poll_items)
21
+ self._run_error_handlers(self._executors)
22
+ self._strategy.strategize(self._executors)
122
23
 
123
- def _run_error_handlers(self, status: List[PollItem]) -> None:
124
- for es in status:
125
- es.executor.handle_errors(es.status)
24
+ def _run_error_handlers(self, executors: List[BlockProviderExecutor]) -> None:
25
+ for e in executors:
26
+ e.handle_errors(e.status_facade)
126
27
 
127
28
  def _update_state(self) -> None:
128
- now = time.time()
129
- for item in self._poll_items:
130
- item.poll(now)
29
+ for item in self._executors:
30
+ item.poll_facade()
131
31
 
132
32
  def add_executors(self, executors: Sequence[BlockProviderExecutor]) -> None:
133
33
  for executor in executors:
134
34
  if executor.status_polling_interval > 0:
135
35
  logger.debug("Adding executor {}".format(executor.label))
136
- self._poll_items.append(PollItem(executor, self.dfk))
36
+ self._executors.append(executor)
137
37
  self._strategy.add_executors(executors)
38
+
39
+ def close(self, timeout: Optional[float] = None) -> None:
40
+ super().close(timeout)
41
+ for executor in self._executors:
42
+ if not executor.bad_state_is_set:
43
+ logger.info(f"Scaling in executor {executor.label}")
44
+
45
+ # this code needs to be at least as many blocks as need
46
+ # cancelling, but it is safe to be more, as the scaling
47
+ # code will cope with being asked to cancel more blocks
48
+ # than exist.
49
+ block_count = len(executor.status_facade)
50
+ executor.scale_in_facade(block_count)
51
+
52
+ else: # and bad_state_is_set
53
+ logger.warning(f"Not scaling in executor {executor.label} because it is in bad state")
parsl/jobs/states.py CHANGED
@@ -1,6 +1,6 @@
1
+ import logging
1
2
  import os
2
3
  from enum import IntEnum
3
- import logging
4
4
  from typing import Optional
5
5
 
6
6
  logger = logging.getLogger(__name__)
@@ -46,12 +46,17 @@ class JobState(IntEnum):
46
46
  bad worker environment or network connectivity issues.
47
47
  """
48
48
 
49
+ SCALED_IN = 9
50
+ """This job has been deliberately scaled in. Scaling code should not be concerned
51
+ that the job never ran (for example for error handling purposes).
52
+ """
53
+
49
54
  def __str__(self) -> str:
50
55
  return f"{self.__class__.__name__}.{self.name}"
51
56
 
52
57
 
53
58
  TERMINAL_STATES = [JobState.CANCELLED, JobState.COMPLETED, JobState.FAILED,
54
- JobState.TIMEOUT, JobState.MISSING]
59
+ JobState.TIMEOUT, JobState.MISSING, JobState.SCALED_IN]
55
60
 
56
61
 
57
62
  class JobStatus:
parsl/jobs/strategy.py CHANGED
@@ -1,19 +1,17 @@
1
1
  from __future__ import annotations
2
+
2
3
  import logging
3
- import time
4
4
  import math
5
+ import time
5
6
  import warnings
6
7
  from typing import Dict, List, Optional, Sequence, TypedDict
7
8
 
8
- import parsl.jobs.job_status_poller as jsp
9
-
10
9
  from parsl.executors import HighThroughputExecutor
11
10
  from parsl.executors.base import ParslExecutor
12
11
  from parsl.executors.status_handling import BlockProviderExecutor
13
12
  from parsl.jobs.states import JobState
14
13
  from parsl.process_loggers import wrap_with_logs
15
14
 
16
-
17
15
  logger = logging.getLogger(__name__)
18
16
 
19
17
 
@@ -26,6 +24,10 @@ class ExecutorState(TypedDict):
26
24
  If the executor is not idle, then None.
27
25
  """
28
26
 
27
+ first: bool
28
+ """True if this executor has not yet had a strategy poll.
29
+ """
30
+
29
31
 
30
32
  class Strategy:
31
33
  """Scaling strategy.
@@ -129,8 +131,8 @@ class Strategy:
129
131
  self.executors = {}
130
132
  self.max_idletime = max_idletime
131
133
 
132
- self.strategies = {None: self._strategy_noop,
133
- 'none': self._strategy_noop,
134
+ self.strategies = {None: self._strategy_init_only,
135
+ 'none': self._strategy_init_only,
134
136
  'simple': self._strategy_simple,
135
137
  'htex_auto_scale': self._strategy_htex_auto_scale}
136
138
 
@@ -144,17 +146,23 @@ class Strategy:
144
146
 
145
147
  def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
146
148
  for executor in executors:
147
- self.executors[executor.label] = {'idle_since': None}
149
+ self.executors[executor.label] = {'idle_since': None, 'first': True}
148
150
 
149
- def _strategy_noop(self, status: List[jsp.PollItem]) -> None:
150
- """Do nothing.
151
+ def _strategy_init_only(self, executors: List[BlockProviderExecutor]) -> None:
152
+ """Scale up to init_blocks at the start, then nothing more.
151
153
  """
152
- logger.debug("strategy_noop: doing nothing")
154
+ for executor in executors:
155
+ if self.executors[executor.label]['first']:
156
+ logger.debug(f"strategy_init_only: scaling out {executor.provider.init_blocks} initial blocks for {executor.label}")
157
+ executor.scale_out_facade(executor.provider.init_blocks)
158
+ self.executors[executor.label]['first'] = False
159
+ else:
160
+ logger.debug("strategy_init_only: doing nothing")
153
161
 
154
- def _strategy_simple(self, status_list: List[jsp.PollItem]) -> None:
155
- self._general_strategy(status_list, strategy_type='simple')
162
+ def _strategy_simple(self, executors: List[BlockProviderExecutor]) -> None:
163
+ self._general_strategy(executors, strategy_type='simple')
156
164
 
157
- def _strategy_htex_auto_scale(self, status_list: List[jsp.PollItem]) -> None:
165
+ def _strategy_htex_auto_scale(self, executors: List[BlockProviderExecutor]) -> None:
158
166
  """HTEX specific auto scaling strategy
159
167
 
160
168
  This strategy works only for HTEX. This strategy will scale out by
@@ -169,24 +177,25 @@ class Strategy:
169
177
  expected to scale in effectively only when # of workers, or tasks executing
170
178
  per block is close to 1.
171
179
  """
172
- self._general_strategy(status_list, strategy_type='htex')
180
+ self._general_strategy(executors, strategy_type='htex')
173
181
 
174
182
  @wrap_with_logs
175
- def _general_strategy(self, status_list, *, strategy_type):
176
- logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(status_list)} executors")
183
+ def _general_strategy(self, executors: List[BlockProviderExecutor], *, strategy_type: str) -> None:
184
+ logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(executors)} executors")
177
185
 
178
- for exec_status in status_list:
179
- executor = exec_status.executor
186
+ for executor in executors:
180
187
  label = executor.label
181
- if not isinstance(executor, BlockProviderExecutor):
182
- logger.debug(f"Not strategizing for executor {label} because scaling not enabled")
183
- continue
184
188
  logger.debug(f"Strategizing for executor {label}")
185
189
 
190
+ if self.executors[label]['first']:
191
+ logger.debug(f"Scaling out {executor.provider.init_blocks} initial blocks for {label}")
192
+ executor.scale_out_facade(executor.provider.init_blocks)
193
+ self.executors[label]['first'] = False
194
+
186
195
  # Tasks that are either pending completion
187
196
  active_tasks = executor.outstanding
188
197
 
189
- status = exec_status.status
198
+ status = executor.status_facade
190
199
 
191
200
  # FIXME we need to handle case where provider does not define these
192
201
  # FIXME probably more of this logic should be moved to the provider
@@ -230,23 +239,26 @@ class Strategy:
230
239
  else:
231
240
  # We want to make sure that max_idletime is reached
232
241
  # before killing off resources
233
- logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks}) than minimum blocks ({min_blocks})")
242
+ logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks})"
243
+ f" than minimum blocks ({min_blocks})")
234
244
 
235
245
  if not self.executors[executor.label]['idle_since']:
236
246
  logger.debug(f"Starting idle timer for executor. If idle time exceeds {self.max_idletime}s, blocks will be scaled in")
237
247
  self.executors[executor.label]['idle_since'] = time.time()
238
-
239
248
  idle_since = self.executors[executor.label]['idle_since']
249
+ assert idle_since is not None, "The `if` statement above this assert should have forced idle time to be not-None"
250
+
240
251
  idle_duration = time.time() - idle_since
241
252
  if idle_duration > self.max_idletime:
242
253
  # We have resources idle for the max duration,
243
254
  # we have to scale_in now.
244
255
  logger.debug(f"Idle time has reached {self.max_idletime}s for executor {label}; scaling in")
245
- exec_status.scale_in(active_blocks - min_blocks)
256
+ executor.scale_in_facade(active_blocks - min_blocks)
246
257
 
247
258
  else:
248
259
  logger.debug(
249
- f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s for executor {label}; not scaling in")
260
+ f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s"
261
+ f" for executor {label}; not scaling in")
250
262
 
251
263
  # Case 2
252
264
  # More tasks than the available slots.
@@ -265,7 +277,7 @@ class Strategy:
265
277
  excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
266
278
  excess_blocks = min(excess_blocks, max_blocks - active_blocks)
267
279
  logger.debug(f"Requesting {excess_blocks} more blocks")
268
- exec_status.scale_out(excess_blocks)
280
+ executor.scale_out_facade(excess_blocks)
269
281
 
270
282
  elif active_slots == 0 and active_tasks > 0:
271
283
  logger.debug("Strategy case 4a: No active slots but some active tasks - could scale out by a single block")
@@ -274,7 +286,7 @@ class Strategy:
274
286
  if active_blocks < max_blocks:
275
287
  logger.debug("Requesting single block")
276
288
 
277
- exec_status.scale_out(1)
289
+ executor.scale_out_facade(1)
278
290
  else:
279
291
  logger.debug("Not requesting single block, because at maxblocks already")
280
292
 
@@ -286,11 +298,11 @@ class Strategy:
286
298
  # Scale in for htex
287
299
  if isinstance(executor, HighThroughputExecutor):
288
300
  if active_blocks > min_blocks:
289
- excess_slots = math.ceil(active_slots - (active_tasks * parallelism))
290
- excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
301
+ excess_slots = math.floor(active_slots - (active_tasks * parallelism))
302
+ excess_blocks = math.floor(float(excess_slots) / (tasks_per_node * nodes_per_block))
291
303
  excess_blocks = min(excess_blocks, active_blocks - min_blocks)
292
304
  logger.debug(f"Requesting scaling in by {excess_blocks} blocks with idle time {self.max_idletime}s")
293
- exec_status.scale_in(excess_blocks, max_idletime=self.max_idletime)
305
+ executor.scale_in_facade(excess_blocks, max_idletime=self.max_idletime)
294
306
  else:
295
307
  logger.error("This strategy does not support scaling in except for HighThroughputExecutor - taking no action")
296
308
  else:
@@ -1,6 +1,15 @@
1
- from parsl.launchers.launchers import SimpleLauncher, SingleNodeLauncher, \
2
- SrunLauncher, AprunLauncher, SrunMPILauncher, WrappedLauncher, \
3
- GnuParallelLauncher, MpiExecLauncher, MpiRunLauncher, JsrunLauncher
1
+ from parsl.launchers.launchers import (
2
+ AprunLauncher,
3
+ GnuParallelLauncher,
4
+ JsrunLauncher,
5
+ MpiExecLauncher,
6
+ MpiRunLauncher,
7
+ SimpleLauncher,
8
+ SingleNodeLauncher,
9
+ SrunLauncher,
10
+ SrunMPILauncher,
11
+ WrappedLauncher,
12
+ )
4
13
 
5
14
  __all__ = ['SimpleLauncher',
6
15
  'WrappedLauncher',
parsl/launchers/errors.py CHANGED
@@ -1,5 +1,5 @@
1
- from parsl.providers.errors import ExecutionProviderException
2
1
  from parsl.launchers.base import Launcher
2
+ from parsl.providers.errors import ExecutionProviderException
3
3
 
4
4
 
5
5
  class BadLauncher(ExecutionProviderException, TypeError):