parsl 2024.3.11__py3-none-any.whl → 2025.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +9 -10
- parsl/addresses.py +29 -7
- parsl/app/app.py +7 -8
- parsl/app/bash.py +15 -8
- parsl/app/errors.py +10 -13
- parsl/app/futures.py +8 -10
- parsl/app/python.py +2 -1
- parsl/benchmark/perf.py +2 -1
- parsl/concurrent/__init__.py +2 -2
- parsl/config.py +57 -10
- parsl/configs/ASPIRE1.py +6 -5
- parsl/configs/Azure.py +9 -8
- parsl/configs/bridges.py +6 -4
- parsl/configs/cc_in2p3.py +3 -3
- parsl/configs/ec2.py +3 -1
- parsl/configs/expanse.py +4 -3
- parsl/configs/frontera.py +3 -4
- parsl/configs/htex_local.py +3 -4
- parsl/configs/illinoiscluster.py +3 -1
- parsl/configs/improv.py +34 -0
- parsl/configs/kubernetes.py +4 -3
- parsl/configs/local_threads.py +5 -1
- parsl/configs/midway.py +5 -3
- parsl/configs/osg.py +4 -2
- parsl/configs/polaris.py +4 -2
- parsl/configs/stampede2.py +6 -5
- parsl/configs/summit.py +3 -3
- parsl/configs/toss3_llnl.py +4 -3
- parsl/configs/vineex_local.py +6 -4
- parsl/configs/wqex_local.py +5 -3
- parsl/curvezmq.py +4 -0
- parsl/data_provider/data_manager.py +4 -3
- parsl/data_provider/file_noop.py +1 -2
- parsl/data_provider/files.py +3 -3
- parsl/data_provider/ftp.py +1 -3
- parsl/data_provider/globus.py +7 -6
- parsl/data_provider/http.py +2 -2
- parsl/data_provider/rsync.py +1 -1
- parsl/data_provider/staging.py +2 -2
- parsl/data_provider/zip.py +135 -0
- parsl/dataflow/dependency_resolvers.py +115 -0
- parsl/dataflow/dflow.py +262 -224
- parsl/dataflow/errors.py +3 -5
- parsl/dataflow/futures.py +27 -14
- parsl/dataflow/memoization.py +5 -5
- parsl/dataflow/rundirs.py +5 -6
- parsl/dataflow/taskrecord.py +4 -5
- parsl/executors/__init__.py +4 -2
- parsl/executors/base.py +45 -15
- parsl/executors/errors.py +13 -0
- parsl/executors/execute_task.py +37 -0
- parsl/executors/flux/execute_parsl_task.py +3 -3
- parsl/executors/flux/executor.py +18 -19
- parsl/executors/flux/flux_instance_manager.py +26 -27
- parsl/executors/high_throughput/errors.py +43 -3
- parsl/executors/high_throughput/executor.py +316 -282
- parsl/executors/high_throughput/interchange.py +158 -167
- parsl/executors/high_throughput/manager_record.py +5 -0
- parsl/executors/high_throughput/manager_selector.py +55 -0
- parsl/executors/high_throughput/monitoring_info.py +2 -1
- parsl/executors/high_throughput/mpi_executor.py +113 -0
- parsl/executors/high_throughput/mpi_prefix_composer.py +10 -11
- parsl/executors/high_throughput/mpi_resource_management.py +6 -17
- parsl/executors/high_throughput/probe.py +9 -7
- parsl/executors/high_throughput/process_worker_pool.py +115 -77
- parsl/executors/high_throughput/zmq_pipes.py +81 -23
- parsl/executors/radical/executor.py +130 -79
- parsl/executors/radical/rpex_resources.py +17 -15
- parsl/executors/radical/rpex_worker.py +4 -3
- parsl/executors/status_handling.py +157 -51
- parsl/executors/taskvine/__init__.py +1 -1
- parsl/executors/taskvine/errors.py +1 -1
- parsl/executors/taskvine/exec_parsl_function.py +2 -2
- parsl/executors/taskvine/executor.py +41 -57
- parsl/executors/taskvine/factory.py +1 -1
- parsl/executors/taskvine/factory_config.py +1 -1
- parsl/executors/taskvine/manager.py +18 -13
- parsl/executors/taskvine/manager_config.py +9 -5
- parsl/executors/threads.py +6 -6
- parsl/executors/workqueue/errors.py +1 -1
- parsl/executors/workqueue/exec_parsl_function.py +6 -5
- parsl/executors/workqueue/executor.py +64 -63
- parsl/executors/workqueue/parsl_coprocess.py +1 -1
- parsl/jobs/error_handlers.py +2 -2
- parsl/jobs/job_status_poller.py +30 -113
- parsl/jobs/states.py +7 -2
- parsl/jobs/strategy.py +43 -31
- parsl/launchers/__init__.py +12 -3
- parsl/launchers/errors.py +1 -1
- parsl/launchers/launchers.py +6 -12
- parsl/log_utils.py +9 -6
- parsl/monitoring/db_manager.py +59 -95
- parsl/monitoring/errors.py +6 -0
- parsl/monitoring/monitoring.py +87 -356
- parsl/monitoring/queries/pandas.py +1 -2
- parsl/monitoring/radios/base.py +13 -0
- parsl/monitoring/radios/filesystem.py +52 -0
- parsl/monitoring/radios/htex.py +57 -0
- parsl/monitoring/radios/multiprocessing.py +17 -0
- parsl/monitoring/radios/udp.py +56 -0
- parsl/monitoring/radios/zmq.py +17 -0
- parsl/monitoring/remote.py +33 -37
- parsl/monitoring/router.py +212 -0
- parsl/monitoring/types.py +5 -6
- parsl/monitoring/visualization/app.py +4 -2
- parsl/monitoring/visualization/models.py +0 -1
- parsl/monitoring/visualization/plots/default/workflow_plots.py +11 -4
- parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +1 -0
- parsl/monitoring/visualization/utils.py +0 -1
- parsl/monitoring/visualization/views.py +16 -8
- parsl/multiprocessing.py +0 -1
- parsl/process_loggers.py +1 -2
- parsl/providers/__init__.py +8 -17
- parsl/providers/aws/aws.py +2 -3
- parsl/providers/azure/azure.py +4 -5
- parsl/providers/base.py +2 -18
- parsl/providers/cluster_provider.py +4 -12
- parsl/providers/condor/condor.py +7 -17
- parsl/providers/errors.py +2 -2
- parsl/providers/googlecloud/googlecloud.py +2 -1
- parsl/providers/grid_engine/grid_engine.py +5 -14
- parsl/providers/kubernetes/kube.py +80 -40
- parsl/providers/local/local.py +13 -26
- parsl/providers/lsf/lsf.py +5 -23
- parsl/providers/pbspro/pbspro.py +5 -17
- parsl/providers/slurm/slurm.py +81 -39
- parsl/providers/torque/torque.py +3 -14
- parsl/serialize/__init__.py +8 -3
- parsl/serialize/base.py +1 -2
- parsl/serialize/concretes.py +5 -4
- parsl/serialize/facade.py +3 -3
- parsl/serialize/proxystore.py +3 -2
- parsl/tests/__init__.py +1 -1
- parsl/tests/configs/azure_single_node.py +4 -5
- parsl/tests/configs/bridges.py +3 -2
- parsl/tests/configs/cc_in2p3.py +1 -3
- parsl/tests/configs/comet.py +2 -1
- parsl/tests/configs/ec2_single_node.py +1 -2
- parsl/tests/configs/ec2_spot.py +1 -2
- parsl/tests/configs/flux_local.py +11 -0
- parsl/tests/configs/frontera.py +2 -3
- parsl/tests/configs/htex_local.py +3 -5
- parsl/tests/configs/htex_local_alternate.py +11 -15
- parsl/tests/configs/htex_local_intask_staging.py +5 -9
- parsl/tests/configs/htex_local_rsync_staging.py +4 -8
- parsl/tests/configs/local_radical.py +1 -3
- parsl/tests/configs/local_radical_mpi.py +2 -2
- parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
- parsl/tests/configs/local_threads_monitoring.py +0 -1
- parsl/tests/configs/midway.py +2 -2
- parsl/tests/configs/nscc_singapore.py +3 -3
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +3 -2
- parsl/tests/configs/slurm_local.py +24 -0
- parsl/tests/configs/summit.py +1 -0
- parsl/tests/configs/taskvine_ex.py +4 -7
- parsl/tests/configs/user_opts.py +2 -8
- parsl/tests/configs/workqueue_ex.py +4 -6
- parsl/tests/conftest.py +27 -13
- parsl/tests/integration/test_stress/test_python_simple.py +3 -4
- parsl/tests/integration/test_stress/test_python_threads.py +3 -5
- parsl/tests/manual_tests/htex_local.py +4 -6
- parsl/tests/manual_tests/test_basic.py +1 -0
- parsl/tests/manual_tests/test_log_filter.py +3 -1
- parsl/tests/manual_tests/test_memory_limits.py +6 -8
- parsl/tests/manual_tests/test_regression_220.py +2 -1
- parsl/tests/manual_tests/test_udp_simple.py +4 -4
- parsl/tests/manual_tests/test_worker_count.py +3 -2
- parsl/tests/scaling_tests/htex_local.py +2 -4
- parsl/tests/scaling_tests/test_scale.py +0 -9
- parsl/tests/scaling_tests/vineex_condor.py +1 -2
- parsl/tests/scaling_tests/vineex_local.py +1 -2
- parsl/tests/site_tests/site_config_selector.py +1 -6
- parsl/tests/site_tests/test_provider.py +4 -2
- parsl/tests/site_tests/test_site.py +2 -0
- parsl/tests/sites/test_affinity.py +7 -7
- parsl/tests/sites/test_dynamic_executor.py +3 -4
- parsl/tests/sites/test_ec2.py +3 -2
- parsl/tests/sites/test_worker_info.py +4 -5
- parsl/tests/test_aalst_patterns.py +0 -1
- parsl/tests/test_bash_apps/test_apptimeout.py +2 -2
- parsl/tests/test_bash_apps/test_basic.py +10 -4
- parsl/tests/test_bash_apps/test_error_codes.py +5 -7
- parsl/tests/test_bash_apps/test_inputs_default.py +25 -0
- parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -1
- parsl/tests/test_bash_apps/test_memoize.py +2 -8
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +9 -14
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +9 -14
- parsl/tests/test_bash_apps/test_multiline.py +1 -1
- parsl/tests/test_bash_apps/test_pipeline.py +1 -1
- parsl/tests/test_bash_apps/test_std_uri.py +123 -0
- parsl/tests/test_bash_apps/test_stdout.py +33 -8
- parsl/tests/test_callables.py +2 -2
- parsl/tests/test_checkpointing/test_periodic.py +21 -39
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +1 -0
- parsl/tests/test_checkpointing/test_python_checkpoint_2.py +2 -2
- parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
- parsl/tests/test_checkpointing/test_regression_239.py +1 -1
- parsl/tests/test_checkpointing/test_task_exit.py +2 -3
- parsl/tests/test_docs/test_from_slides.py +5 -2
- parsl/tests/test_docs/test_kwargs.py +4 -1
- parsl/tests/test_docs/test_tutorial_1.py +1 -2
- parsl/tests/test_docs/test_workflow1.py +2 -2
- parsl/tests/test_docs/test_workflow2.py +0 -1
- parsl/tests/test_error_handling/test_rand_fail.py +2 -2
- parsl/tests/test_error_handling/test_resource_spec.py +10 -12
- parsl/tests/test_error_handling/test_retries.py +6 -16
- parsl/tests/test_error_handling/test_retry_handler.py +1 -0
- parsl/tests/test_error_handling/test_retry_handler_failure.py +2 -1
- parsl/tests/test_error_handling/test_serialization_fail.py +1 -1
- parsl/tests/test_error_handling/test_wrap_with_logs.py +1 -0
- parsl/tests/test_execute_task.py +29 -0
- parsl/tests/test_flux.py +1 -1
- parsl/tests/test_htex/test_basic.py +2 -3
- parsl/tests/test_htex/test_block_manager_selector_unit.py +20 -0
- parsl/tests/test_htex/test_command_client_timeout.py +66 -0
- parsl/tests/test_htex/test_connected_blocks.py +3 -2
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +6 -10
- parsl/tests/test_htex/test_disconnected_blocks.py +6 -5
- parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
- parsl/tests/test_htex/test_drain.py +79 -0
- parsl/tests/test_htex/test_htex.py +51 -25
- parsl/tests/test_htex/test_manager_failure.py +0 -1
- parsl/tests/test_htex/test_manager_selector_by_block.py +51 -0
- parsl/tests/test_htex/test_managers_command.py +36 -0
- parsl/tests/test_htex/test_missing_worker.py +2 -12
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +9 -9
- parsl/tests/test_htex/test_resource_spec_validation.py +45 -0
- parsl/tests/test_htex/test_zmq_binding.py +29 -8
- parsl/tests/test_monitoring/test_app_names.py +86 -0
- parsl/tests/test_monitoring/test_basic.py +73 -25
- parsl/tests/test_monitoring/test_db_locks.py +6 -4
- parsl/tests/test_monitoring/test_fuzz_zmq.py +19 -8
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +80 -0
- parsl/tests/test_monitoring/test_incomplete_futures.py +5 -4
- parsl/tests/test_monitoring/test_memoization_representation.py +4 -2
- parsl/tests/test_monitoring/test_stdouterr.py +134 -0
- parsl/tests/test_monitoring/test_viz_colouring.py +1 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +33 -26
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +28 -11
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +4 -4
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +7 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +64 -0
- parsl/tests/test_mpi_apps/test_resource_spec.py +42 -49
- parsl/tests/test_providers/test_kubernetes_provider.py +102 -0
- parsl/tests/test_providers/test_local_provider.py +3 -132
- parsl/tests/test_providers/test_pbspro_template.py +2 -3
- parsl/tests/test_providers/test_slurm_template.py +2 -3
- parsl/tests/test_providers/test_submiterror_deprecation.py +2 -1
- parsl/tests/test_python_apps/test_context_manager.py +128 -0
- parsl/tests/test_python_apps/test_dep_standard_futures.py +2 -1
- parsl/tests/test_python_apps/test_dependencies_deep.py +59 -0
- parsl/tests/test_python_apps/test_fail.py +0 -25
- parsl/tests/test_python_apps/test_futures.py +2 -1
- parsl/tests/test_python_apps/test_inputs_default.py +22 -0
- parsl/tests/test_python_apps/test_join.py +0 -1
- parsl/tests/test_python_apps/test_lifted.py +11 -7
- parsl/tests/test_python_apps/test_memoize_bad_id_for_memo.py +1 -0
- parsl/tests/test_python_apps/test_outputs.py +1 -1
- parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -2
- parsl/tests/test_regression/test_1480.py +2 -1
- parsl/tests/test_regression/test_1653.py +2 -1
- parsl/tests/test_regression/test_226.py +1 -0
- parsl/tests/test_regression/test_2652.py +1 -0
- parsl/tests/test_regression/test_69a.py +0 -1
- parsl/tests/test_regression/test_854.py +4 -2
- parsl/tests/test_regression/test_97_parallelism_0.py +1 -2
- parsl/tests/test_regression/test_98.py +0 -1
- parsl/tests/test_scaling/test_block_error_handler.py +9 -4
- parsl/tests/test_scaling/test_regression_1621.py +11 -15
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +84 -0
- parsl/tests/test_scaling/test_regression_3696_oscillation.py +103 -0
- parsl/tests/test_scaling/test_scale_down.py +2 -5
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +6 -18
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +71 -0
- parsl/tests/test_scaling/test_shutdown_scalein.py +73 -0
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +90 -0
- parsl/tests/test_serialization/test_2555_caching_deserializer.py +1 -1
- parsl/tests/test_serialization/test_3495_deserialize_managerlost.py +47 -0
- parsl/tests/test_serialization/test_basic.py +2 -1
- parsl/tests/test_serialization/test_htex_code_cache.py +3 -4
- parsl/tests/test_serialization/test_pack_resource_spec.py +2 -1
- parsl/tests/test_serialization/test_proxystore_configured.py +10 -6
- parsl/tests/test_serialization/test_proxystore_impl.py +5 -3
- parsl/tests/test_shutdown/test_kill_monitoring.py +64 -0
- parsl/tests/test_staging/staging_provider.py +2 -2
- parsl/tests/test_staging/test_1316.py +3 -4
- parsl/tests/test_staging/test_docs_1.py +2 -1
- parsl/tests/test_staging/test_docs_2.py +2 -1
- parsl/tests/test_staging/test_elaborate_noop_file.py +2 -3
- parsl/tests/{test_data → test_staging}/test_file.py +6 -6
- parsl/tests/{test_data → test_staging}/test_output_chain_filenames.py +3 -0
- parsl/tests/test_staging/test_staging_ftp.py +1 -0
- parsl/tests/test_staging/test_staging_https.py +5 -2
- parsl/tests/test_staging/test_staging_stdout.py +64 -0
- parsl/tests/test_staging/test_zip_in.py +39 -0
- parsl/tests/test_staging/test_zip_out.py +110 -0
- parsl/tests/test_staging/test_zip_to_zip.py +41 -0
- parsl/tests/test_summary.py +2 -2
- parsl/tests/test_thread_parallelism.py +0 -1
- parsl/tests/test_threads/test_configs.py +1 -2
- parsl/tests/test_threads/test_lazy_errors.py +2 -2
- parsl/tests/test_utils/test_execute_wait.py +35 -0
- parsl/tests/test_utils/test_sanitize_dns.py +76 -0
- parsl/tests/unit/test_address.py +20 -0
- parsl/tests/unit/test_file.py +99 -0
- parsl/tests/unit/test_usage_tracking.py +66 -0
- parsl/usage_tracking/api.py +65 -0
- parsl/usage_tracking/levels.py +6 -0
- parsl/usage_tracking/usage.py +104 -62
- parsl/utils.py +139 -6
- parsl/version.py +1 -1
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/exec_parsl_function.py +6 -5
- parsl-2025.1.13.data/scripts/interchange.py +649 -0
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/process_worker_pool.py +115 -77
- parsl-2025.1.13.dist-info/METADATA +96 -0
- parsl-2025.1.13.dist-info/RECORD +462 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/WHEEL +1 -1
- parsl/channels/__init__.py +0 -7
- parsl/channels/base.py +0 -141
- parsl/channels/errors.py +0 -113
- parsl/channels/local/local.py +0 -164
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -110
- parsl/channels/ssh/ssh.py +0 -276
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -74
- parsl/configs/ad_hoc.py +0 -35
- parsl/executors/radical/rpex_master.py +0 -42
- parsl/monitoring/radios.py +0 -175
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -248
- parsl/providers/cobalt/__init__.py +0 -0
- parsl/providers/cobalt/cobalt.py +0 -236
- parsl/providers/cobalt/template.py +0 -17
- parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
- parsl/tests/configs/cooley_htex.py +0 -37
- parsl/tests/configs/htex_ad_hoc_cluster.py +0 -28
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/configs/swan_htex.py +0 -43
- parsl/tests/configs/theta.py +0 -37
- parsl/tests/integration/test_channels/__init__.py +0 -0
- parsl/tests/integration/test_channels/test_channels.py +0 -17
- parsl/tests/integration/test_channels/test_local_channel.py +0 -42
- parsl/tests/integration/test_channels/test_scp_1.py +0 -45
- parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
- parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
- parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
- parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
- parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -48
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
- parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
- parsl/tests/sites/test_local_adhoc.py +0 -61
- parsl/tests/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/test_large_output.py +0 -22
- parsl/tests/test_data/__init__.py +0 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -51
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -16
- parsl-2024.3.11.dist-info/METADATA +0 -98
- parsl-2024.3.11.dist-info/RECORD +0 -447
- parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
- parsl/{channels/oauth_ssh → tests/test_shutdown}/__init__.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_apps.py +0 -0
- parsl/tests/{test_data → test_staging}/test_file_staging.py +0 -0
- parsl/{channels/ssh → tests/unit}/__init__.py +0 -0
- {parsl-2024.3.11.data → parsl-2025.1.13.data}/scripts/parsl_coprocess.py +1 -1
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/LICENSE +0 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.11.dist-info → parsl-2025.1.13.dist-info}/top_level.txt +0 -0
parsl/providers/azure/azure.py
CHANGED
@@ -4,22 +4,21 @@ import os
|
|
4
4
|
import time
|
5
5
|
from string import Template
|
6
6
|
|
7
|
-
from parsl.errors import ConfigurationError
|
7
|
+
from parsl.errors import ConfigurationError, OptionalModuleMissing
|
8
8
|
from parsl.jobs.states import JobState, JobStatus
|
9
|
+
from parsl.launchers import SingleNodeLauncher
|
9
10
|
from parsl.providers.azure.template import template_string
|
10
11
|
from parsl.providers.base import ExecutionProvider
|
11
|
-
from parsl.errors import OptionalModuleMissing
|
12
12
|
from parsl.utils import RepresentationMixin
|
13
|
-
from parsl.launchers import SingleNodeLauncher
|
14
13
|
|
15
14
|
logger = logging.getLogger(__name__)
|
16
15
|
|
17
16
|
try:
|
18
17
|
from azure.common.credentials import ServicePrincipalCredentials
|
19
|
-
from azure.mgmt.resource import ResourceManagementClient
|
20
|
-
from azure.mgmt.network import NetworkManagementClient
|
21
18
|
from azure.mgmt.compute import ComputeManagementClient
|
22
19
|
from azure.mgmt.compute.models import DiskCreateOption
|
20
|
+
from azure.mgmt.network import NetworkManagementClient
|
21
|
+
from azure.mgmt.resource import ResourceManagementClient
|
23
22
|
from msrestazure.azure_exceptions import CloudError
|
24
23
|
|
25
24
|
_api_enabled = True
|
parsl/providers/base.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
from abc import ABCMeta, abstractmethod, abstractproperty
|
2
1
|
import logging
|
2
|
+
from abc import ABCMeta, abstractmethod, abstractproperty
|
3
3
|
from typing import Any, Dict, List, Optional
|
4
4
|
|
5
|
-
from parsl.channels.base import Channel
|
6
5
|
from parsl.jobs.states import JobStatus
|
7
6
|
|
8
7
|
logger = logging.getLogger(__name__)
|
@@ -12,7 +11,7 @@ class ExecutionProvider(metaclass=ABCMeta):
|
|
12
11
|
"""Execution providers are responsible for managing execution resources
|
13
12
|
that have a Local Resource Manager (LRM). For instance, campus clusters
|
14
13
|
and supercomputers generally have LRMs (schedulers) such as Slurm,
|
15
|
-
Torque/PBS,
|
14
|
+
Torque/PBS, and Condor. Clouds, on the other hand, have API
|
16
15
|
interfaces that allow much more fine-grained composition of an execution
|
17
16
|
environment. An execution provider abstracts these types of resources and
|
18
17
|
provides a single uniform interface to them.
|
@@ -154,18 +153,3 @@ class ExecutionProvider(metaclass=ABCMeta):
|
|
154
153
|
:return: the number of seconds to wait between calls to status()
|
155
154
|
"""
|
156
155
|
pass
|
157
|
-
|
158
|
-
|
159
|
-
class Channeled():
|
160
|
-
"""A marker type to indicate that parsl should manage a Channel for this provider"""
|
161
|
-
def __init__(self) -> None:
|
162
|
-
self.channel: Channel
|
163
|
-
pass
|
164
|
-
|
165
|
-
|
166
|
-
class MultiChanneled():
|
167
|
-
"""A marker type to indicate that parsl should manage multiple Channels for this provider"""
|
168
|
-
|
169
|
-
def __init__(self) -> None:
|
170
|
-
self.channels: List[Channel]
|
171
|
-
pass
|
parsl/providers/cluster_provider.py
CHANGED
@@ -2,10 +2,11 @@ import logging
|
|
2
2
|
from abc import abstractmethod
|
3
3
|
from string import Template
|
4
4
|
|
5
|
-
from parsl.providers.errors import SchedulerMissingArgs, ScriptPathError
|
6
5
|
from parsl.launchers.base import Launcher
|
7
6
|
from parsl.launchers.errors import BadLauncher
|
8
7
|
from parsl.providers.base import ExecutionProvider
|
8
|
+
from parsl.providers.errors import SchedulerMissingArgs, ScriptPathError
|
9
|
+
from parsl.utils import execute_wait
|
9
10
|
|
10
11
|
logger = logging.getLogger(__name__)
|
11
12
|
|
@@ -17,11 +18,6 @@ class ClusterProvider(ExecutionProvider):
|
|
17
18
|
----------
|
18
19
|
label : str
|
19
20
|
Label for this provider.
|
20
|
-
channel : Channel
|
21
|
-
Channel for accessing this provider. Possible channels include
|
22
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
23
|
-
:class:`~parsl.channels.SSHChannel`, or
|
24
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
25
21
|
walltime : str
|
26
22
|
Walltime requested per block in HH:MM:SS.
|
27
23
|
launcher : Launcher
|
@@ -47,7 +43,6 @@ class ClusterProvider(ExecutionProvider):
|
|
47
43
|
|
48
44
|
def __init__(self,
|
49
45
|
label,
|
50
|
-
channel,
|
51
46
|
nodes_per_block,
|
52
47
|
init_blocks,
|
53
48
|
min_blocks,
|
@@ -58,7 +53,6 @@ class ClusterProvider(ExecutionProvider):
|
|
58
53
|
cmd_timeout=10):
|
59
54
|
|
60
55
|
self._label = label
|
61
|
-
self.channel = channel
|
62
56
|
self.nodes_per_block = nodes_per_block
|
63
57
|
self.init_blocks = init_blocks
|
64
58
|
self.min_blocks = min_blocks
|
@@ -79,7 +73,7 @@ class ClusterProvider(ExecutionProvider):
|
|
79
73
|
t = self.cmd_timeout
|
80
74
|
if timeout is not None:
|
81
75
|
t = timeout
|
82
|
-
return
|
76
|
+
return execute_wait(cmd, t)
|
83
77
|
|
84
78
|
def _write_submit_script(self, template, script_filename, job_name, configs):
|
85
79
|
"""Generate submit script and write it to a file.
|
@@ -91,7 +85,7 @@ class ClusterProvider(ExecutionProvider):
|
|
91
85
|
- configs (dict) : configs that get pushed into the template
|
92
86
|
|
93
87
|
Returns:
|
94
|
-
-
|
88
|
+
- None
|
95
89
|
|
96
90
|
Raises:
|
97
91
|
SchedulerMissingArgs : If template is missing args
|
@@ -117,8 +111,6 @@ class ClusterProvider(ExecutionProvider):
|
|
117
111
|
logger.error("Uncategorized error: %s", e)
|
118
112
|
raise e
|
119
113
|
|
120
|
-
return True
|
121
|
-
|
122
114
|
@abstractmethod
|
123
115
|
def _status(self):
|
124
116
|
pass
|
parsl/providers/condor/condor.py
CHANGED
@@ -2,21 +2,20 @@ import logging
|
|
2
2
|
import os
|
3
3
|
import re
|
4
4
|
import time
|
5
|
+
|
5
6
|
import typeguard
|
6
7
|
|
7
|
-
from parsl.channels import LocalChannel
|
8
8
|
from parsl.jobs.states import JobState, JobStatus
|
9
|
-
from parsl.utils import RepresentationMixin
|
10
9
|
from parsl.launchers import SingleNodeLauncher
|
11
10
|
from parsl.launchers.base import Launcher
|
12
|
-
from parsl.providers.condor.template import template_string
|
13
11
|
from parsl.providers.cluster_provider import ClusterProvider
|
12
|
+
from parsl.providers.condor.template import template_string
|
14
13
|
from parsl.providers.errors import ScaleOutFailed
|
14
|
+
from parsl.utils import RepresentationMixin
|
15
15
|
|
16
16
|
logger = logging.getLogger(__name__)
|
17
17
|
|
18
18
|
from typing import Dict, List, Optional
|
19
|
-
from parsl.channels.base import Channel
|
20
19
|
|
21
20
|
# See http://pages.cs.wisc.edu/~adesmet/status.html
|
22
21
|
translate_table = {
|
@@ -34,11 +33,6 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
|
|
34
33
|
|
35
34
|
Parameters
|
36
35
|
----------
|
37
|
-
channel : Channel
|
38
|
-
Channel for accessing this provider. Possible channels include
|
39
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
40
|
-
:class:`~parsl.channels.SSHChannel`, or
|
41
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
42
36
|
nodes_per_block : int
|
43
37
|
Nodes to provision per block.
|
44
38
|
cores_per_slot : int
|
@@ -80,7 +74,6 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
|
|
80
74
|
"""
|
81
75
|
@typeguard.typechecked
|
82
76
|
def __init__(self,
|
83
|
-
channel: Channel = LocalChannel(),
|
84
77
|
nodes_per_block: int = 1,
|
85
78
|
cores_per_slot: Optional[int] = None,
|
86
79
|
mem_per_slot: Optional[float] = None,
|
@@ -101,7 +94,6 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
|
|
101
94
|
|
102
95
|
label = 'condor'
|
103
96
|
super().__init__(label,
|
104
|
-
channel,
|
105
97
|
nodes_per_block,
|
106
98
|
init_blocks,
|
107
99
|
min_blocks,
|
@@ -227,7 +219,7 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
|
|
227
219
|
|
228
220
|
job_config = {}
|
229
221
|
job_config["job_name"] = job_name
|
230
|
-
job_config["submit_script_dir"] = self.
|
222
|
+
job_config["submit_script_dir"] = self.script_dir
|
231
223
|
job_config["project"] = self.project
|
232
224
|
job_config["nodes"] = self.nodes_per_block
|
233
225
|
job_config["scheduler_options"] = scheduler_options
|
@@ -246,16 +238,14 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
|
|
246
238
|
with open(userscript_path, 'w') as f:
|
247
239
|
f.write(job_config["worker_init"] + '\n' + wrapped_command)
|
248
240
|
|
249
|
-
|
250
|
-
the_input_files = [user_script_path] + self.transfer_input_files
|
241
|
+
the_input_files = [userscript_path] + self.transfer_input_files
|
251
242
|
job_config["input_files"] = ','.join(the_input_files)
|
252
|
-
job_config["job_script"] = os.path.basename(
|
243
|
+
job_config["job_script"] = os.path.basename(userscript_path)
|
253
244
|
|
254
245
|
# Construct and move the submit script
|
255
246
|
self._write_submit_script(template_string, script_path, job_name, job_config)
|
256
|
-
channel_script_path = self.channel.push_file(script_path, self.channel.script_dir)
|
257
247
|
|
258
|
-
cmd = "condor_submit {0}".format(
|
248
|
+
cmd = "condor_submit {0}".format(script_path)
|
259
249
|
try:
|
260
250
|
retcode, stdout, stderr = self.execute_wait(cmd)
|
261
251
|
except Exception as e:
|
parsl/providers/errors.py
CHANGED
parsl/providers/grid_engine/grid_engine.py
CHANGED
@@ -2,11 +2,10 @@ import logging
|
|
2
2
|
import os
|
3
3
|
import time
|
4
4
|
|
5
|
-
from parsl.
|
5
|
+
from parsl.jobs.states import JobState, JobStatus
|
6
|
+
from parsl.launchers import SingleNodeLauncher
|
6
7
|
from parsl.providers.cluster_provider import ClusterProvider
|
7
8
|
from parsl.providers.grid_engine.template import template_string
|
8
|
-
from parsl.launchers import SingleNodeLauncher
|
9
|
-
from parsl.jobs.states import JobState, JobStatus
|
10
9
|
from parsl.utils import RepresentationMixin
|
11
10
|
|
12
11
|
logger = logging.getLogger(__name__)
|
@@ -36,11 +35,6 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
|
|
36
35
|
|
37
36
|
Parameters
|
38
37
|
----------
|
39
|
-
channel : Channel
|
40
|
-
Channel for accessing this provider. Possible channels include
|
41
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
42
|
-
:class:`~parsl.channels.SSHChannel`, or
|
43
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
44
38
|
nodes_per_block : int
|
45
39
|
Nodes to provision per block.
|
46
40
|
min_blocks : int
|
@@ -65,7 +59,6 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
|
|
65
59
|
"""
|
66
60
|
|
67
61
|
def __init__(self,
|
68
|
-
channel=LocalChannel(),
|
69
62
|
nodes_per_block=1,
|
70
63
|
init_blocks=1,
|
71
64
|
min_blocks=0,
|
@@ -79,7 +72,6 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
|
|
79
72
|
queue=None):
|
80
73
|
label = 'grid_engine'
|
81
74
|
super().__init__(label,
|
82
|
-
channel,
|
83
75
|
nodes_per_block,
|
84
76
|
init_blocks,
|
85
77
|
min_blocks,
|
@@ -103,7 +95,7 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
|
|
103
95
|
self.nodes_per_block, tasks_per_node))
|
104
96
|
|
105
97
|
job_config = {}
|
106
|
-
job_config["submit_script_dir"] = self.
|
98
|
+
job_config["submit_script_dir"] = self.script_dir
|
107
99
|
job_config["nodes"] = self.nodes_per_block
|
108
100
|
job_config["walltime"] = self.walltime
|
109
101
|
job_config["scheduler_options"] = self.scheduler_options
|
@@ -145,11 +137,10 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
|
|
145
137
|
logger.debug("Writing submit script")
|
146
138
|
self._write_submit_script(template_string, script_path, job_name, job_config)
|
147
139
|
|
148
|
-
channel_script_path = self.channel.push_file(script_path, self.channel.script_dir)
|
149
140
|
if self.queue is not None:
|
150
|
-
cmd = "qsub -q {0} -terse {1}".format(self.queue,
|
141
|
+
cmd = "qsub -q {0} -terse {1}".format(self.queue, script_path)
|
151
142
|
else:
|
152
|
-
cmd = "qsub -terse {0}".format(
|
143
|
+
cmd = "qsub -terse {0}".format(script_path)
|
153
144
|
retcode, stdout, stderr = self.execute_wait(cmd)
|
154
145
|
|
155
146
|
if retcode == 0:
|
parsl/providers/kubernetes/kube.py
CHANGED
@@ -1,16 +1,14 @@
|
|
1
1
|
import logging
|
2
|
-
import
|
3
|
-
from
|
2
|
+
import uuid
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple
|
4
4
|
|
5
|
-
|
5
|
+
import typeguard
|
6
6
|
|
7
7
|
from parsl.errors import OptionalModuleMissing
|
8
8
|
from parsl.jobs.states import JobState, JobStatus
|
9
9
|
from parsl.providers.base import ExecutionProvider
|
10
|
-
from parsl.
|
11
|
-
|
12
|
-
import typeguard
|
13
|
-
from typing import Any, Dict, List, Optional, Tuple
|
10
|
+
from parsl.providers.kubernetes.template import template_string
|
11
|
+
from parsl.utils import RepresentationMixin, sanitize_dns_subdomain_rfc1123
|
14
12
|
|
15
13
|
try:
|
16
14
|
from kubernetes import client, config
|
@@ -18,6 +16,8 @@ try:
|
|
18
16
|
except (ImportError, NameError, FileNotFoundError):
|
19
17
|
_kubernetes_enabled = False
|
20
18
|
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
21
|
translate_table = {
|
22
22
|
'Running': JobState.RUNNING,
|
23
23
|
'Pending': JobState.PENDING,
|
@@ -81,6 +81,10 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
81
81
|
persistent_volumes: list[(str, str)]
|
82
82
|
List of tuples describing persistent volumes to be mounted in the pod.
|
83
83
|
The tuples consist of (PVC Name, Mount Directory).
|
84
|
+
service_account_name: str
|
85
|
+
Name of the service account to run the pod as.
|
86
|
+
annotations: Dict[str, str]
|
87
|
+
Annotations to set on the pod.
|
84
88
|
"""
|
85
89
|
@typeguard.typechecked
|
86
90
|
def __init__(self,
|
@@ -101,11 +105,32 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
101
105
|
group_id: Optional[str] = None,
|
102
106
|
run_as_non_root: bool = False,
|
103
107
|
secret: Optional[str] = None,
|
104
|
-
persistent_volumes: List[Tuple[str, str]] = []
|
108
|
+
persistent_volumes: List[Tuple[str, str]] = [],
|
109
|
+
service_account_name: Optional[str] = None,
|
110
|
+
annotations: Optional[Dict[str, str]] = None) -> None:
|
105
111
|
if not _kubernetes_enabled:
|
106
112
|
raise OptionalModuleMissing(['kubernetes'],
|
107
113
|
"Kubernetes provider requires kubernetes module and config.")
|
108
|
-
|
114
|
+
try:
|
115
|
+
config.load_kube_config()
|
116
|
+
except config.config_exception.ConfigException:
|
117
|
+
# `load_kube_config` assumes a local kube-config file, and fails if not
|
118
|
+
# present, raising:
|
119
|
+
#
|
120
|
+
# kubernetes.config.config_exception.ConfigException: Invalid
|
121
|
+
# kube-config file. No configuration found.
|
122
|
+
#
|
123
|
+
# Since running a parsl driver script on a kubernetes cluster is a common
|
124
|
+
# pattern to enable worker-interchange communication, this enables an
|
125
|
+
# in-cluster config to be loaded if a kube-config file isn't found.
|
126
|
+
#
|
127
|
+
# Based on: https://github.com/kubernetes-client/python/issues/1005
|
128
|
+
try:
|
129
|
+
config.load_incluster_config()
|
130
|
+
except config.config_exception.ConfigException:
|
131
|
+
raise config.config_exception.ConfigException(
|
132
|
+
"Failed to load both kube-config file and in-cluster configuration."
|
133
|
+
)
|
109
134
|
|
110
135
|
self.namespace = namespace
|
111
136
|
self.image = image
|
@@ -125,6 +150,8 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
125
150
|
self.group_id = group_id
|
126
151
|
self.run_as_non_root = run_as_non_root
|
127
152
|
self.persistent_volumes = persistent_volumes
|
153
|
+
self.service_account_name = service_account_name
|
154
|
+
self.annotations = annotations
|
128
155
|
|
129
156
|
self.kube_client = client.CoreV1Api()
|
130
157
|
|
@@ -132,41 +159,46 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
132
159
|
self.resources: Dict[object, Dict[str, Any]]
|
133
160
|
self.resources = {}
|
134
161
|
|
135
|
-
def submit(self, cmd_string, tasks_per_node, job_name="parsl"):
|
162
|
+
def submit(self, cmd_string: str, tasks_per_node: int, job_name: str = "parsl.kube"):
|
136
163
|
""" Submit a job
|
137
164
|
Args:
|
138
165
|
- cmd_string :(String) - Name of the container to initiate
|
139
166
|
- tasks_per_node (int) : command invocations to be launched per node
|
140
167
|
|
141
168
|
Kwargs:
|
142
|
-
- job_name (String): Name for job
|
169
|
+
- job_name (String): Name for job
|
143
170
|
|
144
171
|
Returns:
|
145
|
-
- None: At capacity, cannot provision more
|
146
172
|
- job_id: (string) Identifier for the job
|
147
173
|
"""
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
174
|
+
job_id = uuid.uuid4().hex[:8]
|
175
|
+
|
176
|
+
pod_name = self.pod_name or job_name
|
177
|
+
try:
|
178
|
+
pod_name = sanitize_dns_subdomain_rfc1123(pod_name)
|
179
|
+
except ValueError:
|
180
|
+
logger.warning(
|
181
|
+
f"Invalid pod name '{pod_name}' for job '{job_id}', falling back to 'parsl.kube'"
|
182
|
+
)
|
183
|
+
pod_name = "parsl.kube"
|
184
|
+
pod_name = pod_name[:253 - 1 - len(job_id)] # Leave room for the job ID
|
185
|
+
pod_name = pod_name.rstrip(".-") # Remove trailing dot or hyphen after trim
|
186
|
+
pod_name = f"{pod_name}.{job_id}"
|
157
187
|
|
158
188
|
formatted_cmd = template_string.format(command=cmd_string,
|
159
189
|
worker_init=self.worker_init)
|
160
190
|
|
161
|
-
logger.debug("Pod name
|
191
|
+
logger.debug("Pod name: %s", pod_name)
|
162
192
|
self._create_pod(image=self.image,
|
163
193
|
pod_name=pod_name,
|
164
|
-
|
194
|
+
job_id=job_id,
|
165
195
|
cmd_string=formatted_cmd,
|
166
|
-
volumes=self.persistent_volumes
|
167
|
-
|
196
|
+
volumes=self.persistent_volumes,
|
197
|
+
service_account_name=self.service_account_name,
|
198
|
+
annotations=self.annotations)
|
199
|
+
self.resources[job_id] = {'status': JobStatus(JobState.RUNNING), 'pod_name': pod_name}
|
168
200
|
|
169
|
-
return
|
201
|
+
return job_id
|
170
202
|
|
171
203
|
def status(self, job_ids):
|
172
204
|
""" Get the status of a list of jobs identified by the job identifiers
|
@@ -182,6 +214,9 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
182
214
|
self._status()
|
183
215
|
return [self.resources[jid]['status'] for jid in job_ids]
|
184
216
|
|
217
|
+
def _get_pod_name(self, job_id: str) -> str:
|
218
|
+
return self.resources[job_id]['pod_name']
|
219
|
+
|
185
220
|
def cancel(self, job_ids):
|
186
221
|
""" Cancels the jobs specified by a list of job ids
|
187
222
|
Args:
|
@@ -191,7 +226,8 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
191
226
|
"""
|
192
227
|
for job in job_ids:
|
193
228
|
logger.debug("Terminating job/pod: {0}".format(job))
|
194
|
-
self.
|
229
|
+
pod_name = self._get_pod_name(job)
|
230
|
+
self._delete_pod(pod_name)
|
195
231
|
|
196
232
|
self.resources[job]['status'] = JobStatus(JobState.CANCELLED)
|
197
233
|
rets = [True for i in job_ids]
|
@@ -212,13 +248,14 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
212
248
|
for jid in to_poll_job_ids:
|
213
249
|
phase = None
|
214
250
|
try:
|
215
|
-
|
251
|
+
pod_name = self._get_pod_name(jid)
|
252
|
+
pod = self.kube_client.read_namespaced_pod(name=pod_name, namespace=self.namespace)
|
216
253
|
except Exception:
|
217
254
|
logger.exception("Failed to poll pod {} status, most likely because pod was terminated".format(jid))
|
218
255
|
if self.resources[jid]['status'] is JobStatus(JobState.RUNNING):
|
219
256
|
phase = 'Unknown'
|
220
257
|
else:
|
221
|
-
phase =
|
258
|
+
phase = pod.status.phase
|
222
259
|
if phase:
|
223
260
|
status = translate_table.get(phase, JobState.UNKNOWN)
|
224
261
|
logger.debug("Updating pod {} with status {} to parsl status {}".format(jid,
|
@@ -227,17 +264,19 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
227
264
|
self.resources[jid]['status'] = JobStatus(status)
|
228
265
|
|
229
266
|
def _create_pod(self,
|
230
|
-
image,
|
231
|
-
pod_name,
|
232
|
-
|
233
|
-
port=80,
|
267
|
+
image: str,
|
268
|
+
pod_name: str,
|
269
|
+
job_id: str,
|
270
|
+
port: int = 80,
|
234
271
|
cmd_string=None,
|
235
|
-
volumes=[]
|
272
|
+
volumes=[],
|
273
|
+
service_account_name=None,
|
274
|
+
annotations=None):
|
236
275
|
""" Create a kubernetes pod for the job.
|
237
276
|
Args:
|
238
277
|
- image (string) : Docker image to launch
|
239
278
|
- pod_name (string) : Name of the pod
|
240
|
-
-
|
279
|
+
- job_id (string) : Job ID
|
241
280
|
KWargs:
|
242
281
|
- port (integer) : Container port
|
243
282
|
Returns:
|
@@ -253,7 +292,7 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
253
292
|
# Create the environment variables and command to initiate IPP
|
254
293
|
environment_vars = client.V1EnvVar(name="TEST", value="SOME DATA")
|
255
294
|
|
256
|
-
launch_args = ["-c", "{0}
|
295
|
+
launch_args = ["-c", "{0}".format(cmd_string)]
|
257
296
|
|
258
297
|
volume_mounts = []
|
259
298
|
# Create mount paths for the volumes
|
@@ -267,7 +306,7 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
267
306
|
)
|
268
307
|
# Configure Pod template container
|
269
308
|
container = client.V1Container(
|
270
|
-
name=
|
309
|
+
name=job_id,
|
271
310
|
image=image,
|
272
311
|
resources=resources,
|
273
312
|
ports=[client.V1ContainerPort(container_port=port)],
|
@@ -290,11 +329,12 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
|
|
290
329
|
claim_name=volume[0])))
|
291
330
|
|
292
331
|
metadata = client.V1ObjectMeta(name=pod_name,
|
293
|
-
labels={"
|
332
|
+
labels={"parsl-job-id": job_id},
|
333
|
+
annotations=annotations)
|
294
334
|
spec = client.V1PodSpec(containers=[container],
|
295
335
|
image_pull_secrets=[secret],
|
296
|
-
volumes=volume_defs
|
297
|
-
)
|
336
|
+
volumes=volume_defs,
|
337
|
+
service_account_name=service_account_name)
|
298
338
|
|
299
339
|
pod = client.V1Pod(spec=spec, metadata=metadata)
|
300
340
|
api_response = self.kube_client.create_namespaced_pod(namespace=self.namespace,
|
parsl/providers/local/local.py
CHANGED
@@ -2,12 +2,15 @@ import logging
|
|
2
2
|
import os
|
3
3
|
import time
|
4
4
|
|
5
|
-
from parsl.channels import LocalChannel
|
6
5
|
from parsl.jobs.states import JobState, JobStatus
|
7
6
|
from parsl.launchers import SingleNodeLauncher
|
8
7
|
from parsl.providers.base import ExecutionProvider
|
9
|
-
from parsl.providers.errors import
|
10
|
-
|
8
|
+
from parsl.providers.errors import (
|
9
|
+
SchedulerMissingArgs,
|
10
|
+
ScriptPathError,
|
11
|
+
SubmitException,
|
12
|
+
)
|
13
|
+
from parsl.utils import RepresentationMixin, execute_wait
|
11
14
|
|
12
15
|
logger = logging.getLogger(__name__)
|
13
16
|
|
@@ -28,15 +31,11 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
28
31
|
Ratio of provisioned task slots to active tasks. A parallelism value of 1 represents aggressive
|
29
32
|
scaling where as many resources as possible are used; parallelism close to 0 represents
|
30
33
|
the opposite situation in which as few resources as possible (i.e., min_blocks) are used.
|
31
|
-
move_files : Optional[Bool]
|
32
|
-
Should files be moved? By default, Parsl will try to figure this out itself (= None).
|
33
|
-
If True, then will always move. If False, will never move.
|
34
34
|
worker_init : str
|
35
35
|
Command to be run before starting a worker, such as 'module load Anaconda; source activate env'.
|
36
36
|
"""
|
37
37
|
|
38
38
|
def __init__(self,
|
39
|
-
channel=LocalChannel(),
|
40
39
|
nodes_per_block=1,
|
41
40
|
launcher=SingleNodeLauncher(),
|
42
41
|
init_blocks=1,
|
@@ -44,9 +43,7 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
44
43
|
max_blocks=1,
|
45
44
|
worker_init='',
|
46
45
|
cmd_timeout=30,
|
47
|
-
parallelism=1
|
48
|
-
move_files=None):
|
49
|
-
self.channel = channel
|
46
|
+
parallelism=1):
|
50
47
|
self._label = 'local'
|
51
48
|
self.nodes_per_block = nodes_per_block
|
52
49
|
self.launcher = launcher
|
@@ -57,7 +54,6 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
57
54
|
self.parallelism = parallelism
|
58
55
|
self.script_dir = None
|
59
56
|
self.cmd_timeout = cmd_timeout
|
60
|
-
self.move_files = move_files
|
61
57
|
|
62
58
|
# Dictionary that keeps track of jobs, keyed on job_id
|
63
59
|
self.resources = {}
|
@@ -79,7 +75,6 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
79
75
|
if job_dict['status'] and job_dict['status'].terminal:
|
80
76
|
# We already checked this and it can't change after that
|
81
77
|
continue
|
82
|
-
# Script path should point to remote path if _should_move_files() is True
|
83
78
|
script_path = job_dict['script_path']
|
84
79
|
|
85
80
|
alive = self._is_alive(job_dict)
|
@@ -120,7 +115,7 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
120
115
|
return [self.resources[jid]['status'] for jid in job_ids]
|
121
116
|
|
122
117
|
def _is_alive(self, job_dict):
|
123
|
-
retcode, stdout, stderr =
|
118
|
+
retcode, stdout, stderr = execute_wait(
|
124
119
|
'ps -p {} > /dev/null 2> /dev/null; echo "STATUS:$?" '.format(
|
125
120
|
job_dict['remote_pid']), self.cmd_timeout)
|
126
121
|
for line in stdout.split('\n'):
|
@@ -133,8 +128,6 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
133
128
|
|
134
129
|
def _job_file_path(self, script_path: str, suffix: str) -> str:
|
135
130
|
path = '{0}{1}'.format(script_path, suffix)
|
136
|
-
if self._should_move_files():
|
137
|
-
path = self.channel.pull_file(path, self.script_dir)
|
138
131
|
return path
|
139
132
|
|
140
133
|
def _read_job_file(self, script_path: str, suffix: str) -> str:
|
@@ -206,15 +199,12 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
206
199
|
script_path = "{0}/{1}.sh".format(self.script_dir, job_name)
|
207
200
|
script_path = os.path.abspath(script_path)
|
208
201
|
|
209
|
-
wrap_command = self.worker_init + f'\nexport JOBNAME
|
202
|
+
wrap_command = self.worker_init + f'\nexport JOBNAME={job_name}\n' + self.launcher(command, tasks_per_node, self.nodes_per_block)
|
210
203
|
|
211
204
|
self._write_submit_script(wrap_command, script_path)
|
212
205
|
|
213
206
|
job_id = None
|
214
207
|
remote_pid = None
|
215
|
-
if self._should_move_files():
|
216
|
-
logger.debug("Pushing start script")
|
217
|
-
script_path = self.channel.push_file(script_path, self.channel.script_dir)
|
218
208
|
|
219
209
|
logger.debug("Launching")
|
220
210
|
# We need to capture the exit code and the streams, so we put them in files. We also write
|
@@ -230,11 +220,11 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
230
220
|
# cancel the task later.
|
231
221
|
#
|
232
222
|
# We need to do the >/dev/null 2>&1 so that bash closes stdout, otherwise
|
233
|
-
#
|
223
|
+
# execute_wait hangs reading the process stdout until all the
|
234
224
|
# background commands complete.
|
235
225
|
cmd = '/bin/bash -c \'echo - >{0}.ec && {{ {{ bash {0} 1>{0}.out 2>{0}.err ; ' \
|
236
226
|
'echo $? > {0}.ec ; }} >/dev/null 2>&1 & echo "PID:$!" ; }}\''.format(script_path)
|
237
|
-
retcode, stdout, stderr =
|
227
|
+
retcode, stdout, stderr = execute_wait(cmd, self.cmd_timeout)
|
238
228
|
if retcode != 0:
|
239
229
|
raise SubmitException(job_name, "Launch command exited with code {0}".format(retcode),
|
240
230
|
stdout, stderr)
|
@@ -250,9 +240,6 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
250
240
|
|
251
241
|
return job_id
|
252
242
|
|
253
|
-
def _should_move_files(self):
|
254
|
-
return (self.move_files is None and not isinstance(self.channel, LocalChannel)) or (self.move_files)
|
255
|
-
|
256
243
|
def cancel(self, job_ids):
|
257
244
|
''' Cancels the jobs specified by a list of job ids
|
258
245
|
|
@@ -266,9 +253,9 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
|
|
266
253
|
for job in job_ids:
|
267
254
|
job_dict = self.resources[job]
|
268
255
|
job_dict['cancelled'] = True
|
269
|
-
logger.debug("Terminating job/
|
256
|
+
logger.debug("Terminating job/process ID: {0}".format(job))
|
270
257
|
cmd = "kill -- -$(ps -o pgid= {} | grep -o '[0-9]*')".format(job_dict['remote_pid'])
|
271
|
-
retcode, stdout, stderr =
|
258
|
+
retcode, stdout, stderr = execute_wait(cmd, self.cmd_timeout)
|
272
259
|
if retcode != 0:
|
273
260
|
logger.warning("Failed to kill PID: {} and child processes on {}".format(job_dict['remote_pid'],
|
274
261
|
self.label))
|