fractal-server 2.17.1a0__py3-none-any.whl → 2.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/__main__.py +19 -18
- fractal_server/app/db/__init__.py +3 -3
- fractal_server/app/models/__init__.py +1 -1
- fractal_server/app/models/linkuserproject.py +3 -1
- fractal_server/app/models/security.py +22 -17
- fractal_server/app/models/v2/__init__.py +3 -1
- fractal_server/app/models/v2/accounting.py +9 -1
- fractal_server/app/models/v2/dataset.py +5 -1
- fractal_server/app/models/v2/history.py +15 -1
- fractal_server/app/models/v2/job.py +4 -0
- fractal_server/app/models/v2/profile.py +29 -0
- fractal_server/app/models/v2/project.py +5 -14
- fractal_server/app/models/v2/resource.py +4 -0
- fractal_server/app/models/v2/task_group.py +5 -7
- fractal_server/app/models/v2/workflow.py +2 -1
- fractal_server/app/routes/admin/v2/__init__.py +1 -2
- fractal_server/app/routes/admin/v2/accounting.py +1 -1
- fractal_server/app/routes/admin/v2/job.py +9 -9
- fractal_server/app/routes/admin/v2/profile.py +3 -2
- fractal_server/app/routes/admin/v2/resource.py +5 -5
- fractal_server/app/routes/admin/v2/task.py +28 -18
- fractal_server/app/routes/admin/v2/task_group.py +0 -1
- fractal_server/app/routes/admin/v2/task_group_lifecycle.py +1 -2
- fractal_server/app/routes/api/__init__.py +1 -0
- fractal_server/app/routes/api/v2/__init__.py +5 -6
- fractal_server/app/routes/api/v2/_aux_functions.py +70 -63
- fractal_server/app/routes/api/v2/_aux_functions_history.py +43 -20
- fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +2 -4
- fractal_server/app/routes/api/v2/_aux_functions_tasks.py +5 -7
- fractal_server/app/routes/api/v2/_aux_task_group_disambiguation.py +1 -2
- fractal_server/app/routes/api/v2/dataset.py +13 -32
- fractal_server/app/routes/api/v2/history.py +35 -21
- fractal_server/app/routes/api/v2/images.py +3 -2
- fractal_server/app/routes/api/v2/job.py +17 -14
- fractal_server/app/routes/api/v2/pre_submission_checks.py +5 -4
- fractal_server/app/routes/api/v2/project.py +22 -17
- fractal_server/app/routes/api/v2/status_legacy.py +12 -11
- fractal_server/app/routes/api/v2/submit.py +11 -12
- fractal_server/app/routes/api/v2/task.py +4 -3
- fractal_server/app/routes/api/v2/task_collection.py +28 -30
- fractal_server/app/routes/api/v2/task_collection_custom.py +8 -7
- fractal_server/app/routes/api/v2/task_collection_pixi.py +1 -2
- fractal_server/app/routes/api/v2/task_group.py +7 -6
- fractal_server/app/routes/api/v2/task_group_lifecycle.py +6 -6
- fractal_server/app/routes/api/v2/task_version_update.py +13 -12
- fractal_server/app/routes/api/v2/workflow.py +14 -31
- fractal_server/app/routes/api/v2/workflow_import.py +17 -19
- fractal_server/app/routes/api/v2/workflowtask.py +10 -12
- fractal_server/app/routes/auth/__init__.py +1 -3
- fractal_server/app/routes/auth/_aux_auth.py +1 -2
- fractal_server/app/routes/auth/current_user.py +4 -5
- fractal_server/app/routes/auth/group.py +7 -5
- fractal_server/app/routes/auth/login.py +1 -0
- fractal_server/app/routes/auth/oauth.py +4 -3
- fractal_server/app/routes/auth/register.py +4 -2
- fractal_server/app/routes/auth/users.py +10 -10
- fractal_server/app/routes/aux/_job.py +1 -1
- fractal_server/app/routes/aux/_runner.py +2 -2
- fractal_server/app/routes/pagination.py +1 -1
- fractal_server/app/schemas/user.py +3 -3
- fractal_server/app/schemas/v2/accounting.py +11 -0
- fractal_server/app/schemas/v2/dataset.py +28 -4
- fractal_server/app/schemas/v2/dumps.py +1 -0
- fractal_server/app/schemas/v2/manifest.py +4 -3
- fractal_server/app/schemas/v2/profile.py +53 -2
- fractal_server/app/schemas/v2/resource.py +109 -13
- fractal_server/app/schemas/v2/task.py +0 -1
- fractal_server/app/schemas/v2/task_collection.py +1 -1
- fractal_server/app/schemas/v2/workflowtask.py +4 -3
- fractal_server/app/security/__init__.py +4 -7
- fractal_server/app/security/signup_email.py +4 -5
- fractal_server/app/shutdown.py +23 -19
- fractal_server/config/_data.py +36 -25
- fractal_server/config/_database.py +19 -20
- fractal_server/config/_email.py +30 -38
- fractal_server/config/_main.py +34 -53
- fractal_server/config/_oauth.py +17 -21
- fractal_server/exceptions.py +4 -0
- fractal_server/images/models.py +3 -3
- fractal_server/images/status_tools.py +4 -2
- fractal_server/logger.py +1 -1
- fractal_server/main.py +4 -3
- fractal_server/migrations/versions/034a469ec2eb_task_groups.py +4 -8
- fractal_server/migrations/versions/091b01f51f88_add_usergroup_and_linkusergroup_table.py +1 -1
- fractal_server/migrations/versions/0f5f85bb2ae7_add_pre_pinned_packages.py +1 -0
- fractal_server/migrations/versions/19eca0dd47a9_user_settings_project_dir.py +1 -1
- fractal_server/migrations/versions/1a83a5260664_rename.py +1 -1
- fractal_server/migrations/versions/1eac13a26c83_drop_v1_tables.py +1 -0
- fractal_server/migrations/versions/316140ff7ee1_remove_usersettings_cache_dir.py +1 -1
- fractal_server/migrations/versions/40d6d6511b20_add_index_to_history_models.py +47 -0
- fractal_server/migrations/versions/45fbb391d7af_make_resource_id_fk_non_nullable.py +46 -0
- fractal_server/migrations/versions/47351f8c7ebc_drop_dataset_filters.py +1 -0
- fractal_server/migrations/versions/49d0856e9569_drop_table.py +62 -0
- fractal_server/migrations/versions/4c308bcaea2b_add_task_args_schema_and_task_args_.py +1 -1
- fractal_server/migrations/versions/4cedeb448a53_workflowtask_foreign_keys_not_nullables.py +1 -1
- fractal_server/migrations/versions/501961cfcd85_remove_link_between_v1_and_v2_tasks_.py +2 -1
- fractal_server/migrations/versions/50a13d6138fd_initial_schema.py +7 -19
- fractal_server/migrations/versions/5bf02391cfef_v2.py +4 -10
- fractal_server/migrations/versions/70e77f1c38b0_add_applyworkflow_first_task_index_and_.py +1 -0
- fractal_server/migrations/versions/71eefd1dd202_add_slurm_accounts.py +1 -1
- fractal_server/migrations/versions/7673fe18c05d_remove_project_dir_server_default.py +29 -0
- fractal_server/migrations/versions/791ce783d3d8_add_indices.py +1 -1
- fractal_server/migrations/versions/83bc2ad3ffcc_2_17_0.py +1 -0
- fractal_server/migrations/versions/84bf0fffde30_add_dumps_to_applyworkflow.py +1 -0
- fractal_server/migrations/versions/8e8f227a3e36_update_taskv2_post_2_7_0.py +2 -4
- fractal_server/migrations/versions/8f79bd162e35_add_docs_info_and_docs_link_to_task_.py +1 -1
- fractal_server/migrations/versions/94a47ea2d3ff_remove_cache_dir_slurm_user_and_slurm_.py +1 -0
- fractal_server/migrations/versions/969d84257cac_add_historyrun_task_id.py +1 -1
- fractal_server/migrations/versions/97f444d47249_add_applyworkflow_project_dump.py +1 -1
- fractal_server/migrations/versions/981d588fe248_add_executor_error_log.py +1 -1
- fractal_server/migrations/versions/99ea79d9e5d2_add_dataset_history.py +2 -4
- fractal_server/migrations/versions/9c5ae74c9b98_add_user_settings_table.py +1 -1
- fractal_server/migrations/versions/9db60297b8b2_set_ondelete.py +1 -1
- fractal_server/migrations/versions/9fd26a2b0de4_add_workflow_timestamp_created.py +1 -1
- fractal_server/migrations/versions/a7f4d6137b53_add_workflow_dump_to_applyworkflow.py +1 -1
- fractal_server/migrations/versions/af1ef1c83c9b_add_accounting_tables.py +1 -0
- fractal_server/migrations/versions/af8673379a5c_drop_old_filter_columns.py +1 -0
- fractal_server/migrations/versions/b1e7f7a1ff71_task_group_for_pixi.py +1 -1
- fractal_server/migrations/versions/b3ffb095f973_json_to_jsonb.py +1 -0
- fractal_server/migrations/versions/c90a7c76e996_job_id_in_history_run.py +1 -1
- fractal_server/migrations/versions/caba9fb1ea5e_drop_useroauth_user_settings_id.py +49 -0
- fractal_server/migrations/versions/d256a7379ab8_taskgroup_activity_and_venv_info_to_.py +4 -9
- fractal_server/migrations/versions/d4fe3708d309_make_applyworkflow_workflow_dump_non_.py +1 -0
- fractal_server/migrations/versions/da2cb2ac4255_user_group_viewer_paths.py +1 -1
- fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py +1 -0
- fractal_server/migrations/versions/e0e717ae2f26_delete_linkuserproject_ondelete_project.py +50 -0
- fractal_server/migrations/versions/e75cac726012_make_applyworkflow_start_timestamp_not_.py +1 -0
- fractal_server/migrations/versions/e81103413827_add_job_type_filters.py +1 -1
- fractal_server/migrations/versions/efa89c30e0a4_add_project_timestamp_created.py +1 -0
- fractal_server/migrations/versions/f37aceb45062_make_historyunit_logfile_required.py +1 -1
- fractal_server/migrations/versions/f384e1c0cf5d_drop_task_default_args_columns.py +1 -0
- fractal_server/migrations/versions/fbce16ff4e47_new_history_items.py +4 -9
- fractal_server/runner/config/_local.py +8 -5
- fractal_server/runner/config/_slurm.py +37 -33
- fractal_server/runner/config/slurm_mem_to_MB.py +0 -1
- fractal_server/runner/executors/base_runner.py +29 -4
- fractal_server/runner/executors/local/get_local_config.py +1 -0
- fractal_server/runner/executors/local/runner.py +14 -13
- fractal_server/runner/executors/slurm_common/_batching.py +5 -10
- fractal_server/runner/executors/slurm_common/base_slurm_runner.py +53 -27
- fractal_server/runner/executors/slurm_common/get_slurm_config.py +14 -7
- fractal_server/runner/executors/slurm_common/remote.py +3 -1
- fractal_server/runner/executors/slurm_common/slurm_config.py +1 -0
- fractal_server/runner/executors/slurm_common/slurm_job_task_models.py +1 -3
- fractal_server/runner/executors/slurm_ssh/runner.py +16 -11
- fractal_server/runner/executors/slurm_ssh/tar_commands.py +1 -0
- fractal_server/runner/executors/slurm_sudo/_subprocess_run_as_user.py +1 -0
- fractal_server/runner/executors/slurm_sudo/runner.py +16 -11
- fractal_server/runner/task_files.py +9 -3
- fractal_server/runner/v2/_local.py +9 -4
- fractal_server/runner/v2/_slurm_ssh.py +11 -5
- fractal_server/runner/v2/_slurm_sudo.py +11 -5
- fractal_server/runner/v2/db_tools.py +0 -1
- fractal_server/runner/v2/deduplicate_list.py +2 -1
- fractal_server/runner/v2/runner.py +11 -14
- fractal_server/runner/v2/runner_functions.py +11 -14
- fractal_server/runner/v2/submit_workflow.py +7 -6
- fractal_server/ssh/_fabric.py +6 -13
- fractal_server/string_tools.py +0 -1
- fractal_server/syringe.py +1 -1
- fractal_server/tasks/config/_pixi.py +1 -1
- fractal_server/tasks/config/_python.py +16 -9
- fractal_server/tasks/utils.py +0 -1
- fractal_server/tasks/v2/local/_utils.py +1 -1
- fractal_server/tasks/v2/local/collect.py +10 -12
- fractal_server/tasks/v2/local/collect_pixi.py +9 -10
- fractal_server/tasks/v2/local/deactivate.py +7 -8
- fractal_server/tasks/v2/local/deactivate_pixi.py +4 -4
- fractal_server/tasks/v2/local/delete.py +1 -3
- fractal_server/tasks/v2/local/reactivate.py +7 -7
- fractal_server/tasks/v2/local/reactivate_pixi.py +7 -7
- fractal_server/tasks/v2/ssh/_utils.py +3 -3
- fractal_server/tasks/v2/ssh/collect.py +14 -19
- fractal_server/tasks/v2/ssh/collect_pixi.py +17 -19
- fractal_server/tasks/v2/ssh/deactivate.py +10 -8
- fractal_server/tasks/v2/ssh/deactivate_pixi.py +6 -5
- fractal_server/tasks/v2/ssh/delete.py +7 -5
- fractal_server/tasks/v2/ssh/reactivate.py +11 -11
- fractal_server/tasks/v2/ssh/reactivate_pixi.py +8 -9
- fractal_server/tasks/v2/templates/1_create_venv.sh +2 -0
- fractal_server/tasks/v2/templates/2_pip_install.sh +2 -0
- fractal_server/tasks/v2/templates/3_pip_freeze.sh +2 -0
- fractal_server/tasks/v2/templates/4_pip_show.sh +2 -0
- fractal_server/tasks/v2/templates/5_get_venv_size_and_file_number.sh +3 -1
- fractal_server/tasks/v2/templates/6_pip_install_from_freeze.sh +2 -0
- fractal_server/tasks/v2/templates/pixi_1_extract.sh +2 -0
- fractal_server/tasks/v2/templates/pixi_2_install.sh +2 -0
- fractal_server/tasks/v2/templates/pixi_3_post_install.sh +2 -0
- fractal_server/tasks/v2/utils_background.py +3 -3
- fractal_server/tasks/v2/utils_package_names.py +1 -2
- fractal_server/tasks/v2/utils_pixi.py +1 -3
- fractal_server/types/__init__.py +76 -1
- fractal_server/types/validators/_common_validators.py +1 -3
- fractal_server/types/validators/_workflow_task_arguments_validators.py +1 -2
- fractal_server/utils.py +1 -0
- fractal_server/zip_tools.py +34 -0
- {fractal_server-2.17.1a0.dist-info → fractal_server-2.17.2.dist-info}/METADATA +1 -1
- fractal_server-2.17.2.dist-info/RECORD +265 -0
- fractal_server/app/models/user_settings.py +0 -37
- fractal_server/app/routes/admin/v2/project.py +0 -41
- fractal_server/data_migrations/2_17_0.py +0 -339
- fractal_server-2.17.1a0.dist-info/RECORD +0 -262
- {fractal_server-2.17.1a0.dist-info → fractal_server-2.17.2.dist-info}/WHEEL +0 -0
- {fractal_server-2.17.1a0.dist-info → fractal_server-2.17.2.dist-info}/entry_points.txt +0 -0
- {fractal_server-2.17.1a0.dist-info → fractal_server-2.17.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -10,13 +10,15 @@ from fractal_server.runner.config.slurm_mem_to_MB import slurm_mem_to_MB
|
|
|
10
10
|
from fractal_server.types import DictStrStr
|
|
11
11
|
from fractal_server.types import NonEmptyStr
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
MemMBType = Annotated[
|
|
15
14
|
PositiveInt | NonEmptyStr, AfterValidator(slurm_mem_to_MB)
|
|
16
15
|
]
|
|
16
|
+
"""
|
|
17
|
+
Memory expressed in MB.
|
|
18
|
+
"""
|
|
17
19
|
|
|
18
20
|
|
|
19
|
-
class _SlurmConfigSet(BaseModel):
|
|
21
|
+
class SlurmConfigSet(BaseModel):
|
|
20
22
|
"""
|
|
21
23
|
Options for the default or gpu SLURM config.
|
|
22
24
|
|
|
@@ -24,7 +26,6 @@ class _SlurmConfigSet(BaseModel):
|
|
|
24
26
|
partition:
|
|
25
27
|
cpus_per_task:
|
|
26
28
|
mem:
|
|
27
|
-
See `_parse_mem_value` for details on allowed values.
|
|
28
29
|
constraint:
|
|
29
30
|
gres:
|
|
30
31
|
time:
|
|
@@ -32,6 +33,7 @@ class _SlurmConfigSet(BaseModel):
|
|
|
32
33
|
nodelist:
|
|
33
34
|
account:
|
|
34
35
|
extra_lines:
|
|
36
|
+
gpus:
|
|
35
37
|
"""
|
|
36
38
|
|
|
37
39
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -49,7 +51,7 @@ class _SlurmConfigSet(BaseModel):
|
|
|
49
51
|
gpus: NonEmptyStr | None = None
|
|
50
52
|
|
|
51
53
|
|
|
52
|
-
class _BatchingConfigSet(BaseModel):
|
|
54
|
+
class BatchingConfigSet(BaseModel):
|
|
53
55
|
"""
|
|
54
56
|
Options to configure the batching strategy (that is, how to combine
|
|
55
57
|
several tasks in a single SLURM job).
|
|
@@ -58,9 +60,7 @@ class _BatchingConfigSet(BaseModel):
|
|
|
58
60
|
target_cpus_per_job:
|
|
59
61
|
max_cpus_per_job:
|
|
60
62
|
target_mem_per_job:
|
|
61
|
-
(see `_parse_mem_value` for details on allowed values)
|
|
62
63
|
max_mem_per_job:
|
|
63
|
-
(see `_parse_mem_value` for details on allowed values)
|
|
64
64
|
target_num_jobs:
|
|
65
65
|
max_num_jobs:
|
|
66
66
|
"""
|
|
@@ -77,34 +77,38 @@ class _BatchingConfigSet(BaseModel):
|
|
|
77
77
|
|
|
78
78
|
class JobRunnerConfigSLURM(BaseModel):
|
|
79
79
|
"""
|
|
80
|
-
|
|
80
|
+
Runner-configuration specifications, for a `slurm_sudo` or
|
|
81
|
+
`slurm_ssh` resource.
|
|
81
82
|
|
|
82
|
-
Note: this is a common
|
|
83
|
-
|
|
83
|
+
Note: this is a common class, which is processed and transformed into more
|
|
84
|
+
specific configuration objects during job execution.
|
|
84
85
|
|
|
85
86
|
Valid JSON example
|
|
86
|
-
```
|
|
87
|
+
```json
|
|
87
88
|
{
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
89
|
+
"default_slurm_config": {
|
|
90
|
+
"partition": "partition-name",
|
|
91
|
+
"cpus_per_task": 1,
|
|
92
|
+
"mem": "100M"
|
|
93
|
+
},
|
|
94
|
+
"gpu_slurm_config": {
|
|
95
|
+
"partition": "gpu",
|
|
96
|
+
"extra_lines": [
|
|
97
|
+
"#SBATCH --gres=gpu:v100:1"
|
|
98
|
+
]
|
|
99
|
+
},
|
|
100
|
+
"user_local_exports": {
|
|
101
|
+
"CELLPOSE_LOCAL_MODELS_PATH": "CELLPOSE_LOCAL_MODELS_PATH",
|
|
102
|
+
"NAPARI_CONFIG": "napari_config.json"
|
|
103
|
+
},
|
|
104
|
+
"batching_config": {
|
|
105
|
+
"target_cpus_per_job": 1,
|
|
106
|
+
"max_cpus_per_job": 1,
|
|
107
|
+
"target_mem_per_job": 200,
|
|
108
|
+
"max_mem_per_job": 500,
|
|
109
|
+
"target_num_jobs": 2,
|
|
110
|
+
"max_num_jobs": 4
|
|
111
|
+
}
|
|
108
112
|
}
|
|
109
113
|
```
|
|
110
114
|
|
|
@@ -123,7 +127,7 @@ class JobRunnerConfigSLURM(BaseModel):
|
|
|
123
127
|
|
|
124
128
|
model_config = ConfigDict(extra="forbid")
|
|
125
129
|
|
|
126
|
-
default_slurm_config: _SlurmConfigSet
|
|
127
|
-
gpu_slurm_config: _SlurmConfigSet | None = None
|
|
128
|
-
batching_config: _BatchingConfigSet
|
|
130
|
+
default_slurm_config: SlurmConfigSet
|
|
131
|
+
gpu_slurm_config: SlurmConfigSet | None = None
|
|
132
|
+
batching_config: BatchingConfigSet
|
|
129
133
|
user_local_exports: DictStrStr = Field(default_factory=dict)
|
|
@@ -9,13 +9,32 @@ from fractal_server.runner.task_files import TaskFiles
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class SubmitTaskType(StrEnum):
|
|
12
|
-
|
|
12
|
+
"""
|
|
13
|
+
Valid task types for `BaseRunner.submit`.
|
|
14
|
+
|
|
15
|
+
Attributes:
|
|
16
|
+
NON_PARALLEL: Non-parallel task.
|
|
17
|
+
COMPOUND: Compound task.
|
|
18
|
+
CONVERTER_NON_PARALLEL: Non-parallel converter task.
|
|
19
|
+
CONVERTER_COMPOUND: Compound converter task.
|
|
20
|
+
"""
|
|
21
|
+
|
|
13
22
|
NON_PARALLEL = TaskType.NON_PARALLEL
|
|
23
|
+
COMPOUND = TaskType.COMPOUND
|
|
14
24
|
CONVERTER_NON_PARALLEL = TaskType.CONVERTER_NON_PARALLEL
|
|
15
25
|
CONVERTER_COMPOUND = TaskType.CONVERTER_COMPOUND
|
|
16
26
|
|
|
17
27
|
|
|
18
28
|
class MultisubmitTaskType(StrEnum):
|
|
29
|
+
"""
|
|
30
|
+
Valid task types for `BaseRunner.multisubmit`.
|
|
31
|
+
|
|
32
|
+
Attributes:
|
|
33
|
+
PARALLEL: Parallel task.
|
|
34
|
+
COMPOUND: Compound task.
|
|
35
|
+
CONVERTER_COMPOUND: Compound converter task.
|
|
36
|
+
"""
|
|
37
|
+
|
|
19
38
|
PARALLEL = TaskType.PARALLEL
|
|
20
39
|
COMPOUND = TaskType.COMPOUND
|
|
21
40
|
CONVERTER_COMPOUND = TaskType.CONVERTER_COMPOUND
|
|
@@ -27,11 +46,18 @@ TASK_TYPES_SUBMIT: list[TaskType] = [
|
|
|
27
46
|
TaskType.NON_PARALLEL,
|
|
28
47
|
TaskType.CONVERTER_NON_PARALLEL,
|
|
29
48
|
]
|
|
49
|
+
"""
|
|
50
|
+
List of valid task types for `BaseRunner.submit`.
|
|
51
|
+
"""
|
|
52
|
+
|
|
30
53
|
TASK_TYPES_MULTISUBMIT: list[TaskType] = [
|
|
31
54
|
TaskType.COMPOUND,
|
|
32
55
|
TaskType.CONVERTER_COMPOUND,
|
|
33
56
|
TaskType.PARALLEL,
|
|
34
57
|
]
|
|
58
|
+
"""
|
|
59
|
+
List of valid task types for `BaseRunner.multisubmit`.
|
|
60
|
+
"""
|
|
35
61
|
|
|
36
62
|
logger = set_logger(__name__)
|
|
37
63
|
|
|
@@ -125,7 +151,7 @@ class BaseRunner:
|
|
|
125
151
|
|
|
126
152
|
Args:
|
|
127
153
|
parameters: Parameters dictionary.
|
|
128
|
-
task_type: Task type.
|
|
154
|
+
task_type: Task type.
|
|
129
155
|
"""
|
|
130
156
|
logger.info("[validate_submit_parameters] START")
|
|
131
157
|
if task_type not in TASK_TYPES_SUBMIT:
|
|
@@ -175,8 +201,7 @@ class BaseRunner:
|
|
|
175
201
|
|
|
176
202
|
if len(list_parameters) != len(list_task_files):
|
|
177
203
|
raise ValueError(
|
|
178
|
-
f"{len(list_task_files)=} differs from "
|
|
179
|
-
f"{len(list_parameters)=}."
|
|
204
|
+
f"{len(list_task_files)=} differs from {len(list_parameters)=}."
|
|
180
205
|
)
|
|
181
206
|
if len(history_unit_ids) != len(list_parameters):
|
|
182
207
|
raise ValueError(
|
|
@@ -4,7 +4,6 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
|
-
from ..call_command_wrapper import call_command_wrapper
|
|
8
7
|
from fractal_server.app.db import get_sync_db
|
|
9
8
|
from fractal_server.app.models import Profile
|
|
10
9
|
from fractal_server.app.models import Resource
|
|
@@ -16,10 +15,11 @@ from fractal_server.runner.exceptions import TaskExecutionError
|
|
|
16
15
|
from fractal_server.runner.executors.base_runner import BaseRunner
|
|
17
16
|
from fractal_server.runner.executors.base_runner import MultisubmitTaskType
|
|
18
17
|
from fractal_server.runner.executors.base_runner import SubmitTaskType
|
|
19
|
-
from fractal_server.runner.task_files import TaskFiles
|
|
20
|
-
from fractal_server.runner.v2.db_tools import (
|
21
|
-
bulk_update_status_of_history_unit,
|
|
18
|
+
from fractal_server.runner.executors.call_command_wrapper import (
|
|
19
|
+
call_command_wrapper,
|
|
22
20
|
)
|
|
21
|
+
from fractal_server.runner.task_files import TaskFiles
|
|
22
|
+
from fractal_server.runner.v2.db_tools import bulk_update_status_of_history_unit
|
|
23
23
|
from fractal_server.runner.v2.db_tools import update_status_of_history_unit
|
|
24
24
|
|
|
25
25
|
logger = set_logger(__name__)
|
|
@@ -56,6 +56,13 @@ def run_single_task(
|
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
class LocalRunner(BaseRunner):
|
|
59
|
+
"""
|
|
60
|
+
Runner implementation for a computational `local` resource.
|
|
61
|
+
|
|
62
|
+
Tasks are executed through a `concurrent.futures.ThreadPoolExecutor`
|
|
63
|
+
executor.
|
|
64
|
+
"""
|
|
65
|
+
|
|
59
66
|
executor: ThreadPoolExecutor
|
|
60
67
|
root_dir_local: Path
|
|
61
68
|
shared_config: JobRunnerConfigLocal
|
|
@@ -70,9 +77,7 @@ class LocalRunner(BaseRunner):
|
|
|
70
77
|
self.root_dir_local.mkdir(parents=True, exist_ok=True)
|
|
71
78
|
self.executor = ThreadPoolExecutor()
|
|
72
79
|
logger.debug("Create LocalRunner")
|
|
73
|
-
self.shared_config = JobRunnerConfigLocal(
|
|
74
|
-
**resource.jobs_runner_config
|
|
75
|
-
)
|
|
80
|
+
self.shared_config = JobRunnerConfigLocal(**resource.jobs_runner_config)
|
|
76
81
|
|
|
77
82
|
def __enter__(self):
|
|
78
83
|
logger.debug("Enter LocalRunner")
|
|
@@ -235,9 +240,7 @@ class LocalRunner(BaseRunner):
|
|
|
235
240
|
f"Original error {str(e)}"
|
|
236
241
|
)
|
|
237
242
|
exception = TaskExecutionError(str(e))
|
|
238
|
-
exceptions = {
|
|
239
|
-
ind: exception for ind in range(len(list_parameters))
|
|
240
|
-
}
|
|
243
|
+
exceptions = {ind: exception for ind in range(len(list_parameters))}
|
|
241
244
|
if task_type == TaskType.PARALLEL:
|
|
242
245
|
with next(get_sync_db()) as db:
|
|
243
246
|
bulk_update_status_of_history_unit(
|
|
@@ -269,9 +272,7 @@ class LocalRunner(BaseRunner):
|
|
|
269
272
|
"[multisubmit] Unexpected exception during submission."
|
|
270
273
|
f" Original error {str(e)}"
|
|
271
274
|
)
|
|
272
|
-
current_history_unit_id = history_unit_ids[
|
|
273
|
-
positional_index
|
|
274
|
-
]
|
|
275
|
+
current_history_unit_id = history_unit_ids[positional_index]
|
|
275
276
|
exceptions[positional_index] = TaskExecutionError(str(e))
|
|
276
277
|
if task_type == TaskType.PARALLEL:
|
|
277
278
|
with next(get_sync_db()) as db:
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
"""
|
|
12
12
|
Submodule to determine the number of total/parallel tasks per SLURM job.
|
|
13
13
|
"""
|
|
14
|
+
|
|
14
15
|
import math
|
|
15
16
|
|
|
16
17
|
from fractal_server.logger import set_logger
|
|
@@ -130,16 +131,12 @@ def heuristics(
|
|
|
130
131
|
raise SlurmHeuristicsError(msg)
|
|
131
132
|
if cpus_per_task > max_cpus_per_job:
|
|
132
133
|
msg = (
|
|
133
|
-
f"[heuristics] Requested {cpus_per_task=} "
|
|
134
|
-
f"but {max_cpus_per_job=}."
|
|
134
|
+
f"[heuristics] Requested {cpus_per_task=} but {max_cpus_per_job=}."
|
|
135
135
|
)
|
|
136
136
|
logger.error(msg)
|
|
137
137
|
raise SlurmHeuristicsError(msg)
|
|
138
138
|
if mem_per_task > max_mem_per_job:
|
|
139
|
-
msg = (
|
|
140
|
-
f"[heuristics] Requested {mem_per_task=} "
|
|
141
|
-
f"but {max_mem_per_job=}."
|
|
142
|
-
)
|
|
139
|
+
msg = f"[heuristics] Requested {mem_per_task=} but {max_mem_per_job=}."
|
|
143
140
|
logger.error(msg)
|
|
144
141
|
raise SlurmHeuristicsError(msg)
|
|
145
142
|
|
|
@@ -177,8 +174,7 @@ def heuristics(
|
|
|
177
174
|
)
|
|
178
175
|
if mem_per_job > max_mem_per_job:
|
|
179
176
|
msg = (
|
|
180
|
-
f"[heuristics] Requested {mem_per_job=} "
|
|
181
|
-
f"but {max_mem_per_job=}."
|
|
177
|
+
f"[heuristics] Requested {mem_per_job=} but {max_mem_per_job=}."
|
|
182
178
|
)
|
|
183
179
|
logger.error(msg)
|
|
184
180
|
raise SlurmHeuristicsError(msg)
|
|
@@ -187,8 +183,7 @@ def heuristics(
|
|
|
187
183
|
num_jobs = math.ceil(tot_tasks / tasks_per_job)
|
|
188
184
|
if num_jobs > target_num_jobs:
|
|
189
185
|
logger.debug(
|
|
190
|
-
f"[heuristics] Requested {num_jobs=} "
|
|
191
|
-
f"but {target_num_jobs=}."
|
|
186
|
+
f"[heuristics] Requested {num_jobs=} but {target_num_jobs=}."
|
|
192
187
|
)
|
|
193
188
|
if num_jobs > max_num_jobs:
|
|
194
189
|
msg = f"[heuristics] Requested {num_jobs=} but {max_num_jobs=}."
|
|
@@ -9,10 +9,6 @@ from typing import Literal
|
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
from pydantic import ConfigDict
|
|
11
11
|
|
|
12
|
-
from ..slurm_common.slurm_job_task_models import SlurmJob
|
|
13
|
-
from ..slurm_common.slurm_job_task_models import SlurmTask
|
|
14
|
-
from ._job_states import STATES_FINISHED
|
|
15
|
-
from .slurm_config import SlurmConfig
|
|
16
12
|
from fractal_server import __VERSION__
|
|
17
13
|
from fractal_server.app.db import get_sync_db
|
|
18
14
|
from fractal_server.app.models.v2 import AccountingRecordSlurm
|
|
@@ -25,15 +21,47 @@ from fractal_server.runner.exceptions import TaskExecutionError
|
|
|
25
21
|
from fractal_server.runner.executors.base_runner import BaseRunner
|
|
26
22
|
from fractal_server.runner.executors.base_runner import MultisubmitTaskType
|
|
27
23
|
from fractal_server.runner.executors.base_runner import SubmitTaskType
|
|
24
|
+
from fractal_server.runner.executors.slurm_common.slurm_job_task_models import (
|
|
25
|
+
SlurmJob,
|
|
26
|
+
)
|
|
27
|
+
from fractal_server.runner.executors.slurm_common.slurm_job_task_models import (
|
|
28
|
+
SlurmTask,
|
|
29
|
+
)
|
|
28
30
|
from fractal_server.runner.filenames import SHUTDOWN_FILENAME
|
|
29
31
|
from fractal_server.runner.task_files import TaskFiles
|
|
30
|
-
from fractal_server.runner.v2.db_tools import (
|
|
31
|
-
bulk_update_status_of_history_unit,
|
|
32
|
-
)
|
|
32
|
+
from fractal_server.runner.v2.db_tools import bulk_update_status_of_history_unit
|
|
33
33
|
from fractal_server.runner.v2.db_tools import update_status_of_history_unit
|
|
34
34
|
|
|
35
|
+
from ._job_states import STATES_FINISHED
|
|
36
|
+
from .slurm_config import SlurmConfig
|
|
37
|
+
|
|
35
38
|
SHUTDOWN_ERROR_MESSAGE = "Failed due to job-execution shutdown."
|
|
36
39
|
SHUTDOWN_EXCEPTION = JobExecutionError(SHUTDOWN_ERROR_MESSAGE)
|
|
40
|
+
STDERR_IGNORE_PATTERNS = [
|
|
41
|
+
"step creation temporarily disabled, retrying",
|
|
42
|
+
"step creation still disabled, retrying",
|
|
43
|
+
"srun: step created for stepid=",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def ignore_stderr_line(line: str) -> bool:
|
|
48
|
+
"""
|
|
49
|
+
Whether to ignore a SLURM-job stderr line, based on
|
|
50
|
+
`STDERR_IGNORE_PATTERNS`.
|
|
51
|
+
|
|
52
|
+
The goal is not to flag some stderr files as relevant `executor_error_log`
|
|
53
|
+
if they only include lines matching a given set of patterns to ignore. See
|
|
54
|
+
https://github.com/fractal-analytics-platform/fractal-server/issues/2835
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
line: The line to be considered.
|
|
58
|
+
"""
|
|
59
|
+
line_lower = line.lower()
|
|
60
|
+
for pattern in STDERR_IGNORE_PATTERNS:
|
|
61
|
+
if pattern in line_lower:
|
|
62
|
+
return True
|
|
63
|
+
return False
|
|
64
|
+
|
|
37
65
|
|
|
38
66
|
logger = set_logger(__name__)
|
|
39
67
|
|
|
@@ -65,6 +93,10 @@ def create_accounting_record_slurm(
|
|
|
65
93
|
|
|
66
94
|
|
|
67
95
|
class BaseSlurmRunner(BaseRunner):
|
|
96
|
+
"""
|
|
97
|
+
Base class for SLURM runners.
|
|
98
|
+
"""
|
|
99
|
+
|
|
68
100
|
shutdown_file: Path
|
|
69
101
|
common_script_lines: list[str]
|
|
70
102
|
user_cache_dir: str
|
|
@@ -307,8 +339,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
307
339
|
json.dump(task.parameters, f, indent=2)
|
|
308
340
|
|
|
309
341
|
logger.debug(
|
|
310
|
-
"[_prepare_single_slurm_job] Written "
|
|
311
|
-
f"{task.input_file_local=}"
|
|
342
|
+
f"[_prepare_single_slurm_job] Written {task.input_file_local=}"
|
|
312
343
|
)
|
|
313
344
|
|
|
314
345
|
# Prepare commands to be included in SLURM submission script
|
|
@@ -352,9 +383,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
352
383
|
# Always print output of `uname -n` and `pwd`
|
|
353
384
|
script_lines.append('\necho "Hostname: $(uname -n)"')
|
|
354
385
|
script_lines.append('echo "Current directory: $(pwd)"')
|
|
355
|
-
script_lines.append(
|
|
356
|
-
'echo "Start time: $(date +"%Y-%m-%dT%H:%M:%S%z")"'
|
|
357
|
-
)
|
|
386
|
+
script_lines.append('echo "Start time: $(date +"%Y-%m-%dT%H:%M:%S%z")"')
|
|
358
387
|
|
|
359
388
|
# Complete script preamble
|
|
360
389
|
script_lines.append("\n")
|
|
@@ -368,9 +397,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
368
397
|
f"{cmd} &"
|
|
369
398
|
)
|
|
370
399
|
script_lines.append("wait\n\n")
|
|
371
|
-
script_lines.append(
|
|
372
|
-
'echo "End time: $(date +"%Y-%m-%dT%H:%M:%S%z")"'
|
|
373
|
-
)
|
|
400
|
+
script_lines.append('echo "End time: $(date +"%Y-%m-%dT%H:%M:%S%z")"')
|
|
374
401
|
script = "\n".join(script_lines)
|
|
375
402
|
|
|
376
403
|
# Write submission script
|
|
@@ -528,7 +555,13 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
528
555
|
|
|
529
556
|
try:
|
|
530
557
|
with open(stderr_path) as f:
|
|
531
|
-
|
|
558
|
+
stderr_lines = [
|
|
559
|
+
line
|
|
560
|
+
for line in f.readlines()
|
|
561
|
+
if not ignore_stderr_line(line)
|
|
562
|
+
]
|
|
563
|
+
stderr_content = "\n".join(stderr_lines)
|
|
564
|
+
stderr_content = stderr_content.strip()
|
|
532
565
|
if stderr_content:
|
|
533
566
|
return stderr_content
|
|
534
567
|
except Exception as e:
|
|
@@ -595,8 +628,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
595
628
|
def _check_no_active_jobs(self):
|
|
596
629
|
if self.jobs != {}:
|
|
597
630
|
raise JobExecutionError(
|
|
598
|
-
"Unexpected branch: jobs must be empty before new "
|
|
599
|
-
"submissions."
|
|
631
|
+
"Unexpected branch: jobs must be empty before new submissions."
|
|
600
632
|
)
|
|
601
633
|
|
|
602
634
|
def submit(
|
|
@@ -711,9 +743,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
711
743
|
scancelled_job_ids = []
|
|
712
744
|
while len(self.jobs) > 0:
|
|
713
745
|
# Look for finished jobs
|
|
714
|
-
finished_job_ids = self._get_finished_jobs(
|
|
715
|
-
job_ids=self.job_ids
|
|
716
|
-
)
|
|
746
|
+
finished_job_ids = self._get_finished_jobs(job_ids=self.job_ids)
|
|
717
747
|
logger.debug(f"[submit] {finished_job_ids=}")
|
|
718
748
|
finished_jobs = [
|
|
719
749
|
self.jobs[_slurm_job_id]
|
|
@@ -860,9 +890,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
860
890
|
args_batches.append(
|
|
861
891
|
list_parameters[ind_chunk : ind_chunk + batch_size] # noqa
|
|
862
892
|
)
|
|
863
|
-
if len(args_batches) != math.ceil(
|
|
864
|
-
tot_tasks / config.tasks_per_job
|
|
865
|
-
):
|
|
893
|
+
if len(args_batches) != math.ceil(tot_tasks / config.tasks_per_job):
|
|
866
894
|
raise RuntimeError("Something wrong here while batching tasks")
|
|
867
895
|
|
|
868
896
|
# Part 1/3: Iterate over chunks, prepare SlurmJob objects
|
|
@@ -975,9 +1003,7 @@ class BaseSlurmRunner(BaseRunner):
|
|
|
975
1003
|
logger.debug(f"[multisubmit] Now process {slurm_job_id=}")
|
|
976
1004
|
slurm_job = self.jobs.pop(slurm_job_id)
|
|
977
1005
|
for task in slurm_job.tasks:
|
|
978
|
-
logger.debug(
|
|
979
|
-
f"[multisubmit] Now process {task.index=}"
|
|
980
|
-
)
|
|
1006
|
+
logger.debug(f"[multisubmit] Now process {task.index=}")
|
|
981
1007
|
was_job_scancelled = slurm_job_id in scancelled_job_ids
|
|
982
1008
|
if fetch_artifacts_exception is not None:
|
|
983
1009
|
result = None
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from typing import Literal
|
|
2
2
|
|
|
3
|
-
from ._batching import heuristics
|
|
4
|
-
from .slurm_config import logger
|
|
5
|
-
from .slurm_config import SlurmConfig
|
|
6
3
|
from fractal_server.app.models.v2 import WorkflowTaskV2
|
|
7
4
|
from fractal_server.runner.config import JobRunnerConfigSLURM
|
|
8
5
|
from fractal_server.runner.config.slurm_mem_to_MB import slurm_mem_to_MB
|
|
9
6
|
from fractal_server.runner.exceptions import SlurmConfigError
|
|
10
7
|
from fractal_server.string_tools import interpret_as_bool
|
|
11
8
|
|
|
9
|
+
from ._batching import heuristics
|
|
10
|
+
from .slurm_config import SlurmConfig
|
|
11
|
+
from .slurm_config import logger
|
|
12
|
+
|
|
12
13
|
|
|
13
14
|
def _get_slurm_config_internal(
|
|
14
15
|
shared_config: JobRunnerConfigSLURM,
|
|
@@ -33,7 +34,6 @@ def _get_slurm_config_internal(
|
|
|
33
34
|
which_type:
|
|
34
35
|
Whether we should look at the non-parallel or parallel part
|
|
35
36
|
of `wftask`.
|
|
36
|
-
tot_tasks: Not used here, only present as a common interface.
|
|
37
37
|
|
|
38
38
|
Returns:
|
|
39
39
|
A ready-to-use `SlurmConfig` object.
|
|
@@ -138,9 +138,7 @@ def _get_slurm_config_internal(
|
|
|
138
138
|
# Job-batching parameters (if None, they will be determined heuristically)
|
|
139
139
|
if wftask_meta is not None:
|
|
140
140
|
tasks_per_job = wftask_meta.get("tasks_per_job", None)
|
|
141
|
-
parallel_tasks_per_job = wftask_meta.get(
|
|
142
|
-
"parallel_tasks_per_job", None
|
|
143
|
-
)
|
|
141
|
+
parallel_tasks_per_job = wftask_meta.get("parallel_tasks_per_job", None)
|
|
144
142
|
else:
|
|
145
143
|
tasks_per_job = None
|
|
146
144
|
parallel_tasks_per_job = None
|
|
@@ -162,6 +160,15 @@ def get_slurm_config(
|
|
|
162
160
|
which_type: Literal["non_parallel", "parallel"],
|
|
163
161
|
tot_tasks: int = 1,
|
|
164
162
|
) -> SlurmConfig:
|
|
163
|
+
"""
|
|
164
|
+
Get `SlurmConfig` object.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
shared_config:
|
|
168
|
+
wftask:
|
|
169
|
+
which_type:
|
|
170
|
+
tot_tasks:
|
|
171
|
+
"""
|
|
165
172
|
config = _get_slurm_config_internal(
|
|
166
173
|
shared_config=shared_config,
|
|
167
174
|
wftask=wftask,
|
|
@@ -3,8 +3,10 @@ import json
|
|
|
3
3
|
import os
|
|
4
4
|
import sys
|
|
5
5
|
|
|
6
|
-
from ..call_command_wrapper import call_command_wrapper
|
|
7
6
|
from fractal_server import __VERSION__
|
|
7
|
+
from fractal_server.runner.executors.call_command_wrapper import (
|
|
8
|
+
call_command_wrapper,
|
|
9
|
+
)
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class FractalVersionMismatch(RuntimeError):
|
|
@@ -24,9 +24,7 @@ class SlurmTask(BaseModel):
|
|
|
24
24
|
|
|
25
25
|
@property
|
|
26
26
|
def input_file_local_path(self) -> Path:
|
|
27
|
-
return
|
|
28
|
-
self.workdir_local / f"{self.prefix}-{self.component}-input.json"
|
|
29
|
-
)
|
|
27
|
+
return self.workdir_local / f"{self.prefix}-{self.component}-input.json"
|
|
30
28
|
|
|
31
29
|
@property
|
|
32
30
|
def input_file_remote_path(self) -> Path:
|
|
@@ -1,23 +1,32 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
from ..slurm_common.base_slurm_runner import BaseSlurmRunner
|
|
5
|
-
from ..slurm_common.slurm_job_task_models import SlurmJob
|
|
6
|
-
from .run_subprocess import run_subprocess
|
|
7
|
-
from .tar_commands import get_tar_compression_cmd
|
|
8
|
-
from .tar_commands import get_tar_extraction_cmd
|
|
9
4
|
from fractal_server.app.models import Profile
|
|
10
5
|
from fractal_server.app.models import Resource
|
|
11
6
|
from fractal_server.logger import set_logger
|
|
12
7
|
from fractal_server.runner.config import JobRunnerConfigSLURM
|
|
8
|
+
from fractal_server.runner.executors.slurm_common.base_slurm_runner import (
|
|
9
|
+
BaseSlurmRunner,
|
|
10
|
+
)
|
|
11
|
+
from fractal_server.runner.executors.slurm_common.slurm_job_task_models import (
|
|
12
|
+
SlurmJob,
|
|
13
|
+
)
|
|
13
14
|
from fractal_server.ssh._fabric import FractalSSH
|
|
14
15
|
from fractal_server.ssh._fabric import FractalSSHCommandError
|
|
15
16
|
from fractal_server.ssh._fabric import FractalSSHTimeoutError
|
|
16
17
|
|
|
18
|
+
from .run_subprocess import run_subprocess
|
|
19
|
+
from .tar_commands import get_tar_compression_cmd
|
|
20
|
+
from .tar_commands import get_tar_extraction_cmd
|
|
21
|
+
|
|
17
22
|
logger = set_logger(__name__)
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
class SlurmSSHRunner(BaseSlurmRunner):
|
|
26
|
+
"""
|
|
27
|
+
Runner implementation for a computational `sudo_slurm` resource.
|
|
28
|
+
"""
|
|
29
|
+
|
|
21
30
|
fractal_ssh: FractalSSH
|
|
22
31
|
|
|
23
32
|
def __init__(
|
|
@@ -39,9 +48,7 @@ class SlurmSSHRunner(BaseSlurmRunner):
|
|
|
39
48
|
different SLURM jobs/tasks.
|
|
40
49
|
"""
|
|
41
50
|
self.fractal_ssh = fractal_ssh
|
|
42
|
-
self.shared_config = JobRunnerConfigSLURM(
|
|
43
|
-
**resource.jobs_runner_config
|
|
44
|
-
)
|
|
51
|
+
self.shared_config = JobRunnerConfigSLURM(**resource.jobs_runner_config)
|
|
45
52
|
logger.warning(self.fractal_ssh)
|
|
46
53
|
|
|
47
54
|
# Check SSH connection and try to recover from a closed-socket error
|
|
@@ -80,9 +87,7 @@ class SlurmSSHRunner(BaseSlurmRunner):
|
|
|
80
87
|
return None
|
|
81
88
|
|
|
82
89
|
t_0 = time.perf_counter()
|
|
83
|
-
logger.debug(
|
|
84
|
-
f"[_fetch_artifacts] START ({len(finished_slurm_jobs)=})."
|
|
85
|
-
)
|
|
90
|
+
logger.debug(f"[_fetch_artifacts] START ({len(finished_slurm_jobs)=}).")
|
|
86
91
|
|
|
87
92
|
# Extract `workdir_remote` and `workdir_local`
|
|
88
93
|
self.validate_slurm_jobs_workdirs(finished_slurm_jobs)
|
|
@@ -15,6 +15,7 @@ This module provides a set of tools similar to `subprocess.run`, `glob.glob` or
|
|
|
15
15
|
`os.path.exists`, but extended so that they can be executed on behalf of
|
|
16
16
|
another user. Note that this requires appropriate sudo permissions.
|
|
17
17
|
"""
|
|
18
|
+
|
|
18
19
|
import shlex
|
|
19
20
|
import subprocess # nosec
|
|
20
21
|
|