fractal-server 2.2.0a0__py3-none-any.whl → 2.3.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +1 -1
- fractal_server/app/models/v1/state.py +1 -2
- fractal_server/app/routes/admin/v1.py +2 -2
- fractal_server/app/routes/admin/v2.py +2 -2
- fractal_server/app/routes/api/v1/job.py +2 -2
- fractal_server/app/routes/api/v1/task_collection.py +4 -4
- fractal_server/app/routes/api/v2/__init__.py +23 -3
- fractal_server/app/routes/api/v2/job.py +2 -2
- fractal_server/app/routes/api/v2/submit.py +6 -0
- fractal_server/app/routes/api/v2/task_collection.py +74 -34
- fractal_server/app/routes/api/v2/task_collection_custom.py +144 -0
- fractal_server/app/routes/api/v2/task_collection_ssh.py +125 -0
- fractal_server/app/routes/aux/_runner.py +10 -2
- fractal_server/app/runner/compress_folder.py +120 -0
- fractal_server/app/runner/executors/slurm/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/_batching.py +0 -1
- fractal_server/app/runner/executors/slurm/_slurm_config.py +9 -9
- fractal_server/app/runner/executors/slurm/ssh/__init__.py +3 -0
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +112 -0
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +120 -0
- fractal_server/app/runner/executors/slurm/ssh/executor.py +1490 -0
- fractal_server/app/runner/executors/slurm/sudo/__init__.py +3 -0
- fractal_server/app/runner/executors/slurm/{_check_jobs_status.py → sudo/_check_jobs_status.py} +1 -1
- fractal_server/app/runner/executors/slurm/{_executor_wait_thread.py → sudo/_executor_wait_thread.py} +1 -1
- fractal_server/app/runner/executors/slurm/{_subprocess_run_as_user.py → sudo/_subprocess_run_as_user.py} +1 -1
- fractal_server/app/runner/executors/slurm/{executor.py → sudo/executor.py} +12 -12
- fractal_server/app/runner/extract_archive.py +38 -0
- fractal_server/app/runner/v1/__init__.py +78 -40
- fractal_server/app/runner/v1/_slurm/__init__.py +1 -1
- fractal_server/app/runner/v2/__init__.py +183 -82
- fractal_server/app/runner/v2/_local_experimental/__init__.py +22 -12
- fractal_server/app/runner/v2/_local_experimental/executor.py +12 -8
- fractal_server/app/runner/v2/_slurm/__init__.py +1 -6
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +126 -0
- fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +83 -0
- fractal_server/app/runner/v2/_slurm_ssh/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +9 -11
- fractal_server/app/runner/versions.py +30 -0
- fractal_server/app/schemas/v1/__init__.py +1 -0
- fractal_server/app/schemas/{state.py → v1/state.py} +4 -21
- fractal_server/app/schemas/v2/__init__.py +4 -1
- fractal_server/app/schemas/v2/task_collection.py +97 -27
- fractal_server/config.py +222 -21
- fractal_server/main.py +25 -1
- fractal_server/migrations/env.py +1 -1
- fractal_server/ssh/__init__.py +4 -0
- fractal_server/ssh/_fabric.py +190 -0
- fractal_server/tasks/utils.py +12 -64
- fractal_server/tasks/v1/background_operations.py +2 -2
- fractal_server/tasks/{endpoint_operations.py → v1/endpoint_operations.py} +7 -12
- fractal_server/tasks/v1/utils.py +67 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +61 -32
- fractal_server/tasks/v2/_venv_pip.py +195 -0
- fractal_server/tasks/v2/background_operations.py +257 -295
- fractal_server/tasks/v2/background_operations_ssh.py +304 -0
- fractal_server/tasks/v2/endpoint_operations.py +136 -0
- fractal_server/tasks/v2/templates/_1_create_venv.sh +46 -0
- fractal_server/tasks/v2/templates/_2_upgrade_pip.sh +30 -0
- fractal_server/tasks/v2/templates/_3_pip_install.sh +32 -0
- fractal_server/tasks/v2/templates/_4_pip_freeze.sh +21 -0
- fractal_server/tasks/v2/templates/_5_pip_show.sh +59 -0
- fractal_server/tasks/v2/utils.py +54 -0
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/METADATA +6 -2
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/RECORD +68 -44
- fractal_server/tasks/v2/get_collection_data.py +0 -14
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/LICENSE +0 -0
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/WHEEL +0 -0
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/entry_points.txt +0 -0
@@ -29,18 +29,18 @@ import cloudpickle
|
|
29
29
|
from cfut import SlurmExecutor
|
30
30
|
from cfut.util import random_string
|
31
31
|
|
32
|
-
from
|
33
|
-
from
|
34
|
-
from
|
35
|
-
from
|
36
|
-
from
|
37
|
-
from
|
38
|
-
from
|
39
|
-
from
|
40
|
-
from .
|
32
|
+
from ......config import get_settings
|
33
|
+
from ......logger import set_logger
|
34
|
+
from ......syringe import Inject
|
35
|
+
from ....exceptions import JobExecutionError
|
36
|
+
from ....exceptions import TaskExecutionError
|
37
|
+
from ....filenames import SHUTDOWN_FILENAME
|
38
|
+
from ....task_files import get_task_file_paths
|
39
|
+
from ....task_files import TaskFiles
|
40
|
+
from ...slurm._slurm_config import get_default_slurm_config
|
41
|
+
from ...slurm._slurm_config import SlurmConfig
|
42
|
+
from .._batching import heuristics
|
41
43
|
from ._executor_wait_thread import FractalSlurmWaitThread
|
42
|
-
from ._slurm_config import get_default_slurm_config
|
43
|
-
from ._slurm_config import SlurmConfig
|
44
44
|
from ._subprocess_run_as_user import _glob_as_user
|
45
45
|
from ._subprocess_run_as_user import _glob_as_user_strict
|
46
46
|
from ._subprocess_run_as_user import _path_exists_as_user
|
@@ -1180,7 +1180,7 @@ class FractalSlurmExecutor(SlurmExecutor):
|
|
1180
1180
|
|
1181
1181
|
# Prepare SLURM preamble based on SlurmConfig object
|
1182
1182
|
script_lines = slurm_config.to_sbatch_preamble(
|
1183
|
-
|
1183
|
+
remote_export_dir=self.user_cache_dir
|
1184
1184
|
)
|
1185
1185
|
|
1186
1186
|
# Extend SLURM preamble with variable which are not in SlurmConfig, and
|
@@ -0,0 +1,38 @@
|
|
1
|
+
import sys
|
2
|
+
import tarfile
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
|
6
|
+
def _remove_suffix(*, string: str, suffix: str) -> str:
|
7
|
+
if string.endswith(suffix):
|
8
|
+
return string[: -len(suffix)]
|
9
|
+
else:
|
10
|
+
raise ValueError(f"Cannot remove {suffix=} from {string=}.")
|
11
|
+
|
12
|
+
|
13
|
+
if __name__ == "__main__":
|
14
|
+
help_msg = (
|
15
|
+
"Expected use:\n"
|
16
|
+
"python -m fractal_server.app.runner.extract_archive "
|
17
|
+
"path/to/archive.tar.gz"
|
18
|
+
)
|
19
|
+
|
20
|
+
if len(sys.argv[1:]) != 1:
|
21
|
+
raise ValueError(
|
22
|
+
f"Invalid argument.\n{help_msg}\nProvided: {sys.argv=}"
|
23
|
+
)
|
24
|
+
elif not sys.argv[1].endswith(".tar.gz"):
|
25
|
+
raise ValueError(
|
26
|
+
f"Invalid argument.\n{help_msg}\nProvided: {sys.argv=}"
|
27
|
+
)
|
28
|
+
|
29
|
+
tarfile_path = Path(sys.argv[1])
|
30
|
+
|
31
|
+
print(f"[extract_archive.py] {tarfile_path=}")
|
32
|
+
|
33
|
+
job_folder = tarfile_path.parent
|
34
|
+
subfolder_name = _remove_suffix(string=tarfile_path.name, suffix=".tar.gz")
|
35
|
+
with tarfile.open(tarfile_path) as tar:
|
36
|
+
tar.extractall(path=Path(job_folder, subfolder_name).as_posix())
|
37
|
+
|
38
|
+
print(f"[extract_archive.py] {tarfile_path=}")
|
@@ -22,6 +22,10 @@ import traceback
|
|
22
22
|
from pathlib import Path
|
23
23
|
from typing import Optional
|
24
24
|
|
25
|
+
from sqlalchemy.orm import Session as DBSyncSession
|
26
|
+
|
27
|
+
from ....logger import get_logger
|
28
|
+
from ....logger import reset_logger_handlers
|
25
29
|
from ....logger import set_logger
|
26
30
|
from ....syringe import Inject
|
27
31
|
from ....utils import get_timestamp
|
@@ -33,7 +37,7 @@ from ...models.v1 import WorkflowTask
|
|
33
37
|
from ...schemas.v1 import JobStatusTypeV1
|
34
38
|
from ..exceptions import JobExecutionError
|
35
39
|
from ..exceptions import TaskExecutionError
|
36
|
-
from ..executors.slurm._subprocess_run_as_user import (
|
40
|
+
from ..executors.slurm.sudo._subprocess_run_as_user import (
|
37
41
|
_mkdir_as_user,
|
38
42
|
)
|
39
43
|
from ..filenames import WORKFLOW_LOG_FILENAME
|
@@ -53,6 +57,27 @@ _backends["local"] = local_process_workflow
|
|
53
57
|
_backends["slurm"] = slurm_process_workflow
|
54
58
|
|
55
59
|
|
60
|
+
def fail_job(
|
61
|
+
*,
|
62
|
+
db: DBSyncSession,
|
63
|
+
job: ApplyWorkflow,
|
64
|
+
log_msg: str,
|
65
|
+
logger_name: str,
|
66
|
+
emit_log: bool = False,
|
67
|
+
) -> None:
|
68
|
+
logger = get_logger(logger_name=logger_name)
|
69
|
+
if emit_log:
|
70
|
+
logger.error(log_msg)
|
71
|
+
reset_logger_handlers(logger)
|
72
|
+
job.status = JobStatusTypeV1.FAILED
|
73
|
+
job.end_timestamp = get_timestamp()
|
74
|
+
job.log = log_msg
|
75
|
+
db.merge(job)
|
76
|
+
db.commit()
|
77
|
+
db.close()
|
78
|
+
return
|
79
|
+
|
80
|
+
|
56
81
|
async def submit_workflow(
|
57
82
|
*,
|
58
83
|
workflow_id: int,
|
@@ -91,21 +116,41 @@ async def submit_workflow(
|
|
91
116
|
slurm backend.
|
92
117
|
"""
|
93
118
|
|
94
|
-
|
95
|
-
|
96
|
-
FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
|
97
|
-
if FRACTAL_RUNNER_BACKEND == "local":
|
98
|
-
process_workflow = local_process_workflow
|
99
|
-
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
100
|
-
process_workflow = slurm_process_workflow
|
101
|
-
else:
|
102
|
-
raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
|
119
|
+
logger_name = f"WF{workflow_id}_job{job_id}"
|
120
|
+
logger = set_logger(logger_name=logger_name)
|
103
121
|
|
104
122
|
with next(DB.get_sync_db()) as db_sync:
|
105
123
|
|
106
124
|
job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
|
107
125
|
if not job:
|
108
|
-
|
126
|
+
logger.error(f"ApplyWorkflow {job_id} does not exist")
|
127
|
+
return
|
128
|
+
|
129
|
+
settings = Inject(get_settings)
|
130
|
+
FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
|
131
|
+
if FRACTAL_RUNNER_BACKEND == "local":
|
132
|
+
process_workflow = local_process_workflow
|
133
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
134
|
+
process_workflow = slurm_process_workflow
|
135
|
+
else:
|
136
|
+
|
137
|
+
if FRACTAL_RUNNER_BACKEND == "local_experimental":
|
138
|
+
log_msg = (
|
139
|
+
f"{FRACTAL_RUNNER_BACKEND=} is not available for v1 jobs."
|
140
|
+
)
|
141
|
+
else:
|
142
|
+
log_msg = f"Invalid {FRACTAL_RUNNER_BACKEND=}"
|
143
|
+
|
144
|
+
fail_job(
|
145
|
+
job=job,
|
146
|
+
db=db_sync,
|
147
|
+
log_msg=log_msg,
|
148
|
+
logger_name=logger_name,
|
149
|
+
emit_log=True,
|
150
|
+
)
|
151
|
+
return
|
152
|
+
|
153
|
+
# Declare runner backend and set `process_workflow` function
|
109
154
|
|
110
155
|
input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
|
111
156
|
output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
|
@@ -126,12 +171,9 @@ async def submit_workflow(
|
|
126
171
|
log_msg += (
|
127
172
|
f"Cannot fetch workflow {workflow_id} from database\n"
|
128
173
|
)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
db_sync.merge(job)
|
133
|
-
db_sync.commit()
|
134
|
-
db_sync.close()
|
174
|
+
fail_job(
|
175
|
+
db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name
|
176
|
+
)
|
135
177
|
return
|
136
178
|
|
137
179
|
# Prepare some of process_workflow arguments
|
@@ -147,9 +189,14 @@ async def submit_workflow(
|
|
147
189
|
)
|
148
190
|
|
149
191
|
if WORKFLOW_DIR_LOCAL.exists():
|
150
|
-
|
151
|
-
|
192
|
+
fail_job(
|
193
|
+
db=db_sync,
|
194
|
+
job=job,
|
195
|
+
log_msg=f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists.",
|
196
|
+
logger_name=logger_name,
|
197
|
+
emit_log=True,
|
152
198
|
)
|
199
|
+
return
|
153
200
|
|
154
201
|
# Create WORKFLOW_DIR
|
155
202
|
original_umask = os.umask(0)
|
@@ -202,7 +249,6 @@ async def submit_workflow(
|
|
202
249
|
db_sync.refresh(workflow)
|
203
250
|
|
204
251
|
# Write logs
|
205
|
-
logger_name = f"WF{workflow_id}_job{job_id}"
|
206
252
|
log_file_path = WORKFLOW_DIR_LOCAL / WORKFLOW_LOG_FILENAME
|
207
253
|
logger = set_logger(
|
208
254
|
logger_name=logger_name,
|
@@ -302,19 +348,14 @@ async def submit_workflow(
|
|
302
348
|
|
303
349
|
db_sync.merge(output_dataset)
|
304
350
|
|
305
|
-
job.status = JobStatusTypeV1.FAILED
|
306
|
-
job.end_timestamp = get_timestamp()
|
307
|
-
|
308
351
|
exception_args_string = "\n".join(e.args)
|
309
|
-
|
352
|
+
log_msg = (
|
310
353
|
f"TASK ERROR: "
|
311
354
|
f"Task name: {e.task_name}, "
|
312
355
|
f"position in Workflow: {e.workflow_task_order}\n"
|
313
356
|
f"TRACEBACK:\n{exception_args_string}"
|
314
357
|
)
|
315
|
-
db_sync
|
316
|
-
close_job_logger(logger)
|
317
|
-
db_sync.commit()
|
358
|
+
fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
|
318
359
|
|
319
360
|
except JobExecutionError as e:
|
320
361
|
|
@@ -334,14 +375,13 @@ async def submit_workflow(
|
|
334
375
|
)
|
335
376
|
|
336
377
|
db_sync.merge(output_dataset)
|
337
|
-
|
338
|
-
job.status = JobStatusTypeV1.FAILED
|
339
|
-
job.end_timestamp = get_timestamp()
|
340
378
|
error = e.assemble_error()
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
379
|
+
fail_job(
|
380
|
+
db=db_sync,
|
381
|
+
job=job,
|
382
|
+
log_msg=f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}",
|
383
|
+
logger_name=logger_name,
|
384
|
+
)
|
345
385
|
|
346
386
|
except Exception:
|
347
387
|
|
@@ -364,14 +404,12 @@ async def submit_workflow(
|
|
364
404
|
|
365
405
|
db_sync.merge(output_dataset)
|
366
406
|
|
367
|
-
|
368
|
-
job.end_timestamp = get_timestamp()
|
369
|
-
job.log = (
|
407
|
+
log_msg = (
|
370
408
|
f"UNKNOWN ERROR in Fractal job {job.id}\n"
|
371
409
|
f"TRACEBACK:\n{current_traceback}"
|
372
410
|
)
|
373
|
-
db_sync
|
374
|
-
|
375
|
-
db_sync.commit()
|
411
|
+
fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
|
412
|
+
|
376
413
|
finally:
|
377
414
|
db_sync.close()
|
415
|
+
reset_logger_handlers(logger)
|
@@ -22,7 +22,7 @@ from typing import Optional
|
|
22
22
|
from typing import Union
|
23
23
|
|
24
24
|
from ...async_wrap import async_wrap
|
25
|
-
from ...executors.slurm.executor import FractalSlurmExecutor
|
25
|
+
from ...executors.slurm.sudo.executor import FractalSlurmExecutor
|
26
26
|
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
27
27
|
from .._common import execute_tasks
|
28
28
|
from ..common import TaskParameters
|