fractal-server 2.2.0a0__py3-none-any.whl → 2.3.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/db/__init__.py +1 -1
  3. fractal_server/app/models/v1/state.py +1 -2
  4. fractal_server/app/routes/admin/v1.py +2 -2
  5. fractal_server/app/routes/admin/v2.py +2 -2
  6. fractal_server/app/routes/api/v1/job.py +2 -2
  7. fractal_server/app/routes/api/v1/task_collection.py +4 -4
  8. fractal_server/app/routes/api/v2/__init__.py +23 -3
  9. fractal_server/app/routes/api/v2/job.py +2 -2
  10. fractal_server/app/routes/api/v2/submit.py +6 -0
  11. fractal_server/app/routes/api/v2/task_collection.py +74 -34
  12. fractal_server/app/routes/api/v2/task_collection_custom.py +144 -0
  13. fractal_server/app/routes/api/v2/task_collection_ssh.py +125 -0
  14. fractal_server/app/routes/aux/_runner.py +10 -2
  15. fractal_server/app/runner/compress_folder.py +120 -0
  16. fractal_server/app/runner/executors/slurm/__init__.py +0 -3
  17. fractal_server/app/runner/executors/slurm/_batching.py +0 -1
  18. fractal_server/app/runner/executors/slurm/_slurm_config.py +9 -9
  19. fractal_server/app/runner/executors/slurm/ssh/__init__.py +3 -0
  20. fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +112 -0
  21. fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +120 -0
  22. fractal_server/app/runner/executors/slurm/ssh/executor.py +1490 -0
  23. fractal_server/app/runner/executors/slurm/sudo/__init__.py +3 -0
  24. fractal_server/app/runner/executors/slurm/{_check_jobs_status.py → sudo/_check_jobs_status.py} +1 -1
  25. fractal_server/app/runner/executors/slurm/{_executor_wait_thread.py → sudo/_executor_wait_thread.py} +1 -1
  26. fractal_server/app/runner/executors/slurm/{_subprocess_run_as_user.py → sudo/_subprocess_run_as_user.py} +1 -1
  27. fractal_server/app/runner/executors/slurm/{executor.py → sudo/executor.py} +12 -12
  28. fractal_server/app/runner/extract_archive.py +38 -0
  29. fractal_server/app/runner/v1/__init__.py +78 -40
  30. fractal_server/app/runner/v1/_slurm/__init__.py +1 -1
  31. fractal_server/app/runner/v2/__init__.py +183 -82
  32. fractal_server/app/runner/v2/_local_experimental/__init__.py +22 -12
  33. fractal_server/app/runner/v2/_local_experimental/executor.py +12 -8
  34. fractal_server/app/runner/v2/_slurm/__init__.py +1 -6
  35. fractal_server/app/runner/v2/_slurm_ssh/__init__.py +126 -0
  36. fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +83 -0
  37. fractal_server/app/runner/v2/_slurm_ssh/get_slurm_config.py +182 -0
  38. fractal_server/app/runner/v2/runner_functions_low_level.py +9 -11
  39. fractal_server/app/runner/versions.py +30 -0
  40. fractal_server/app/schemas/v1/__init__.py +1 -0
  41. fractal_server/app/schemas/{state.py → v1/state.py} +4 -21
  42. fractal_server/app/schemas/v2/__init__.py +4 -1
  43. fractal_server/app/schemas/v2/task_collection.py +97 -27
  44. fractal_server/config.py +222 -21
  45. fractal_server/main.py +25 -1
  46. fractal_server/migrations/env.py +1 -1
  47. fractal_server/ssh/__init__.py +4 -0
  48. fractal_server/ssh/_fabric.py +190 -0
  49. fractal_server/tasks/utils.py +12 -64
  50. fractal_server/tasks/v1/background_operations.py +2 -2
  51. fractal_server/tasks/{endpoint_operations.py → v1/endpoint_operations.py} +7 -12
  52. fractal_server/tasks/v1/utils.py +67 -0
  53. fractal_server/tasks/v2/_TaskCollectPip.py +61 -32
  54. fractal_server/tasks/v2/_venv_pip.py +195 -0
  55. fractal_server/tasks/v2/background_operations.py +257 -295
  56. fractal_server/tasks/v2/background_operations_ssh.py +304 -0
  57. fractal_server/tasks/v2/endpoint_operations.py +136 -0
  58. fractal_server/tasks/v2/templates/_1_create_venv.sh +46 -0
  59. fractal_server/tasks/v2/templates/_2_upgrade_pip.sh +30 -0
  60. fractal_server/tasks/v2/templates/_3_pip_install.sh +32 -0
  61. fractal_server/tasks/v2/templates/_4_pip_freeze.sh +21 -0
  62. fractal_server/tasks/v2/templates/_5_pip_show.sh +59 -0
  63. fractal_server/tasks/v2/utils.py +54 -0
  64. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/METADATA +6 -2
  65. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/RECORD +68 -44
  66. fractal_server/tasks/v2/get_collection_data.py +0 -14
  67. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/LICENSE +0 -0
  68. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/WHEEL +0 -0
  69. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,3 @@
1
+ from .executor import SlurmExecutor
2
+
3
+ __all__ = ["SlurmExecutor"]
@@ -2,7 +2,7 @@ from subprocess import run # nosec
2
2
 
3
3
  from cfut.slurm import STATES_FINISHED
4
4
 
5
- from .....logger import set_logger
5
+ from ......logger import set_logger
6
6
 
7
7
 
8
8
  logger = set_logger(__name__)
@@ -7,7 +7,7 @@ from typing import Optional
7
7
 
8
8
  from cfut import FileWaitThread
9
9
 
10
- from .....logger import set_logger
10
+ from ......logger import set_logger
11
11
  from ._check_jobs_status import _jobs_finished
12
12
 
13
13
  logger = set_logger(__name__)
@@ -19,7 +19,7 @@ import shlex
19
19
  import subprocess # nosec
20
20
  from typing import Optional
21
21
 
22
- from .....logger import set_logger
22
+ from ......logger import set_logger
23
23
 
24
24
  logger = set_logger(__name__)
25
25
 
@@ -29,18 +29,18 @@ import cloudpickle
29
29
  from cfut import SlurmExecutor
30
30
  from cfut.util import random_string
31
31
 
32
- from .....config import get_settings
33
- from .....logger import set_logger
34
- from .....syringe import Inject
35
- from ...exceptions import JobExecutionError
36
- from ...exceptions import TaskExecutionError
37
- from ...filenames import SHUTDOWN_FILENAME
38
- from ...task_files import get_task_file_paths
39
- from ...task_files import TaskFiles
40
- from ._batching import heuristics
32
+ from ......config import get_settings
33
+ from ......logger import set_logger
34
+ from ......syringe import Inject
35
+ from ....exceptions import JobExecutionError
36
+ from ....exceptions import TaskExecutionError
37
+ from ....filenames import SHUTDOWN_FILENAME
38
+ from ....task_files import get_task_file_paths
39
+ from ....task_files import TaskFiles
40
+ from ...slurm._slurm_config import get_default_slurm_config
41
+ from ...slurm._slurm_config import SlurmConfig
42
+ from .._batching import heuristics
41
43
  from ._executor_wait_thread import FractalSlurmWaitThread
42
- from ._slurm_config import get_default_slurm_config
43
- from ._slurm_config import SlurmConfig
44
44
  from ._subprocess_run_as_user import _glob_as_user
45
45
  from ._subprocess_run_as_user import _glob_as_user_strict
46
46
  from ._subprocess_run_as_user import _path_exists_as_user
@@ -1180,7 +1180,7 @@ class FractalSlurmExecutor(SlurmExecutor):
1180
1180
 
1181
1181
  # Prepare SLURM preamble based on SlurmConfig object
1182
1182
  script_lines = slurm_config.to_sbatch_preamble(
1183
- user_cache_dir=self.user_cache_dir
1183
+ remote_export_dir=self.user_cache_dir
1184
1184
  )
1185
1185
 
1186
1186
  # Extend SLURM preamble with variable which are not in SlurmConfig, and
@@ -0,0 +1,38 @@
1
+ import sys
2
+ import tarfile
3
+ from pathlib import Path
4
+
5
+
6
+ def _remove_suffix(*, string: str, suffix: str) -> str:
7
+ if string.endswith(suffix):
8
+ return string[: -len(suffix)]
9
+ else:
10
+ raise ValueError(f"Cannot remove {suffix=} from {string=}.")
11
+
12
+
13
+ if __name__ == "__main__":
14
+ help_msg = (
15
+ "Expected use:\n"
16
+ "python -m fractal_server.app.runner.extract_archive "
17
+ "path/to/archive.tar.gz"
18
+ )
19
+
20
+ if len(sys.argv[1:]) != 1:
21
+ raise ValueError(
22
+ f"Invalid argument.\n{help_msg}\nProvided: {sys.argv=}"
23
+ )
24
+ elif not sys.argv[1].endswith(".tar.gz"):
25
+ raise ValueError(
26
+ f"Invalid argument.\n{help_msg}\nProvided: {sys.argv=}"
27
+ )
28
+
29
+ tarfile_path = Path(sys.argv[1])
30
+
31
+ print(f"[extract_archive.py] {tarfile_path=}")
32
+
33
+ job_folder = tarfile_path.parent
34
+ subfolder_name = _remove_suffix(string=tarfile_path.name, suffix=".tar.gz")
35
+ with tarfile.open(tarfile_path) as tar:
36
+ tar.extractall(path=Path(job_folder, subfolder_name).as_posix())
37
+
38
+ print(f"[extract_archive.py] {tarfile_path=}")
@@ -22,6 +22,10 @@ import traceback
22
22
  from pathlib import Path
23
23
  from typing import Optional
24
24
 
25
+ from sqlalchemy.orm import Session as DBSyncSession
26
+
27
+ from ....logger import get_logger
28
+ from ....logger import reset_logger_handlers
25
29
  from ....logger import set_logger
26
30
  from ....syringe import Inject
27
31
  from ....utils import get_timestamp
@@ -33,7 +37,7 @@ from ...models.v1 import WorkflowTask
33
37
  from ...schemas.v1 import JobStatusTypeV1
34
38
  from ..exceptions import JobExecutionError
35
39
  from ..exceptions import TaskExecutionError
36
- from ..executors.slurm._subprocess_run_as_user import (
40
+ from ..executors.slurm.sudo._subprocess_run_as_user import (
37
41
  _mkdir_as_user,
38
42
  )
39
43
  from ..filenames import WORKFLOW_LOG_FILENAME
@@ -53,6 +57,27 @@ _backends["local"] = local_process_workflow
53
57
  _backends["slurm"] = slurm_process_workflow
54
58
 
55
59
 
def fail_job(
    *,
    db: DBSyncSession,
    job: ApplyWorkflow,
    log_msg: str,
    logger_name: str,
    emit_log: bool = False,
) -> None:
    """Mark `job` as FAILED, persist the failure, and close the session.

    Args:
        db: Open synchronous DB session; committed and closed here.
        job: Job row to mark as failed.
        log_msg: Message stored into `job.log`.
        logger_name: Name of the per-job logger whose handlers are reset.
        emit_log: When `True`, also emit `log_msg` at ERROR level before
            the logger handlers are reset.
    """
    job_logger = get_logger(logger_name=logger_name)
    if emit_log:
        job_logger.error(log_msg)
    reset_logger_handlers(job_logger)
    # Persist the failure state on the job row, then release the session.
    job.log = log_msg
    job.status = JobStatusTypeV1.FAILED
    job.end_timestamp = get_timestamp()
    db.merge(job)
    db.commit()
    db.close()
    return
79
+
80
+
56
81
  async def submit_workflow(
57
82
  *,
58
83
  workflow_id: int,
@@ -91,21 +116,41 @@ async def submit_workflow(
91
116
  slurm backend.
92
117
  """
93
118
 
94
- # Declare runner backend and set `process_workflow` function
95
- settings = Inject(get_settings)
96
- FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
97
- if FRACTAL_RUNNER_BACKEND == "local":
98
- process_workflow = local_process_workflow
99
- elif FRACTAL_RUNNER_BACKEND == "slurm":
100
- process_workflow = slurm_process_workflow
101
- else:
102
- raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
119
+ logger_name = f"WF{workflow_id}_job{job_id}"
120
+ logger = set_logger(logger_name=logger_name)
103
121
 
104
122
  with next(DB.get_sync_db()) as db_sync:
105
123
 
106
124
  job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
107
125
  if not job:
108
- raise ValueError(f"Cannot fetch job {job_id} from database")
126
+ logger.error(f"ApplyWorkflow {job_id} does not exist")
127
+ return
128
+
129
+ settings = Inject(get_settings)
130
+ FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
131
+ if FRACTAL_RUNNER_BACKEND == "local":
132
+ process_workflow = local_process_workflow
133
+ elif FRACTAL_RUNNER_BACKEND == "slurm":
134
+ process_workflow = slurm_process_workflow
135
+ else:
136
+
137
+ if FRACTAL_RUNNER_BACKEND == "local_experimental":
138
+ log_msg = (
139
+ f"{FRACTAL_RUNNER_BACKEND=} is not available for v1 jobs."
140
+ )
141
+ else:
142
+ log_msg = f"Invalid {FRACTAL_RUNNER_BACKEND=}"
143
+
144
+ fail_job(
145
+ job=job,
146
+ db=db_sync,
147
+ log_msg=log_msg,
148
+ logger_name=logger_name,
149
+ emit_log=True,
150
+ )
151
+ return
152
+
153
+ # Declare runner backend and set `process_workflow` function
109
154
 
110
155
  input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
111
156
  output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
@@ -126,12 +171,9 @@ async def submit_workflow(
126
171
  log_msg += (
127
172
  f"Cannot fetch workflow {workflow_id} from database\n"
128
173
  )
129
- job.status = JobStatusTypeV1.FAILED
130
- job.end_timestamp = get_timestamp()
131
- job.log = log_msg
132
- db_sync.merge(job)
133
- db_sync.commit()
134
- db_sync.close()
174
+ fail_job(
175
+ db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name
176
+ )
135
177
  return
136
178
 
137
179
  # Prepare some of process_workflow arguments
@@ -147,9 +189,14 @@ async def submit_workflow(
147
189
  )
148
190
 
149
191
  if WORKFLOW_DIR_LOCAL.exists():
150
- raise RuntimeError(
151
- f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists."
192
+ fail_job(
193
+ db=db_sync,
194
+ job=job,
195
+ log_msg=f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists.",
196
+ logger_name=logger_name,
197
+ emit_log=True,
152
198
  )
199
+ return
153
200
 
154
201
  # Create WORKFLOW_DIR
155
202
  original_umask = os.umask(0)
@@ -202,7 +249,6 @@ async def submit_workflow(
202
249
  db_sync.refresh(workflow)
203
250
 
204
251
  # Write logs
205
- logger_name = f"WF{workflow_id}_job{job_id}"
206
252
  log_file_path = WORKFLOW_DIR_LOCAL / WORKFLOW_LOG_FILENAME
207
253
  logger = set_logger(
208
254
  logger_name=logger_name,
@@ -302,19 +348,14 @@ async def submit_workflow(
302
348
 
303
349
  db_sync.merge(output_dataset)
304
350
 
305
- job.status = JobStatusTypeV1.FAILED
306
- job.end_timestamp = get_timestamp()
307
-
308
351
  exception_args_string = "\n".join(e.args)
309
- job.log = (
352
+ log_msg = (
310
353
  f"TASK ERROR: "
311
354
  f"Task name: {e.task_name}, "
312
355
  f"position in Workflow: {e.workflow_task_order}\n"
313
356
  f"TRACEBACK:\n{exception_args_string}"
314
357
  )
315
- db_sync.merge(job)
316
- close_job_logger(logger)
317
- db_sync.commit()
358
+ fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
318
359
 
319
360
  except JobExecutionError as e:
320
361
 
@@ -334,14 +375,13 @@ async def submit_workflow(
334
375
  )
335
376
 
336
377
  db_sync.merge(output_dataset)
337
-
338
- job.status = JobStatusTypeV1.FAILED
339
- job.end_timestamp = get_timestamp()
340
378
  error = e.assemble_error()
341
- job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
342
- db_sync.merge(job)
343
- close_job_logger(logger)
344
- db_sync.commit()
379
+ fail_job(
380
+ db=db_sync,
381
+ job=job,
382
+ log_msg=f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}",
383
+ logger_name=logger_name,
384
+ )
345
385
 
346
386
  except Exception:
347
387
 
@@ -364,14 +404,12 @@ async def submit_workflow(
364
404
 
365
405
  db_sync.merge(output_dataset)
366
406
 
367
- job.status = JobStatusTypeV1.FAILED
368
- job.end_timestamp = get_timestamp()
369
- job.log = (
407
+ log_msg = (
370
408
  f"UNKNOWN ERROR in Fractal job {job.id}\n"
371
409
  f"TRACEBACK:\n{current_traceback}"
372
410
  )
373
- db_sync.merge(job)
374
- close_job_logger(logger)
375
- db_sync.commit()
411
+ fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
412
+
376
413
  finally:
377
414
  db_sync.close()
415
+ reset_logger_handlers(logger)
@@ -22,7 +22,7 @@ from typing import Optional
22
22
  from typing import Union
23
23
 
24
24
  from ...async_wrap import async_wrap
25
- from ...executors.slurm.executor import FractalSlurmExecutor
25
+ from ...executors.slurm.sudo.executor import FractalSlurmExecutor
26
26
  from ...set_start_and_last_task_index import set_start_and_last_task_index
27
27
  from .._common import execute_tasks
28
28
  from ..common import TaskParameters