fractal-server 2.2.0a0__py3-none-any.whl → 2.3.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +1 -1
- fractal_server/app/models/v1/state.py +1 -2
- fractal_server/app/routes/admin/v1.py +2 -2
- fractal_server/app/routes/admin/v2.py +2 -2
- fractal_server/app/routes/api/v1/job.py +2 -2
- fractal_server/app/routes/api/v1/task_collection.py +4 -4
- fractal_server/app/routes/api/v2/__init__.py +23 -3
- fractal_server/app/routes/api/v2/job.py +2 -2
- fractal_server/app/routes/api/v2/submit.py +6 -0
- fractal_server/app/routes/api/v2/task_collection.py +74 -34
- fractal_server/app/routes/api/v2/task_collection_custom.py +144 -0
- fractal_server/app/routes/api/v2/task_collection_ssh.py +125 -0
- fractal_server/app/routes/aux/_runner.py +10 -2
- fractal_server/app/runner/compress_folder.py +120 -0
- fractal_server/app/runner/executors/slurm/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/_batching.py +0 -1
- fractal_server/app/runner/executors/slurm/_slurm_config.py +9 -9
- fractal_server/app/runner/executors/slurm/ssh/__init__.py +3 -0
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +112 -0
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +120 -0
- fractal_server/app/runner/executors/slurm/ssh/executor.py +1490 -0
- fractal_server/app/runner/executors/slurm/sudo/__init__.py +3 -0
- fractal_server/app/runner/executors/slurm/{_check_jobs_status.py → sudo/_check_jobs_status.py} +1 -1
- fractal_server/app/runner/executors/slurm/{_executor_wait_thread.py → sudo/_executor_wait_thread.py} +1 -1
- fractal_server/app/runner/executors/slurm/{_subprocess_run_as_user.py → sudo/_subprocess_run_as_user.py} +1 -1
- fractal_server/app/runner/executors/slurm/{executor.py → sudo/executor.py} +12 -12
- fractal_server/app/runner/extract_archive.py +38 -0
- fractal_server/app/runner/v1/__init__.py +78 -40
- fractal_server/app/runner/v1/_slurm/__init__.py +1 -1
- fractal_server/app/runner/v2/__init__.py +183 -82
- fractal_server/app/runner/v2/_local_experimental/__init__.py +22 -12
- fractal_server/app/runner/v2/_local_experimental/executor.py +12 -8
- fractal_server/app/runner/v2/_slurm/__init__.py +1 -6
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +126 -0
- fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +83 -0
- fractal_server/app/runner/v2/_slurm_ssh/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +9 -11
- fractal_server/app/runner/versions.py +30 -0
- fractal_server/app/schemas/v1/__init__.py +1 -0
- fractal_server/app/schemas/{state.py → v1/state.py} +4 -21
- fractal_server/app/schemas/v2/__init__.py +4 -1
- fractal_server/app/schemas/v2/task_collection.py +97 -27
- fractal_server/config.py +222 -21
- fractal_server/main.py +25 -1
- fractal_server/migrations/env.py +1 -1
- fractal_server/ssh/__init__.py +4 -0
- fractal_server/ssh/_fabric.py +190 -0
- fractal_server/tasks/utils.py +12 -64
- fractal_server/tasks/v1/background_operations.py +2 -2
- fractal_server/tasks/{endpoint_operations.py → v1/endpoint_operations.py} +7 -12
- fractal_server/tasks/v1/utils.py +67 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +61 -32
- fractal_server/tasks/v2/_venv_pip.py +195 -0
- fractal_server/tasks/v2/background_operations.py +257 -295
- fractal_server/tasks/v2/background_operations_ssh.py +304 -0
- fractal_server/tasks/v2/endpoint_operations.py +136 -0
- fractal_server/tasks/v2/templates/_1_create_venv.sh +46 -0
- fractal_server/tasks/v2/templates/_2_upgrade_pip.sh +30 -0
- fractal_server/tasks/v2/templates/_3_pip_install.sh +32 -0
- fractal_server/tasks/v2/templates/_4_pip_freeze.sh +21 -0
- fractal_server/tasks/v2/templates/_5_pip_show.sh +59 -0
- fractal_server/tasks/v2/utils.py +54 -0
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/METADATA +6 -2
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/RECORD +68 -44
- fractal_server/tasks/v2/get_collection_data.py +0 -14
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/LICENSE +0 -0
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/WHEEL +0 -0
- {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0a0.dist-info}/entry_points.txt +0 -0
fractal_server/config.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13
13
|
# Zurich.
|
14
14
|
import logging
|
15
15
|
import shutil
|
16
|
+
import sys
|
16
17
|
from os import environ
|
17
18
|
from os import getenv
|
18
19
|
from os.path import abspath
|
@@ -166,7 +167,7 @@ class Settings(BaseSettings):
|
|
166
167
|
###########################################################################
|
167
168
|
# DATABASE
|
168
169
|
###########################################################################
|
169
|
-
DB_ENGINE: Literal["sqlite", "postgres"] = "sqlite"
|
170
|
+
DB_ENGINE: Literal["sqlite", "postgres", "postgres-psycopg"] = "sqlite"
|
170
171
|
"""
|
171
172
|
Select which database engine to use (supported: `sqlite`, `postgres` and `postgres-psycopg`).
|
172
173
|
"""
|
@@ -201,39 +202,51 @@ class Settings(BaseSettings):
|
|
201
202
|
"""
|
202
203
|
|
203
204
|
@property
|
204
|
-
def
|
205
|
-
if self.DB_ENGINE == "
|
206
|
-
if not self.SQLITE_PATH:
|
207
|
-
raise FractalConfigurationError(
|
208
|
-
"SQLITE_PATH path cannot be None"
|
209
|
-
)
|
210
|
-
sqlite_path = abspath(self.SQLITE_PATH)
|
205
|
+
def DATABASE_ASYNC_URL(self) -> URL:
|
206
|
+
if self.DB_ENGINE == "postgres":
|
211
207
|
url = URL.create(
|
212
|
-
drivername="
|
213
|
-
|
208
|
+
drivername="postgresql+asyncpg",
|
209
|
+
username=self.POSTGRES_USER,
|
210
|
+
password=self.POSTGRES_PASSWORD,
|
211
|
+
host=self.POSTGRES_HOST,
|
212
|
+
port=self.POSTGRES_PORT,
|
213
|
+
database=self.POSTGRES_DB,
|
214
214
|
)
|
215
|
-
|
216
|
-
elif "postgres":
|
215
|
+
elif self.DB_ENGINE == "postgres-psycopg":
|
217
216
|
url = URL.create(
|
218
|
-
drivername="postgresql+
|
217
|
+
drivername="postgresql+psycopg",
|
219
218
|
username=self.POSTGRES_USER,
|
220
219
|
password=self.POSTGRES_PASSWORD,
|
221
220
|
host=self.POSTGRES_HOST,
|
222
221
|
port=self.POSTGRES_PORT,
|
223
222
|
database=self.POSTGRES_DB,
|
224
223
|
)
|
225
|
-
|
224
|
+
else:
|
225
|
+
if not self.SQLITE_PATH:
|
226
|
+
raise FractalConfigurationError(
|
227
|
+
"SQLITE_PATH path cannot be None"
|
228
|
+
)
|
229
|
+
sqlite_path = abspath(self.SQLITE_PATH)
|
230
|
+
url = URL.create(
|
231
|
+
drivername="sqlite+aiosqlite",
|
232
|
+
database=sqlite_path,
|
233
|
+
)
|
234
|
+
return url
|
226
235
|
|
227
236
|
@property
|
228
237
|
def DATABASE_SYNC_URL(self):
|
229
|
-
if self.DB_ENGINE == "
|
238
|
+
if self.DB_ENGINE == "postgres":
|
239
|
+
return self.DATABASE_ASYNC_URL.set(
|
240
|
+
drivername="postgresql+psycopg2"
|
241
|
+
)
|
242
|
+
elif self.DB_ENGINE == "postgres-psycopg":
|
243
|
+
return self.DATABASE_ASYNC_URL.set(drivername="postgresql+psycopg")
|
244
|
+
else:
|
230
245
|
if not self.SQLITE_PATH:
|
231
246
|
raise FractalConfigurationError(
|
232
247
|
"SQLITE_PATH path cannot be None"
|
233
248
|
)
|
234
|
-
return self.
|
235
|
-
elif self.DB_ENGINE == "postgres":
|
236
|
-
return self.DATABASE_URL.set(drivername="postgresql+psycopg2")
|
249
|
+
return self.DATABASE_ASYNC_URL.set(drivername="sqlite")
|
237
250
|
|
238
251
|
###########################################################################
|
239
252
|
# FRACTAL SPECIFIC
|
@@ -311,7 +324,10 @@ class Settings(BaseSettings):
|
|
311
324
|
return FRACTAL_RUNNER_WORKING_BASE_DIR_path
|
312
325
|
|
313
326
|
FRACTAL_RUNNER_BACKEND: Literal[
|
314
|
-
"local",
|
327
|
+
"local",
|
328
|
+
"local_experimental",
|
329
|
+
"slurm",
|
330
|
+
"slurm_ssh",
|
315
331
|
] = "local"
|
316
332
|
"""
|
317
333
|
Select which runner backend to use.
|
@@ -354,9 +370,125 @@ class Settings(BaseSettings):
|
|
354
370
|
|
355
371
|
FRACTAL_SLURM_WORKER_PYTHON: Optional[str] = None
|
356
372
|
"""
|
357
|
-
|
358
|
-
not specified, the same interpreter that runs the server is used.
|
373
|
+
Absolute path to Python interpreter that will run the jobs on the SLURM
|
374
|
+
nodes. If not specified, the same interpreter that runs the server is used.
|
375
|
+
"""
|
376
|
+
|
377
|
+
@validator("FRACTAL_SLURM_WORKER_PYTHON", always=True)
|
378
|
+
def absolute_FRACTAL_SLURM_WORKER_PYTHON(cls, v):
|
379
|
+
"""
|
380
|
+
If `FRACTAL_SLURM_WORKER_PYTHON` is a relative path, fail.
|
381
|
+
"""
|
382
|
+
if v is None:
|
383
|
+
return None
|
384
|
+
elif not Path(v).is_absolute():
|
385
|
+
raise FractalConfigurationError(
|
386
|
+
f"Non-absolute value for FRACTAL_SLURM_WORKER_PYTHON={v}"
|
387
|
+
)
|
388
|
+
else:
|
389
|
+
return v
|
390
|
+
|
391
|
+
FRACTAL_TASKS_PYTHON_DEFAULT_VERSION: Optional[
|
392
|
+
Literal["3.9", "3.10", "3.11", "3.12"]
|
393
|
+
] = None
|
394
|
+
"""
|
395
|
+
Default Python version to be used for task collection. Defaults to the
|
396
|
+
current version. Requires the corresponding variable (e.g.
|
397
|
+
`FRACTAL_TASKS_PYTHON_3_10`) to be set.
|
398
|
+
"""
|
399
|
+
|
400
|
+
FRACTAL_TASKS_PYTHON_3_9: Optional[str] = None
|
359
401
|
"""
|
402
|
+
Absolute path to the Python 3.9 interpreter that serves as base for virtual
|
403
|
+
environments for tasks. Note that this interpreter must have the `venv` module
|
404
|
+
installed. If set, this must be an absolute path. If the version specified
|
405
|
+
in `FRACTAL_TASKS_PYTHON_DEFAULT_VERSION` is `"3.9"` and this attribute is
|
406
|
+
unset, `sys.executable` is used as a default.
|
407
|
+
"""
|
408
|
+
|
409
|
+
FRACTAL_TASKS_PYTHON_3_10: Optional[str] = None
|
410
|
+
"""
|
411
|
+
Same as `FRACTAL_TASKS_PYTHON_3_9`, for Python 3.10.
|
412
|
+
"""
|
413
|
+
|
414
|
+
FRACTAL_TASKS_PYTHON_3_11: Optional[str] = None
|
415
|
+
"""
|
416
|
+
Same as `FRACTAL_TASKS_PYTHON_3_9`, for Python 3.11.
|
417
|
+
"""
|
418
|
+
|
419
|
+
FRACTAL_TASKS_PYTHON_3_12: Optional[str] = None
|
420
|
+
"""
|
421
|
+
Same as `FRACTAL_TASKS_PYTHON_3_9`, for Python 3.12.
|
422
|
+
"""
|
423
|
+
|
424
|
+
@root_validator(pre=True)
|
425
|
+
def check_tasks_python(cls, values) -> None:
|
426
|
+
"""
|
427
|
+
Perform multiple checks of the Python-interpreter variables.
|
428
|
+
|
429
|
+
1. Each `FRACTAL_TASKS_PYTHON_X_Y` variable must be an absolute path,
|
430
|
+
if set.
|
431
|
+
2. If `FRACTAL_TASKS_PYTHON_DEFAULT_VERSION` is unset, use
|
432
|
+
`sys.executable` and set the corresponding
|
433
|
+
`FRACTAL_TASKS_PYTHON_X_Y` (and unset all others).
|
434
|
+
"""
|
435
|
+
|
436
|
+
# `FRACTAL_TASKS_PYTHON_X_Y` variables can only be absolute paths
|
437
|
+
for version in ["3_9", "3_10", "3_11", "3_12"]:
|
438
|
+
key = f"FRACTAL_TASKS_PYTHON_{version}"
|
439
|
+
value = values.get(key)
|
440
|
+
if value is not None and not Path(value).is_absolute():
|
441
|
+
raise FractalConfigurationError(
|
442
|
+
f"Non-absolute value {key}={value}"
|
443
|
+
)
|
444
|
+
|
445
|
+
default_version = values.get("FRACTAL_TASKS_PYTHON_DEFAULT_VERSION")
|
446
|
+
|
447
|
+
if default_version is not None:
|
448
|
+
# "production/slurm" branch
|
449
|
+
# If a default version is set, then the corresponding interpreter
|
450
|
+
# must also be set
|
451
|
+
default_version_undescore = default_version.replace(".", "_")
|
452
|
+
key = f"FRACTAL_TASKS_PYTHON_{default_version_undescore}"
|
453
|
+
value = values.get(key)
|
454
|
+
if value is None:
|
455
|
+
msg = (
|
456
|
+
f"FRACTAL_TASKS_PYTHON_DEFAULT_VERSION={default_version} "
|
457
|
+
f"but {key}={value}."
|
458
|
+
)
|
459
|
+
logging.error(msg)
|
460
|
+
raise FractalConfigurationError(msg)
|
461
|
+
|
462
|
+
else:
|
463
|
+
# If no default version is set, then only `sys.executable` is made
|
464
|
+
# available
|
465
|
+
_info = sys.version_info
|
466
|
+
current_version = f"{_info.major}_{_info.minor}"
|
467
|
+
current_version_dot = f"{_info.major}.{_info.minor}"
|
468
|
+
values[
|
469
|
+
"FRACTAL_TASKS_PYTHON_DEFAULT_VERSION"
|
470
|
+
] = current_version_dot
|
471
|
+
logging.info(
|
472
|
+
"Setting FRACTAL_TASKS_PYTHON_DEFAULT_VERSION to "
|
473
|
+
f"{current_version_dot}"
|
474
|
+
)
|
475
|
+
|
476
|
+
# Unset all existing interpreter variables
|
477
|
+
for _version in ["3_9", "3_10", "3_11", "3_12"]:
|
478
|
+
key = f"FRACTAL_TASKS_PYTHON_{_version}"
|
479
|
+
if _version == current_version:
|
480
|
+
values[key] = sys.executable
|
481
|
+
logging.info(f"Setting {key} to {sys.executable}.")
|
482
|
+
else:
|
483
|
+
value = values.get(key)
|
484
|
+
if value is not None:
|
485
|
+
logging.info(
|
486
|
+
f"Setting {key} to None (given: {value}), "
|
487
|
+
"because FRACTAL_TASKS_PYTHON_DEFAULT_VERSION was "
|
488
|
+
"not set."
|
489
|
+
)
|
490
|
+
values[key] = None
|
491
|
+
return values
|
360
492
|
|
361
493
|
FRACTAL_SLURM_POLL_INTERVAL: int = 5
|
362
494
|
"""
|
@@ -380,6 +512,25 @@ class Settings(BaseSettings):
|
|
380
512
|
`JobExecutionError`.
|
381
513
|
"""
|
382
514
|
|
515
|
+
FRACTAL_SLURM_SSH_HOST: Optional[str] = None
|
516
|
+
"""
|
517
|
+
SSH-reachable host where a SLURM client is available.
|
518
|
+
"""
|
519
|
+
FRACTAL_SLURM_SSH_USER: Optional[str] = None
|
520
|
+
"""
|
521
|
+
User on `FRACTAL_SLURM_SSH_HOST`.
|
522
|
+
"""
|
523
|
+
FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH: Optional[str] = None
|
524
|
+
"""
|
525
|
+
Private key for connecting to `FRACTAL_SLURM_SSH_HOST` as
|
526
|
+
`FRACTAL_SLURM_SSH_USER`.
|
527
|
+
"""
|
528
|
+
# FIXME SSH: Split this into two folders (for tasks and for jobs)
|
529
|
+
FRACTAL_SLURM_SSH_WORKING_BASE_DIR: Optional[str] = None
|
530
|
+
"""
|
531
|
+
Remote folder on `FRACTAL_SLURM_SSH_HOST`.
|
532
|
+
"""
|
533
|
+
|
383
534
|
FRACTAL_API_SUBMIT_RATE_LIMIT: int = 2
|
384
535
|
"""
|
385
536
|
Interval to wait (in seconds) to be allowed to call again
|
@@ -420,6 +571,14 @@ class Settings(BaseSettings):
|
|
420
571
|
"DB engine is `postgres` but `psycopg2` or `asyncpg` "
|
421
572
|
"are not available"
|
422
573
|
)
|
574
|
+
elif self.DB_ENGINE == "postgres-psycopg":
|
575
|
+
try:
|
576
|
+
import psycopg # noqa: F401
|
577
|
+
except ModuleNotFoundError:
|
578
|
+
raise FractalConfigurationError(
|
579
|
+
"DB engine is `postgres-psycopg` but `psycopg` is not "
|
580
|
+
"available"
|
581
|
+
)
|
423
582
|
else:
|
424
583
|
if not self.SQLITE_PATH:
|
425
584
|
raise FractalConfigurationError(
|
@@ -460,6 +619,48 @@ class Settings(BaseSettings):
|
|
460
619
|
raise FractalConfigurationError(
|
461
620
|
f"{info} but `squeue` command not found."
|
462
621
|
)
|
622
|
+
elif self.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
623
|
+
if self.FRACTAL_SLURM_WORKER_PYTHON is None:
|
624
|
+
raise FractalConfigurationError(
|
625
|
+
f"Must set FRACTAL_SLURM_WORKER_PYTHON when {info}"
|
626
|
+
)
|
627
|
+
if self.FRACTAL_SLURM_SSH_USER is None:
|
628
|
+
raise FractalConfigurationError(
|
629
|
+
f"Must set FRACTAL_SLURM_SSH_USER when {info}"
|
630
|
+
)
|
631
|
+
if self.FRACTAL_SLURM_SSH_HOST is None:
|
632
|
+
raise FractalConfigurationError(
|
633
|
+
f"Must set FRACTAL_SLURM_SSH_HOST when {info}"
|
634
|
+
)
|
635
|
+
if self.FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH is None:
|
636
|
+
raise FractalConfigurationError(
|
637
|
+
f"Must set FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH when {info}"
|
638
|
+
)
|
639
|
+
if self.FRACTAL_SLURM_SSH_WORKING_BASE_DIR is None:
|
640
|
+
raise FractalConfigurationError(
|
641
|
+
f"Must set FRACTAL_SLURM_SSH_WORKING_BASE_DIR when {info}"
|
642
|
+
)
|
643
|
+
|
644
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import ( # noqa: E501
|
645
|
+
load_slurm_config_file,
|
646
|
+
)
|
647
|
+
|
648
|
+
if not self.FRACTAL_SLURM_CONFIG_FILE:
|
649
|
+
raise FractalConfigurationError(
|
650
|
+
f"Must set FRACTAL_SLURM_CONFIG_FILE when {info}"
|
651
|
+
)
|
652
|
+
else:
|
653
|
+
if not self.FRACTAL_SLURM_CONFIG_FILE.exists():
|
654
|
+
raise FractalConfigurationError(
|
655
|
+
f"{info} but FRACTAL_SLURM_CONFIG_FILE="
|
656
|
+
f"{self.FRACTAL_SLURM_CONFIG_FILE} not found."
|
657
|
+
)
|
658
|
+
|
659
|
+
load_slurm_config_file(self.FRACTAL_SLURM_CONFIG_FILE)
|
660
|
+
if not shutil.which("ssh"):
|
661
|
+
raise FractalConfigurationError(
|
662
|
+
f"{info} but `ssh` command not found."
|
663
|
+
)
|
463
664
|
else: # i.e. self.FRACTAL_RUNNER_BACKEND == "local"
|
464
665
|
if self.FRACTAL_LOCAL_CONFIG_FILE:
|
465
666
|
if not self.FRACTAL_LOCAL_CONFIG_FILE.exists():
|
fractal_server/main.py
CHANGED
@@ -20,6 +20,7 @@ from contextlib import asynccontextmanager
|
|
20
20
|
|
21
21
|
from fastapi import FastAPI
|
22
22
|
|
23
|
+
from .app.routes.aux._runner import _backend_supports_shutdown # FIXME: change
|
23
24
|
from .app.runner.shutdown import cleanup_after_shutdown
|
24
25
|
from .app.security import _create_first_user
|
25
26
|
from .config import get_settings
|
@@ -97,17 +98,38 @@ async def lifespan(app: FastAPI):
|
|
97
98
|
is_superuser=True,
|
98
99
|
is_verified=True,
|
99
100
|
)
|
101
|
+
|
102
|
+
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
103
|
+
from fractal_server.ssh._fabric import get_ssh_connection
|
104
|
+
|
105
|
+
app.state.connection = get_ssh_connection()
|
106
|
+
logger.info(
|
107
|
+
f"Created SSH connection "
|
108
|
+
f"({app.state.connection.is_connected=})."
|
109
|
+
)
|
110
|
+
else:
|
111
|
+
app.state.connection = None
|
112
|
+
|
100
113
|
config_uvicorn_loggers()
|
101
114
|
logger.info("End application startup")
|
102
115
|
reset_logger_handlers(logger)
|
103
116
|
yield
|
104
117
|
logger = get_logger("fractal_server.lifespan")
|
105
118
|
logger.info("Start application shutdown")
|
119
|
+
|
120
|
+
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
121
|
+
logger.info(
|
122
|
+
f"Closing SSH connection "
|
123
|
+
f"(current: {app.state.connection.is_connected=})."
|
124
|
+
)
|
125
|
+
|
126
|
+
app.state.connection.close()
|
127
|
+
|
106
128
|
logger.info(
|
107
129
|
f"Current worker with pid {os.getpid()} is shutting down. "
|
108
130
|
f"Current jobs: {app.state.jobsV1=}, {app.state.jobsV2=}"
|
109
131
|
)
|
110
|
-
if settings.FRACTAL_RUNNER_BACKEND
|
132
|
+
if _backend_supports_shutdown(settings.FRACTAL_RUNNER_BACKEND):
|
111
133
|
try:
|
112
134
|
await cleanup_after_shutdown(
|
113
135
|
jobsV1=app.state.jobsV1,
|
@@ -120,6 +142,8 @@ async def lifespan(app: FastAPI):
|
|
120
142
|
"some of running jobs are not shutdown properly. "
|
121
143
|
f"Original error: {e}"
|
122
144
|
)
|
145
|
+
else:
|
146
|
+
logger.info("Shutdown not available for this backend runner.")
|
123
147
|
|
124
148
|
logger.info("End application shutdown")
|
125
149
|
reset_logger_handlers(logger)
|
fractal_server/migrations/env.py
CHANGED
@@ -54,7 +54,7 @@ def run_migrations_offline() -> None:
|
|
54
54
|
settings = Inject(get_settings)
|
55
55
|
settings.check_db()
|
56
56
|
context.configure(
|
57
|
-
url=settings.
|
57
|
+
url=settings.DATABASE_ASYNC_URL,
|
58
58
|
target_metadata=target_metadata,
|
59
59
|
literal_binds=True,
|
60
60
|
dialect_opts={"paramstyle": "named"},
|
@@ -0,0 +1,190 @@
|
|
1
|
+
import time
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from fabric import Connection
|
5
|
+
from invoke import UnexpectedExit
|
6
|
+
from paramiko.ssh_exception import NoValidConnectionsError
|
7
|
+
|
8
|
+
from ..logger import get_logger
|
9
|
+
from ..logger import set_logger
|
10
|
+
from fractal_server.config import get_settings
|
11
|
+
from fractal_server.syringe import Inject
|
12
|
+
|
13
|
+
logger = set_logger(__name__)
|
14
|
+
|
15
|
+
MAX_ATTEMPTS = 5
|
16
|
+
|
17
|
+
|
18
|
+
def get_ssh_connection(
|
19
|
+
*,
|
20
|
+
host: Optional[str] = None,
|
21
|
+
user: Optional[str] = None,
|
22
|
+
key_filename: Optional[str] = None,
|
23
|
+
) -> Connection:
|
24
|
+
"""
|
25
|
+
Create a `fabric.Connection` object based on fractal-server settings
|
26
|
+
or explicit arguments.
|
27
|
+
|
28
|
+
Args:
|
29
|
+
host:
|
30
|
+
user:
|
31
|
+
key_filename:
|
32
|
+
|
33
|
+
Returns:
|
34
|
+
Fabric connection object
|
35
|
+
"""
|
36
|
+
settings = Inject(get_settings)
|
37
|
+
if host is None:
|
38
|
+
host = settings.FRACTAL_SLURM_SSH_HOST
|
39
|
+
if user is None:
|
40
|
+
user = settings.FRACTAL_SLURM_SSH_USER
|
41
|
+
if key_filename is None:
|
42
|
+
key_filename = settings.FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH
|
43
|
+
|
44
|
+
connection = Connection(
|
45
|
+
host=host,
|
46
|
+
user=user,
|
47
|
+
connect_kwargs={"key_filename": key_filename},
|
48
|
+
)
|
49
|
+
logger.debug(f"Now created {connection=}.")
|
50
|
+
return connection
|
51
|
+
|
52
|
+
|
53
|
+
def check_connection(connection: Connection) -> None:
|
54
|
+
"""
|
55
|
+
Open the SSH connection and handle exceptions.
|
56
|
+
|
57
|
+
This function can be called from within other functions that use
|
58
|
+
`connection`, so that we can provide a meaningful error in case the
|
59
|
+
SSH connection cannot be opened.
|
60
|
+
|
61
|
+
Args:
|
62
|
+
connection: Fabric connection object
|
63
|
+
"""
|
64
|
+
if not connection.is_connected:
|
65
|
+
try:
|
66
|
+
connection.open()
|
67
|
+
except Exception as e:
|
68
|
+
raise RuntimeError(
|
69
|
+
f"Cannot open SSH connection (original error: '{str(e)}')."
|
70
|
+
)
|
71
|
+
|
72
|
+
|
73
|
+
def run_command_over_ssh(
|
74
|
+
*,
|
75
|
+
cmd: str,
|
76
|
+
connection: Connection,
|
77
|
+
max_attempts: int = MAX_ATTEMPTS,
|
78
|
+
base_interval: float = 3.0,
|
79
|
+
) -> str:
|
80
|
+
"""
|
81
|
+
Run a command within an open SSH connection.
|
82
|
+
|
83
|
+
Args:
|
84
|
+
cmd: Command to be run
|
85
|
+
connection: Fabric connection object
|
86
|
+
|
87
|
+
Returns:
|
88
|
+
Standard output of the command, if successful.
|
89
|
+
"""
|
90
|
+
t_0 = time.perf_counter()
|
91
|
+
ind_attempt = 0
|
92
|
+
while ind_attempt <= max_attempts:
|
93
|
+
ind_attempt += 1
|
94
|
+
prefix = f"[attempt {ind_attempt}/{max_attempts}]"
|
95
|
+
logger.info(f"{prefix} START running '{cmd}' over SSH.")
|
96
|
+
try:
|
97
|
+
# Case 1: Command runs successfully
|
98
|
+
res = connection.run(cmd, hide=True)
|
99
|
+
t_1 = time.perf_counter()
|
100
|
+
logger.info(
|
101
|
+
f"{prefix} END running '{cmd}' over SSH, "
|
102
|
+
f"elapsed {t_1-t_0:.3f}"
|
103
|
+
)
|
104
|
+
logger.debug(f"STDOUT: {res.stdout}")
|
105
|
+
logger.debug(f"STDERR: {res.stderr}")
|
106
|
+
return res.stdout
|
107
|
+
except NoValidConnectionsError as e:
|
108
|
+
# Case 2: Command fails with a connection error
|
109
|
+
logger.warning(
|
110
|
+
f"{prefix} Running command `{cmd}` over SSH failed.\n"
|
111
|
+
f"Original NoValidConnectionError:\n{str(e)}.\n"
|
112
|
+
f"{e.errors=}\n"
|
113
|
+
)
|
114
|
+
if ind_attempt < max_attempts:
|
115
|
+
sleeptime = (
|
116
|
+
base_interval**ind_attempt
|
117
|
+
) # FIXME SSH: add jitter?
|
118
|
+
logger.warning(
|
119
|
+
f"{prefix} Now sleep {sleeptime:.3f} seconds and continue."
|
120
|
+
)
|
121
|
+
time.sleep(sleeptime)
|
122
|
+
continue
|
123
|
+
else:
|
124
|
+
logger.error(f"{prefix} Reached last attempt")
|
125
|
+
break
|
126
|
+
except UnexpectedExit as e:
|
127
|
+
# Case 3: Command fails with an actual error
|
128
|
+
error_msg = (
|
129
|
+
f"{prefix} Running command `{cmd}` over SSH failed.\n"
|
130
|
+
f"Original error:\n{str(e)}."
|
131
|
+
)
|
132
|
+
logger.error(error_msg)
|
133
|
+
raise ValueError(error_msg)
|
134
|
+
except Exception as e:
|
135
|
+
logger.error(
|
136
|
+
f"Running command `{cmd}` over SSH failed.\n"
|
137
|
+
f"Original Error:\n{str(e)}."
|
138
|
+
)
|
139
|
+
raise e
|
140
|
+
|
141
|
+
raise ValueError(
|
142
|
+
f"Reached last attempt ({max_attempts=}) for running '{cmd}' over SSH"
|
143
|
+
)
|
144
|
+
|
145
|
+
|
146
|
+
def put_over_ssh(
|
147
|
+
*,
|
148
|
+
local: str,
|
149
|
+
remote: str,
|
150
|
+
connection: Connection,
|
151
|
+
logger_name: Optional[str] = None,
|
152
|
+
) -> None:
|
153
|
+
"""
|
154
|
+
Transfer a file via SSH
|
155
|
+
|
156
|
+
Args:
|
157
|
+
local: Local path to file
|
158
|
+
remote: Target path on remote host
|
159
|
+
connection: Fabric connection object
|
160
|
+
logger_name: Name of the logger
|
161
|
+
|
162
|
+
"""
|
163
|
+
try:
|
164
|
+
connection.put(local=local, remote=remote)
|
165
|
+
except Exception as e:
|
166
|
+
logger = get_logger(logger_name=logger_name)
|
167
|
+
logger.error(
|
168
|
+
f"Transferring {local=} to {remote=} over SSH failed.\n"
|
169
|
+
f"Original Error:\n{str(e)}."
|
170
|
+
)
|
171
|
+
raise e
|
172
|
+
|
173
|
+
|
174
|
+
def _mkdir_over_ssh(
|
175
|
+
*, folder: str, connection: Connection, parents: bool = True
|
176
|
+
) -> None:
|
177
|
+
"""
|
178
|
+
Create a folder remotely via SSH.
|
179
|
+
|
180
|
+
Args:
|
181
|
+
folder:
|
182
|
+
connection:
|
183
|
+
parents:
|
184
|
+
"""
|
185
|
+
# FIXME SSH: try using `mkdir` method of `paramiko.SFTPClient`
|
186
|
+
if parents:
|
187
|
+
cmd = f"mkdir -p {folder}"
|
188
|
+
else:
|
189
|
+
cmd = f"mkdir {folder}"
|
190
|
+
run_command_over_ssh(cmd=cmd, connection=connection)
|
fractal_server/tasks/utils.py
CHANGED
@@ -1,42 +1,12 @@
|
|
1
1
|
import re
|
2
|
-
import shutil
|
3
|
-
import sys
|
4
2
|
from pathlib import Path
|
5
|
-
from typing import Optional
|
6
3
|
|
7
4
|
from fractal_server.config import get_settings
|
8
|
-
from fractal_server.logger import get_logger
|
9
5
|
from fractal_server.syringe import Inject
|
10
|
-
from fractal_server.utils import execute_command
|
11
6
|
|
12
7
|
COLLECTION_FILENAME = "collection.json"
|
13
8
|
COLLECTION_LOG_FILENAME = "collection.log"
|
14
|
-
|
15
|
-
|
16
|
-
def get_python_interpreter(version: Optional[str] = None) -> str:
|
17
|
-
"""
|
18
|
-
Return the path to the python interpreter
|
19
|
-
|
20
|
-
Args:
|
21
|
-
version: Python version
|
22
|
-
|
23
|
-
Raises:
|
24
|
-
ValueError: If the python version requested is not available on the
|
25
|
-
host.
|
26
|
-
|
27
|
-
Returns:
|
28
|
-
interpreter: string representing the python executable or its path
|
29
|
-
"""
|
30
|
-
if version:
|
31
|
-
interpreter = shutil.which(f"python{version}")
|
32
|
-
if not interpreter:
|
33
|
-
raise ValueError(
|
34
|
-
f"Python version {version} not available on host."
|
35
|
-
)
|
36
|
-
else:
|
37
|
-
interpreter = sys.executable
|
38
|
-
|
39
|
-
return interpreter
|
9
|
+
COLLECTION_FREEZE_FILENAME = "collection_freeze.txt"
|
40
10
|
|
41
11
|
|
42
12
|
def slugify_task_name(task_name: str) -> str:
|
@@ -63,6 +33,10 @@ def get_log_path(base: Path) -> Path:
|
|
63
33
|
return base / COLLECTION_LOG_FILENAME
|
64
34
|
|
65
35
|
|
36
|
+
def get_freeze_path(base: Path) -> Path:
|
37
|
+
return base / COLLECTION_FREEZE_FILENAME
|
38
|
+
|
39
|
+
|
66
40
|
def get_collection_log(venv_path: Path) -> str:
|
67
41
|
package_path = get_absolute_venv_path(venv_path)
|
68
42
|
log_path = get_log_path(package_path)
|
@@ -70,6 +44,13 @@ def get_collection_log(venv_path: Path) -> str:
|
|
70
44
|
return log
|
71
45
|
|
72
46
|
|
47
|
+
def get_collection_freeze(venv_path: Path) -> str:
|
48
|
+
package_path = get_absolute_venv_path(venv_path)
|
49
|
+
freeze_path = get_freeze_path(package_path)
|
50
|
+
freeze = freeze_path.open().read()
|
51
|
+
return freeze
|
52
|
+
|
53
|
+
|
73
54
|
def _normalize_package_name(name: str) -> str:
|
74
55
|
"""
|
75
56
|
Implement PyPa specifications for package-name normalization
|
@@ -86,36 +67,3 @@ def _normalize_package_name(name: str) -> str:
|
|
86
67
|
The normalized package name.
|
87
68
|
"""
|
88
69
|
return re.sub(r"[-_.]+", "-", name).lower()
|
89
|
-
|
90
|
-
|
91
|
-
async def _init_venv(
|
92
|
-
*,
|
93
|
-
path: Path,
|
94
|
-
python_version: Optional[str] = None,
|
95
|
-
logger_name: str,
|
96
|
-
) -> Path:
|
97
|
-
"""
|
98
|
-
Set a virtual environment at `path/venv`
|
99
|
-
|
100
|
-
Args:
|
101
|
-
path : Path
|
102
|
-
path to directory in which to set up the virtual environment
|
103
|
-
python_version : default=None
|
104
|
-
Python version the virtual environment will be based upon
|
105
|
-
|
106
|
-
Returns:
|
107
|
-
python_bin : Path
|
108
|
-
path to python interpreter
|
109
|
-
"""
|
110
|
-
logger = get_logger(logger_name)
|
111
|
-
logger.debug(f"[_init_venv] {path=}")
|
112
|
-
interpreter = get_python_interpreter(version=python_version)
|
113
|
-
logger.debug(f"[_init_venv] {interpreter=}")
|
114
|
-
await execute_command(
|
115
|
-
cwd=path,
|
116
|
-
command=f"{interpreter} -m venv venv",
|
117
|
-
logger_name=logger_name,
|
118
|
-
)
|
119
|
-
python_bin = path / "venv/bin/python"
|
120
|
-
logger.debug(f"[_init_venv] {python_bin=}")
|
121
|
-
return python_bin
|