fractal-server 2.2.0a0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/db/__init__.py +1 -1
  3. fractal_server/app/models/v1/state.py +1 -2
  4. fractal_server/app/routes/admin/v1.py +2 -2
  5. fractal_server/app/routes/admin/v2.py +2 -2
  6. fractal_server/app/routes/api/v1/job.py +2 -2
  7. fractal_server/app/routes/api/v1/task_collection.py +4 -4
  8. fractal_server/app/routes/api/v2/__init__.py +23 -3
  9. fractal_server/app/routes/api/v2/job.py +2 -2
  10. fractal_server/app/routes/api/v2/submit.py +6 -0
  11. fractal_server/app/routes/api/v2/task_collection.py +74 -34
  12. fractal_server/app/routes/api/v2/task_collection_custom.py +170 -0
  13. fractal_server/app/routes/api/v2/task_collection_ssh.py +125 -0
  14. fractal_server/app/routes/aux/_runner.py +10 -2
  15. fractal_server/app/runner/compress_folder.py +120 -0
  16. fractal_server/app/runner/executors/slurm/__init__.py +0 -3
  17. fractal_server/app/runner/executors/slurm/_batching.py +0 -1
  18. fractal_server/app/runner/executors/slurm/_slurm_config.py +9 -9
  19. fractal_server/app/runner/executors/slurm/ssh/__init__.py +3 -0
  20. fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +112 -0
  21. fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +120 -0
  22. fractal_server/app/runner/executors/slurm/ssh/executor.py +1488 -0
  23. fractal_server/app/runner/executors/slurm/sudo/__init__.py +3 -0
  24. fractal_server/app/runner/executors/slurm/{_check_jobs_status.py → sudo/_check_jobs_status.py} +1 -1
  25. fractal_server/app/runner/executors/slurm/{_executor_wait_thread.py → sudo/_executor_wait_thread.py} +1 -1
  26. fractal_server/app/runner/executors/slurm/{_subprocess_run_as_user.py → sudo/_subprocess_run_as_user.py} +1 -1
  27. fractal_server/app/runner/executors/slurm/{executor.py → sudo/executor.py} +12 -12
  28. fractal_server/app/runner/extract_archive.py +38 -0
  29. fractal_server/app/runner/v1/__init__.py +78 -40
  30. fractal_server/app/runner/v1/_slurm/__init__.py +1 -1
  31. fractal_server/app/runner/v2/__init__.py +183 -82
  32. fractal_server/app/runner/v2/_local_experimental/__init__.py +22 -12
  33. fractal_server/app/runner/v2/_local_experimental/executor.py +12 -8
  34. fractal_server/app/runner/v2/_slurm/__init__.py +1 -6
  35. fractal_server/app/runner/v2/_slurm_ssh/__init__.py +125 -0
  36. fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +83 -0
  37. fractal_server/app/runner/v2/_slurm_ssh/get_slurm_config.py +182 -0
  38. fractal_server/app/runner/v2/runner_functions_low_level.py +9 -11
  39. fractal_server/app/runner/versions.py +30 -0
  40. fractal_server/app/schemas/v1/__init__.py +1 -0
  41. fractal_server/app/schemas/{state.py → v1/state.py} +4 -21
  42. fractal_server/app/schemas/v2/__init__.py +4 -1
  43. fractal_server/app/schemas/v2/task_collection.py +101 -30
  44. fractal_server/config.py +222 -21
  45. fractal_server/main.py +27 -1
  46. fractal_server/migrations/env.py +1 -1
  47. fractal_server/ssh/__init__.py +4 -0
  48. fractal_server/ssh/_fabric.py +245 -0
  49. fractal_server/tasks/utils.py +12 -64
  50. fractal_server/tasks/v1/background_operations.py +2 -2
  51. fractal_server/tasks/{endpoint_operations.py → v1/endpoint_operations.py} +7 -12
  52. fractal_server/tasks/v1/utils.py +67 -0
  53. fractal_server/tasks/v2/_TaskCollectPip.py +61 -32
  54. fractal_server/tasks/v2/_venv_pip.py +195 -0
  55. fractal_server/tasks/v2/background_operations.py +257 -295
  56. fractal_server/tasks/v2/background_operations_ssh.py +317 -0
  57. fractal_server/tasks/v2/endpoint_operations.py +136 -0
  58. fractal_server/tasks/v2/templates/_1_create_venv.sh +46 -0
  59. fractal_server/tasks/v2/templates/_2_upgrade_pip.sh +30 -0
  60. fractal_server/tasks/v2/templates/_3_pip_install.sh +32 -0
  61. fractal_server/tasks/v2/templates/_4_pip_freeze.sh +21 -0
  62. fractal_server/tasks/v2/templates/_5_pip_show.sh +59 -0
  63. fractal_server/tasks/v2/utils.py +54 -0
  64. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0.dist-info}/METADATA +6 -2
  65. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0.dist-info}/RECORD +68 -44
  66. fractal_server/tasks/v2/get_collection_data.py +0 -14
  67. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0.dist-info}/LICENSE +0 -0
  68. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0.dist-info}/WHEEL +0 -0
  69. {fractal_server-2.2.0a0.dist-info → fractal_server-2.3.0.dist-info}/entry_points.txt +0 -0
fractal_server/config.py CHANGED
@@ -13,6 +13,7 @@
13
13
  # Zurich.
14
14
  import logging
15
15
  import shutil
16
+ import sys
16
17
  from os import environ
17
18
  from os import getenv
18
19
  from os.path import abspath
@@ -166,7 +167,7 @@ class Settings(BaseSettings):
166
167
  ###########################################################################
167
168
  # DATABASE
168
169
  ###########################################################################
169
- DB_ENGINE: Literal["sqlite", "postgres"] = "sqlite"
170
+ DB_ENGINE: Literal["sqlite", "postgres", "postgres-psycopg"] = "sqlite"
170
171
  """
171
172
  Select which database engine to use (supported: `sqlite`, `postgres` and `postgres-psycopg`).
172
173
  """
@@ -201,39 +202,51 @@ class Settings(BaseSettings):
201
202
  """
202
203
 
203
204
  @property
204
- def DATABASE_URL(self) -> URL:
205
- if self.DB_ENGINE == "sqlite":
206
- if not self.SQLITE_PATH:
207
- raise FractalConfigurationError(
208
- "SQLITE_PATH path cannot be None"
209
- )
210
- sqlite_path = abspath(self.SQLITE_PATH)
205
+ def DATABASE_ASYNC_URL(self) -> URL:
206
+ if self.DB_ENGINE == "postgres":
211
207
  url = URL.create(
212
- drivername="sqlite+aiosqlite",
213
- database=sqlite_path,
208
+ drivername="postgresql+asyncpg",
209
+ username=self.POSTGRES_USER,
210
+ password=self.POSTGRES_PASSWORD,
211
+ host=self.POSTGRES_HOST,
212
+ port=self.POSTGRES_PORT,
213
+ database=self.POSTGRES_DB,
214
214
  )
215
- return url
216
- elif "postgres":
215
+ elif self.DB_ENGINE == "postgres-psycopg":
217
216
  url = URL.create(
218
- drivername="postgresql+asyncpg",
217
+ drivername="postgresql+psycopg",
219
218
  username=self.POSTGRES_USER,
220
219
  password=self.POSTGRES_PASSWORD,
221
220
  host=self.POSTGRES_HOST,
222
221
  port=self.POSTGRES_PORT,
223
222
  database=self.POSTGRES_DB,
224
223
  )
225
- return url
224
+ else:
225
+ if not self.SQLITE_PATH:
226
+ raise FractalConfigurationError(
227
+ "SQLITE_PATH path cannot be None"
228
+ )
229
+ sqlite_path = abspath(self.SQLITE_PATH)
230
+ url = URL.create(
231
+ drivername="sqlite+aiosqlite",
232
+ database=sqlite_path,
233
+ )
234
+ return url
226
235
 
227
236
  @property
228
237
  def DATABASE_SYNC_URL(self):
229
- if self.DB_ENGINE == "sqlite":
238
+ if self.DB_ENGINE == "postgres":
239
+ return self.DATABASE_ASYNC_URL.set(
240
+ drivername="postgresql+psycopg2"
241
+ )
242
+ elif self.DB_ENGINE == "postgres-psycopg":
243
+ return self.DATABASE_ASYNC_URL.set(drivername="postgresql+psycopg")
244
+ else:
230
245
  if not self.SQLITE_PATH:
231
246
  raise FractalConfigurationError(
232
247
  "SQLITE_PATH path cannot be None"
233
248
  )
234
- return self.DATABASE_URL.set(drivername="sqlite")
235
- elif self.DB_ENGINE == "postgres":
236
- return self.DATABASE_URL.set(drivername="postgresql+psycopg2")
249
+ return self.DATABASE_ASYNC_URL.set(drivername="sqlite")
237
250
 
238
251
  ###########################################################################
239
252
  # FRACTAL SPECIFIC
@@ -311,7 +324,10 @@ class Settings(BaseSettings):
311
324
  return FRACTAL_RUNNER_WORKING_BASE_DIR_path
312
325
 
313
326
  FRACTAL_RUNNER_BACKEND: Literal[
314
- "local", "local_experimental", "slurm"
327
+ "local",
328
+ "local_experimental",
329
+ "slurm",
330
+ "slurm_ssh",
315
331
  ] = "local"
316
332
  """
317
333
  Select which runner backend to use.
@@ -354,9 +370,125 @@ class Settings(BaseSettings):
354
370
 
355
371
  FRACTAL_SLURM_WORKER_PYTHON: Optional[str] = None
356
372
  """
357
- Path to Python interpreter that will run the jobs on the SLURM nodes. If
358
- not specified, the same interpreter that runs the server is used.
373
+ Absolute path to Python interpreter that will run the jobs on the SLURM
374
+ nodes. If not specified, the same interpreter that runs the server is used.
375
+ """
376
+
377
+ @validator("FRACTAL_SLURM_WORKER_PYTHON", always=True)
378
+ def absolute_FRACTAL_SLURM_WORKER_PYTHON(cls, v):
379
+ """
380
+ If `FRACTAL_SLURM_WORKER_PYTHON` is a relative path, fail.
381
+ """
382
+ if v is None:
383
+ return None
384
+ elif not Path(v).is_absolute():
385
+ raise FractalConfigurationError(
386
+ f"Non-absolute value for FRACTAL_SLURM_WORKER_PYTHON={v}"
387
+ )
388
+ else:
389
+ return v
390
+
391
+ FRACTAL_TASKS_PYTHON_DEFAULT_VERSION: Optional[
392
+ Literal["3.9", "3.10", "3.11", "3.12"]
393
+ ] = None
394
+ """
395
+ Default Python version to be used for task collection. Defaults to the
396
+ current version. Requires the corresponding variable (e.g.
397
+ `FRACTAL_TASKS_PYTHON_3_10`) to be set.
398
+ """
399
+
400
+ FRACTAL_TASKS_PYTHON_3_9: Optional[str] = None
359
401
  """
402
+ Absolute path to the Python 3.9 interpreter that serves as base for virtual
403
+ environments tasks. Note that this interpreter must have the `venv` module
404
+ installed. If set, this must be an absolute path. It is required when
405
+ `FRACTAL_TASKS_PYTHON_DEFAULT_VERSION` is `"3.9"`; when that variable is unset
406
+ and the server runs on Python 3.9, `sys.executable` is used.
407
+ """
408
+
409
+ FRACTAL_TASKS_PYTHON_3_10: Optional[str] = None
410
+ """
411
+ Same as `FRACTAL_TASKS_PYTHON_3_9`, for Python 3.10.
412
+ """
413
+
414
+ FRACTAL_TASKS_PYTHON_3_11: Optional[str] = None
415
+ """
416
+ Same as `FRACTAL_TASKS_PYTHON_3_9`, for Python 3.11.
417
+ """
418
+
419
+ FRACTAL_TASKS_PYTHON_3_12: Optional[str] = None
420
+ """
421
+ Same as `FRACTAL_TASKS_PYTHON_3_9`, for Python 3.12.
422
+ """
423
+
424
+ @root_validator(pre=True)
425
+ def check_tasks_python(cls, values) -> None:
426
+ """
427
+ Perform multiple checks of the Python-interpreter variables.
428
+
429
+ 1. Each `FRACTAL_TASKS_PYTHON_X_Y` variable must be an absolute path,
430
+ if set.
431
+ 2. If `FRACTAL_TASKS_PYTHON_DEFAULT_VERSION` is unset, use
432
+ `sys.executable` and set the corresponding
433
+ `FRACTAL_TASKS_PYTHON_X_Y` (and unset all others).
434
+ """
435
+
436
+ # `FRACTAL_TASKS_PYTHON_X_Y` variables can only be absolute paths
437
+ for version in ["3_9", "3_10", "3_11", "3_12"]:
438
+ key = f"FRACTAL_TASKS_PYTHON_{version}"
439
+ value = values.get(key)
440
+ if value is not None and not Path(value).is_absolute():
441
+ raise FractalConfigurationError(
442
+ f"Non-absolute value {key}={value}"
443
+ )
444
+
445
+ default_version = values.get("FRACTAL_TASKS_PYTHON_DEFAULT_VERSION")
446
+
447
+ if default_version is not None:
448
+ # "production/slurm" branch
449
+ # If a default version is set, then the corresponding interpreter
450
+ # must also be set
451
+ default_version_undescore = default_version.replace(".", "_")
452
+ key = f"FRACTAL_TASKS_PYTHON_{default_version_undescore}"
453
+ value = values.get(key)
454
+ if value is None:
455
+ msg = (
456
+ f"FRACTAL_TASKS_PYTHON_DEFAULT_VERSION={default_version} "
457
+ f"but {key}={value}."
458
+ )
459
+ logging.error(msg)
460
+ raise FractalConfigurationError(msg)
461
+
462
+ else:
463
+ # If no default version is set, then only `sys.executable` is made
464
+ # available
465
+ _info = sys.version_info
466
+ current_version = f"{_info.major}_{_info.minor}"
467
+ current_version_dot = f"{_info.major}.{_info.minor}"
468
+ values[
469
+ "FRACTAL_TASKS_PYTHON_DEFAULT_VERSION"
470
+ ] = current_version_dot
471
+ logging.info(
472
+ "Setting FRACTAL_TASKS_PYTHON_DEFAULT_VERSION to "
473
+ f"{current_version_dot}"
474
+ )
475
+
476
+ # Unset all existing interpreter variables
477
+ for _version in ["3_9", "3_10", "3_11", "3_12"]:
478
+ key = f"FRACTAL_TASKS_PYTHON_{_version}"
479
+ if _version == current_version:
480
+ values[key] = sys.executable
481
+ logging.info(f"Setting {key} to {sys.executable}.")
482
+ else:
483
+ value = values.get(key)
484
+ if value is not None:
485
+ logging.info(
486
+ f"Setting {key} to None (given: {value}), "
487
+ "because FRACTAL_TASKS_PYTHON_DEFAULT_VERSION was "
488
+ "not set."
489
+ )
490
+ values[key] = None
491
+ return values
360
492
 
361
493
  FRACTAL_SLURM_POLL_INTERVAL: int = 5
362
494
  """
@@ -380,6 +512,25 @@ class Settings(BaseSettings):
380
512
  `JobExecutionError`.
381
513
  """
382
514
 
515
+ FRACTAL_SLURM_SSH_HOST: Optional[str] = None
516
+ """
517
+ SSH-reachable host where a SLURM client is available.
518
+ """
519
+ FRACTAL_SLURM_SSH_USER: Optional[str] = None
520
+ """
521
+ User on `FRACTAL_SLURM_SSH_HOST`.
522
+ """
523
+ FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH: Optional[str] = None
524
+ """
525
+ Private key for connecting to `FRACTAL_SLURM_SSH_HOST` as
526
+ `FRACTAL_SLURM_SSH_USER`.
527
+ """
528
+ # FIXME SSH: Split this into two folders (for tasks and for jobs)
529
+ FRACTAL_SLURM_SSH_WORKING_BASE_DIR: Optional[str] = None
530
+ """
531
+ Remote folder on `FRACTAL_SLURM_SSH_HOST`.
532
+ """
533
+
383
534
  FRACTAL_API_SUBMIT_RATE_LIMIT: int = 2
384
535
  """
385
536
  Interval to wait (in seconds) to be allowed to call again
@@ -420,6 +571,14 @@ class Settings(BaseSettings):
420
571
  "DB engine is `postgres` but `psycopg2` or `asyncpg` "
421
572
  "are not available"
422
573
  )
574
+ elif self.DB_ENGINE == "postgres-psycopg":
575
+ try:
576
+ import psycopg # noqa: F401
577
+ except ModuleNotFoundError:
578
+ raise FractalConfigurationError(
579
+ "DB engine is `postgres-psycopg` but `psycopg` is not "
580
+ "available"
581
+ )
423
582
  else:
424
583
  if not self.SQLITE_PATH:
425
584
  raise FractalConfigurationError(
@@ -460,6 +619,48 @@ class Settings(BaseSettings):
460
619
  raise FractalConfigurationError(
461
620
  f"{info} but `squeue` command not found."
462
621
  )
622
+ elif self.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
623
+ if self.FRACTAL_SLURM_WORKER_PYTHON is None:
624
+ raise FractalConfigurationError(
625
+ f"Must set FRACTAL_SLURM_WORKER_PYTHON when {info}"
626
+ )
627
+ if self.FRACTAL_SLURM_SSH_USER is None:
628
+ raise FractalConfigurationError(
629
+ f"Must set FRACTAL_SLURM_SSH_USER when {info}"
630
+ )
631
+ if self.FRACTAL_SLURM_SSH_HOST is None:
632
+ raise FractalConfigurationError(
633
+ f"Must set FRACTAL_SLURM_SSH_HOST when {info}"
634
+ )
635
+ if self.FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH is None:
636
+ raise FractalConfigurationError(
637
+ f"Must set FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH when {info}"
638
+ )
639
+ if self.FRACTAL_SLURM_SSH_WORKING_BASE_DIR is None:
640
+ raise FractalConfigurationError(
641
+ f"Must set FRACTAL_SLURM_SSH_WORKING_BASE_DIR when {info}"
642
+ )
643
+
644
+ from fractal_server.app.runner.executors.slurm._slurm_config import ( # noqa: E501
645
+ load_slurm_config_file,
646
+ )
647
+
648
+ if not self.FRACTAL_SLURM_CONFIG_FILE:
649
+ raise FractalConfigurationError(
650
+ f"Must set FRACTAL_SLURM_CONFIG_FILE when {info}"
651
+ )
652
+ else:
653
+ if not self.FRACTAL_SLURM_CONFIG_FILE.exists():
654
+ raise FractalConfigurationError(
655
+ f"{info} but FRACTAL_SLURM_CONFIG_FILE="
656
+ f"{self.FRACTAL_SLURM_CONFIG_FILE} not found."
657
+ )
658
+
659
+ load_slurm_config_file(self.FRACTAL_SLURM_CONFIG_FILE)
660
+ if not shutil.which("ssh"):
661
+ raise FractalConfigurationError(
662
+ f"{info} but `ssh` command not found."
663
+ )
463
664
  else: # i.e. self.FRACTAL_RUNNER_BACKEND == "local"
464
665
  if self.FRACTAL_LOCAL_CONFIG_FILE:
465
666
  if not self.FRACTAL_LOCAL_CONFIG_FILE.exists():
fractal_server/main.py CHANGED
@@ -20,6 +20,7 @@ from contextlib import asynccontextmanager
20
20
 
21
21
  from fastapi import FastAPI
22
22
 
23
+ from .app.routes.aux._runner import _backend_supports_shutdown # FIXME: change
23
24
  from .app.runner.shutdown import cleanup_after_shutdown
24
25
  from .app.security import _create_first_user
25
26
  from .config import get_settings
@@ -97,17 +98,40 @@ async def lifespan(app: FastAPI):
97
98
  is_superuser=True,
98
99
  is_verified=True,
99
100
  )
101
+
102
+ if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
103
+ from fractal_server.ssh._fabric import get_ssh_connection
104
+ from fractal_server.ssh._fabric import FractalSSH
105
+
106
+ connection = get_ssh_connection()
107
+ app.state.fractal_ssh = FractalSSH(connection=connection)
108
+ logger.info(
109
+ f"Created SSH connection "
110
+ f"({app.state.fractal_ssh.is_connected=})."
111
+ )
112
+ else:
113
+ app.state.fractal_ssh = None
114
+
100
115
  config_uvicorn_loggers()
101
116
  logger.info("End application startup")
102
117
  reset_logger_handlers(logger)
103
118
  yield
104
119
  logger = get_logger("fractal_server.lifespan")
105
120
  logger.info("Start application shutdown")
121
+
122
+ if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
123
+ logger.info(
124
+ f"Closing SSH connection "
125
+ f"(current: {app.state.fractal_ssh.is_connected=})."
126
+ )
127
+
128
+ app.state.fractal_ssh.close()
129
+
106
130
  logger.info(
107
131
  f"Current worker with pid {os.getpid()} is shutting down. "
108
132
  f"Current jobs: {app.state.jobsV1=}, {app.state.jobsV2=}"
109
133
  )
110
- if settings.FRACTAL_RUNNER_BACKEND == "slurm":
134
+ if _backend_supports_shutdown(settings.FRACTAL_RUNNER_BACKEND):
111
135
  try:
112
136
  await cleanup_after_shutdown(
113
137
  jobsV1=app.state.jobsV1,
@@ -120,6 +144,8 @@ async def lifespan(app: FastAPI):
120
144
  "some of running jobs are not shutdown properly. "
121
145
  f"Original error: {e}"
122
146
  )
147
+ else:
148
+ logger.info("Shutdown not available for this backend runner.")
123
149
 
124
150
  logger.info("End application shutdown")
125
151
  reset_logger_handlers(logger)
fractal_server/migrations/env.py CHANGED
@@ -54,7 +54,7 @@ def run_migrations_offline() -> None:
54
54
  settings = Inject(get_settings)
55
55
  settings.check_db()
56
56
  context.configure(
57
- url=settings.DATABASE_URL,
57
+ url=settings.DATABASE_ASYNC_URL,
58
58
  target_metadata=target_metadata,
59
59
  literal_binds=True,
60
60
  dialect_opts={"paramstyle": "named"},
fractal_server/ssh/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """
2
+ The `fractal_server.ssh` subpackage is meant as a layer in front of some SSH
3
+ library (e.g. `fabric` or `asyncssh`).
4
+ """
fractal_server/ssh/_fabric.py ADDED
@@ -0,0 +1,245 @@
1
+ import time
2
+ from contextlib import contextmanager
3
+ from threading import Lock
4
+ from typing import Any
5
+ from typing import Optional
6
+
7
+ from fabric import Connection
8
+ from fabric import Result
9
+ from invoke import UnexpectedExit
10
+ from paramiko.ssh_exception import NoValidConnectionsError
11
+
12
+ from ..logger import get_logger
13
+ from ..logger import set_logger
14
+ from fractal_server.config import get_settings
15
+ from fractal_server.syringe import Inject
16
+
17
+ logger = set_logger(__name__)
18
+
19
+ MAX_ATTEMPTS = 5
20
+
21
+
22
+ class TimeoutException(Exception):
23
+ pass
24
+
25
+
26
+ @contextmanager
27
+ def acquire_timeout(lock: Lock, timeout: int) -> Any:
28
+ logger.debug(f"Trying to acquire lock, with {timeout=}")
29
+ result = lock.acquire(timeout=timeout)
30
+ try:
31
+ if not result:
32
+ raise TimeoutException(
33
+ f"Failed to acquire lock within {timeout} seconds"
34
+ )
35
+ logger.debug("Lock was acquired.")
36
+ yield result
37
+ finally:
38
+ if result:
39
+ lock.release()
40
+ logger.debug("Lock was released")
41
+
42
+
43
+ class FractalSSH(object):
44
+ lock: Lock
45
+ connection: Connection
46
+ default_timeout: int
47
+
48
+ # FIXME SSH: maybe extend the actual_timeout logic to other methods
49
+
50
+ def __init__(self, connection: Connection, default_timeout: int = 250):
51
+ self.lock = Lock()
52
+ self.conn = connection
53
+ self.default_timeout = default_timeout
54
+
55
+ @property
56
+ def is_connected(self) -> bool:
57
+ return self.conn.is_connected
58
+
59
+ def put(self, *args, timeout: Optional[int] = None, **kwargs) -> Result:
60
+ actual_timeout = timeout or self.default_timeout
61
+ with acquire_timeout(self.lock, timeout=actual_timeout):
62
+ return self.conn.put(*args, **kwargs)
63
+
64
+ def get(self, *args, **kwargs) -> Result:
65
+ with acquire_timeout(self.lock, timeout=self.default_timeout):
66
+ return self.conn.get(*args, **kwargs)
67
+
68
+ def run(self, *args, **kwargs) -> Any:
69
+ with acquire_timeout(self.lock, timeout=self.default_timeout):
70
+ return self.conn.run(*args, **kwargs)
71
+
72
+ def close(self):
73
+ return self.conn.close()
74
+
75
+ def sftp(self):
76
+ return self.conn.sftp()
77
+
78
+ def check_connection(self) -> None:
79
+ """
80
+ Open the SSH connection and handle exceptions.
81
+
82
+ This function can be called from within other functions that use
83
+ `connection`, so that we can provide a meaningful error in case the
84
+ SSH connection cannot be opened.
85
+ """
86
+ if not self.conn.is_connected:
87
+ try:
88
+ self.conn.open()
89
+ except Exception as e:
90
+ raise RuntimeError(
91
+ f"Cannot open SSH connection (original error: '{str(e)}')."
92
+ )
93
+
94
+
95
+ def get_ssh_connection(
96
+ *,
97
+ host: Optional[str] = None,
98
+ user: Optional[str] = None,
99
+ key_filename: Optional[str] = None,
100
+ ) -> Connection:
101
+ """
102
+ Create a `fabric.Connection` object based on fractal-server settings
103
+ or explicit arguments.
104
+
105
+ Args:
106
+ host:
107
+ user:
108
+ key_filename:
109
+
110
+ Returns:
111
+ Fabric connection object
112
+ """
113
+ settings = Inject(get_settings)
114
+ if host is None:
115
+ host = settings.FRACTAL_SLURM_SSH_HOST
116
+ if user is None:
117
+ user = settings.FRACTAL_SLURM_SSH_USER
118
+ if key_filename is None:
119
+ key_filename = settings.FRACTAL_SLURM_SSH_PRIVATE_KEY_PATH
120
+
121
+ connection = Connection(
122
+ host=host,
123
+ user=user,
124
+ connect_kwargs={"key_filename": key_filename},
125
+ )
126
+ logger.debug(f"Now created {connection=}.")
127
+ return connection
128
+
129
+
130
+ def run_command_over_ssh(
131
+ *,
132
+ cmd: str,
133
+ fractal_ssh: FractalSSH,
134
+ max_attempts: int = MAX_ATTEMPTS,
135
+ base_interval: float = 3.0,
136
+ ) -> str:
137
+ """
138
+ Run a command within an open SSH connection.
139
+
140
+ Args:
141
+ cmd: Command to be run
142
+ fractal_ssh: FractalSSH connection object with custom lock
143
+
144
+ Returns:
145
+ Standard output of the command, if successful.
146
+ """
147
+ t_0 = time.perf_counter()
148
+ ind_attempt = 0
149
+ while ind_attempt <= max_attempts:
150
+ ind_attempt += 1
151
+ prefix = f"[attempt {ind_attempt}/{max_attempts}]"
152
+ logger.info(f"{prefix} START running '{cmd}' over SSH.")
153
+ try:
154
+ # Case 1: Command runs successfully
155
+ res = fractal_ssh.run(cmd, hide=True)
156
+ t_1 = time.perf_counter()
157
+ logger.info(
158
+ f"{prefix} END running '{cmd}' over SSH, "
159
+ f"elapsed {t_1-t_0:.3f}"
160
+ )
161
+ logger.debug(f"STDOUT: {res.stdout}")
162
+ logger.debug(f"STDERR: {res.stderr}")
163
+ return res.stdout
164
+ except NoValidConnectionsError as e:
165
+ # Case 2: Command fails with a connection error
166
+ logger.warning(
167
+ f"{prefix} Running command `{cmd}` over SSH failed.\n"
168
+ f"Original NoValidConnectionError:\n{str(e)}.\n"
169
+ f"{e.errors=}\n"
170
+ )
171
+ if ind_attempt < max_attempts:
172
+ sleeptime = base_interval**ind_attempt
173
+ logger.warning(
174
+ f"{prefix} Now sleep {sleeptime:.3f} seconds and continue."
175
+ )
176
+ time.sleep(sleeptime)
177
+ continue
178
+ else:
179
+ logger.error(f"{prefix} Reached last attempt")
180
+ break
181
+ except UnexpectedExit as e:
182
+ # Case 3: Command fails with an actual error
183
+ error_msg = (
184
+ f"{prefix} Running command `{cmd}` over SSH failed.\n"
185
+ f"Original error:\n{str(e)}."
186
+ )
187
+ logger.error(error_msg)
188
+ raise ValueError(error_msg)
189
+ except Exception as e:
190
+ logger.error(
191
+ f"Running command `{cmd}` over SSH failed.\n"
192
+ f"Original Error:\n{str(e)}."
193
+ )
194
+ raise e
195
+
196
+ raise ValueError(
197
+ f"Reached last attempt ({max_attempts=}) for running '{cmd}' over SSH"
198
+ )
199
+
200
+
201
+ def put_over_ssh(
202
+ *,
203
+ local: str,
204
+ remote: str,
205
+ fractal_ssh: FractalSSH,
206
+ logger_name: Optional[str] = None,
207
+ ) -> None:
208
+ """
209
+ Transfer a file via SSH
210
+
211
+ Args:
212
+ local: Local path to file
213
+ remote: Target path on remote host
214
+ fractal_ssh: FractalSSH connection object with custom lock
215
+ logger_name: Name of the logger
216
+
217
+ """
218
+ try:
219
+ fractal_ssh.put(local=local, remote=remote)
220
+ except Exception as e:
221
+ logger = get_logger(logger_name=logger_name)
222
+ logger.error(
223
+ f"Transferring {local=} to {remote=} over SSH failed.\n"
224
+ f"Original Error:\n{str(e)}."
225
+ )
226
+ raise e
227
+
228
+
229
+ def _mkdir_over_ssh(
230
+ *, folder: str, fractal_ssh: FractalSSH, parents: bool = True
231
+ ) -> None:
232
+ """
233
+ Create a folder remotely via SSH.
234
+
235
+ Args:
236
+ folder:
237
+ fractal_ssh:
238
+ parents:
239
+ """
240
+ # FIXME SSH: try using `mkdir` method of `paramiko.SFTPClient`
241
+ if parents:
242
+ cmd = f"mkdir -p {folder}"
243
+ else:
244
+ cmd = f"mkdir {folder}"
245
+ run_command_over_ssh(cmd=cmd, fractal_ssh=fractal_ssh)