fractal-server 2.12.0a1__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/__main__.py +17 -63
  3. fractal_server/app/models/security.py +9 -12
  4. fractal_server/app/models/v2/dataset.py +2 -2
  5. fractal_server/app/models/v2/job.py +11 -9
  6. fractal_server/app/models/v2/task.py +2 -3
  7. fractal_server/app/models/v2/task_group.py +6 -2
  8. fractal_server/app/models/v2/workflowtask.py +15 -8
  9. fractal_server/app/routes/admin/v2/task.py +1 -1
  10. fractal_server/app/routes/admin/v2/task_group.py +1 -1
  11. fractal_server/app/routes/api/v2/dataset.py +4 -4
  12. fractal_server/app/routes/api/v2/images.py +11 -23
  13. fractal_server/app/routes/api/v2/project.py +2 -2
  14. fractal_server/app/routes/api/v2/status.py +1 -1
  15. fractal_server/app/routes/api/v2/submit.py +8 -6
  16. fractal_server/app/routes/api/v2/task.py +4 -2
  17. fractal_server/app/routes/api/v2/task_collection.py +3 -2
  18. fractal_server/app/routes/api/v2/task_group.py +2 -2
  19. fractal_server/app/routes/api/v2/workflow.py +3 -3
  20. fractal_server/app/routes/api/v2/workflow_import.py +3 -3
  21. fractal_server/app/routes/api/v2/workflowtask.py +3 -1
  22. fractal_server/app/routes/auth/_aux_auth.py +4 -1
  23. fractal_server/app/routes/auth/current_user.py +3 -5
  24. fractal_server/app/routes/auth/group.py +1 -1
  25. fractal_server/app/routes/auth/users.py +2 -4
  26. fractal_server/app/routes/aux/_runner.py +1 -1
  27. fractal_server/app/routes/aux/validate_user_settings.py +1 -2
  28. fractal_server/app/runner/executors/_job_states.py +13 -0
  29. fractal_server/app/runner/executors/slurm/_slurm_config.py +26 -18
  30. fractal_server/app/runner/executors/slurm/ssh/__init__.py +0 -3
  31. fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +31 -22
  32. fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +2 -6
  33. fractal_server/app/runner/executors/slurm/ssh/executor.py +35 -50
  34. fractal_server/app/runner/executors/slurm/sudo/__init__.py +0 -3
  35. fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +1 -2
  36. fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +37 -47
  37. fractal_server/app/runner/executors/slurm/sudo/executor.py +77 -41
  38. fractal_server/app/runner/v2/__init__.py +0 -9
  39. fractal_server/app/runner/v2/_local/_local_config.py +5 -4
  40. fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py +4 -4
  41. fractal_server/app/runner/v2/_slurm_sudo/__init__.py +2 -2
  42. fractal_server/app/runner/v2/deduplicate_list.py +1 -1
  43. fractal_server/app/runner/v2/runner.py +9 -4
  44. fractal_server/app/runner/v2/task_interface.py +15 -7
  45. fractal_server/app/schemas/_filter_validators.py +6 -3
  46. fractal_server/app/schemas/_validators.py +7 -5
  47. fractal_server/app/schemas/user.py +23 -18
  48. fractal_server/app/schemas/user_group.py +25 -11
  49. fractal_server/app/schemas/user_settings.py +31 -24
  50. fractal_server/app/schemas/v2/dataset.py +48 -35
  51. fractal_server/app/schemas/v2/dumps.py +16 -14
  52. fractal_server/app/schemas/v2/job.py +49 -29
  53. fractal_server/app/schemas/v2/manifest.py +32 -28
  54. fractal_server/app/schemas/v2/project.py +18 -8
  55. fractal_server/app/schemas/v2/task.py +86 -75
  56. fractal_server/app/schemas/v2/task_collection.py +41 -30
  57. fractal_server/app/schemas/v2/task_group.py +39 -20
  58. fractal_server/app/schemas/v2/workflow.py +24 -12
  59. fractal_server/app/schemas/v2/workflowtask.py +63 -61
  60. fractal_server/app/security/__init__.py +7 -4
  61. fractal_server/app/security/signup_email.py +21 -12
  62. fractal_server/config.py +123 -75
  63. fractal_server/images/models.py +18 -12
  64. fractal_server/main.py +13 -10
  65. fractal_server/migrations/env.py +16 -63
  66. fractal_server/tasks/v2/local/collect.py +9 -8
  67. fractal_server/tasks/v2/local/deactivate.py +3 -0
  68. fractal_server/tasks/v2/local/reactivate.py +3 -0
  69. fractal_server/tasks/v2/ssh/collect.py +8 -8
  70. fractal_server/tasks/v2/ssh/deactivate.py +3 -0
  71. fractal_server/tasks/v2/ssh/reactivate.py +9 -6
  72. fractal_server/tasks/v2/utils_background.py +1 -1
  73. fractal_server/tasks/v2/utils_database.py +1 -1
  74. {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/METADATA +10 -11
  75. {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/RECORD +78 -81
  76. fractal_server/app/runner/v2/_local_experimental/__init__.py +0 -121
  77. fractal_server/app/runner/v2/_local_experimental/_local_config.py +0 -108
  78. fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +0 -42
  79. fractal_server/app/runner/v2/_local_experimental/executor.py +0 -157
  80. {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/LICENSE +0 -0
  81. {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/WHEEL +0 -0
  82. {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/entry_points.txt +0 -0
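Reading guide: the hunks shown below appear to correspond to items 22-35 of the list above, in order. Three themes recur throughout: (i) the Pydantic v1-to-v2 migration, where `.dict()` becomes `.model_dump()`, `extra=Extra.forbid` becomes `model_config = ConfigDict(extra="forbid")`, and `Optional` fields gain explicit `= None` defaults; (ii) removal of the `clusterfutures` (`cfut`) dependency in favor of in-tree code (the new `_job_states.py` module, a rewritten wait thread, and an SSH executor built directly on `concurrent.futures.Executor`); and (iii) removal of the `local_experimental` backend and of the `keep_pickle_files` option.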

fractal_server/app/routes/auth/_aux_auth.py
@@ -58,11 +58,14 @@ async def _get_single_user_with_groups(
         group_ids_names.insert(0, default_group)
     else:
         pass
+    oauth_accounts = [
+        oauth_account.model_dump() for oauth_account in user.oauth_accounts
+    ]
 
     return UserRead(
         **user.model_dump(),
         group_ids_names=group_ids_names,
-        oauth_accounts=user.oauth_accounts,
+        oauth_accounts=oauth_accounts,
     )
 
 

fractal_server/app/routes/auth/current_user.py
@@ -57,14 +57,14 @@ async def patch_current_user(
     Note: a user cannot patch their own password (as enforced within the
     `UserUpdateStrict` schema).
     """
-    update = UserUpdate(**user_update.dict(exclude_unset=True))
+    update = UserUpdate(**user_update.model_dump(exclude_unset=True))
 
     # NOTE: here it would be relevant to catch an `InvalidPasswordException`
     # (from `fastapi_users.exceptions`), if we were to allow users change
     # their own password
 
     user = await user_manager.update(update, current_user, safe=True)
-    validated_user = schemas.model_validate(UserOAuth, user)
+    validated_user = schemas.model_validate(UserOAuth, user.model_dump())
 
     patched_user = await db.get(
         UserOAuth, validated_user.id, populate_existing=True
@@ -82,7 +82,6 @@ async def get_current_user_settings(
     current_user: UserOAuth = Depends(current_active_user),
     db: AsyncSession = Depends(get_async_db),
 ) -> UserSettingsReadStrict:
-
     verify_user_has_settings(current_user)
     user_settings = await db.get(UserSettings, current_user.user_settings_id)
     return user_settings
@@ -96,13 +95,12 @@ async def patch_current_user_settings(
     current_user: UserOAuth = Depends(current_active_user),
     db: AsyncSession = Depends(get_async_db),
 ) -> UserSettingsReadStrict:
-
     verify_user_has_settings(current_user)
     current_user_settings = await db.get(
         UserSettings, current_user.user_settings_id
     )
 
-    for k, v in settings_update.dict(exclude_unset=True).items():
+    for k, v in settings_update.model_dump(exclude_unset=True).items():
         setattr(current_user_settings, k, v)
 
     db.add(current_user_settings)

fractal_server/app/routes/auth/group.py
@@ -194,7 +194,7 @@ async def patch_user_settings_bulk(
         .where(LinkUserGroup.group_id == group_id)
     )
     settings_list = res.scalars().all()
-    update = settings_update.dict(exclude_unset=True)
+    update = settings_update.model_dump(exclude_unset=True)
     for settings in settings_list:
         for k, v in update.items():
             setattr(settings, k, v)
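The `model_dump(exclude_unset=True)` idiom in this hunk (and in the `current_user.py` and `users.py` hunks nearby) is the standard Pydantic partial-update pattern for PATCH endpoints: only the fields the client actually sent are dumped, so omitted fields keep their stored values. A minimal self-contained sketch, with hypothetical models in place of the fractal-server schemas:

    from typing import Optional
    from pydantic import BaseModel

    class SettingsUpdate(BaseModel):  # hypothetical PATCH schema
        slurm_user: Optional[str] = None
        cache_dir: Optional[str] = None

    class StoredSettings:  # stands in for the database row
        slurm_user = "alice"
        cache_dir = "/tmp/cache"

    stored = StoredSettings()
    update = SettingsUpdate(cache_dir="/data/cache")

    # Only the explicitly-set field is applied; slurm_user is untouched.
    for k, v in update.model_dump(exclude_unset=True).items():
        setattr(stored, k, v)

    assert (stored.slurm_user, stored.cache_dir) == ("alice", "/data/cache")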

fractal_server/app/routes/auth/users.py
@@ -75,7 +75,7 @@ async def patch_user(
         safe=False,
         request=None,
     )
-    validated_user = schemas.model_validate(UserOAuth, user)
+    validated_user = schemas.model_validate(UserOAuth, user.model_dump())
     patched_user = await db.get(
         UserOAuth, validated_user.id, populate_existing=True
     )
@@ -139,7 +139,6 @@ async def set_user_groups(
     superuser: UserOAuth = Depends(current_active_superuser),
     db: AsyncSession = Depends(get_async_db),
 ) -> UserRead:
-
     # Preliminary check that all objects exist in the db
     user = await _user_or_404(user_id=user_id, db=db)
     target_group_ids = user_update.group_ids
@@ -209,7 +208,6 @@ async def get_user_settings(
     superuser: UserOAuth = Depends(current_active_superuser),
     db: AsyncSession = Depends(get_async_db),
 ) -> UserSettingsRead:
-
     user = await _user_or_404(user_id=user_id, db=db)
     verify_user_has_settings(user)
     user_settings = await db.get(UserSettings, user.user_settings_id)
@@ -229,7 +227,7 @@ async def patch_user_settings(
     verify_user_has_settings(user)
     user_settings = await db.get(UserSettings, user.user_settings_id)
 
-    for k, v in settings_update.dict(exclude_unset=True).items():
+    for k, v in settings_update.model_dump(exclude_unset=True).items():
         setattr(user_settings, k, v)
 
     db.add(user_settings)

fractal_server/app/routes/aux/_runner.py
@@ -6,7 +6,7 @@ from ....syringe import Inject
 
 
 def _backend_supports_shutdown(backend: str) -> bool:
-    if backend in ["slurm", "slurm_ssh", "local_experimental"]:
+    if backend in ["slurm", "slurm_ssh"]:
         return True
     else:
         return False

fractal_server/app/routes/aux/validate_user_settings.py
@@ -1,6 +1,5 @@
 from fastapi import HTTPException
 from fastapi import status
-from pydantic import BaseModel
 from pydantic import ValidationError
 
 from fractal_server.app.db import AsyncSession
@@ -55,7 +54,7 @@ async def validate_user_settings(
         UserSettingsValidationModel = SlurmSudoUserSettings
     else:
         # For other backends, we don't validate anything
-        UserSettingsValidationModel = BaseModel
+        return user_settings
 
     try:
         UserSettingsValidationModel(**user_settings.model_dump())

fractal_server/app/runner/executors/_job_states.py
@@ -0,0 +1,13 @@
+# https://slurm.schedmd.com/squeue.html#lbAG
+STATES_FINISHED = {
+    "BOOT_FAIL",
+    "CANCELLED",
+    "COMPLETED",
+    "DEADLINE",
+    "FAILED",
+    "NODE_FAIL",
+    "OUT_OF_MEMORY",
+    "PREEMPTED",
+    "SPECIAL_EXIT",
+    "TIMEOUT",
+}
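This new `_job_states.py` module vendors the set of terminal SLURM job states previously imported from `cfut.slurm` (compare the `executor.py` and `_check_jobs_status.py` hunks further down). A sketch of how such a set is typically consumed, using a hypothetical `squeue`-based helper rather than the actual fractal-server code:

    import subprocess  # nosec

    from fractal_server.app.runner.executors._job_states import STATES_FINISHED

    def jobs_finished(job_ids: list[str]) -> set[str]:
        # Hypothetical helper: return the subset of job_ids whose reported
        # state is terminal.
        res = subprocess.run(  # nosec
            ["squeue", "--noheader", "--format=%i %T",
             "--states=all", "--jobs=" + ",".join(job_ids)],
            capture_output=True, text=True,
        )
        finished = set()
        for line in res.stdout.splitlines():
            job_id, state = line.split(maxsplit=1)
            # "CANCELLED by <uid>" should still count as "CANCELLED".
            if any(state.startswith(s) for s in STATES_FINISHED):
                finished.add(job_id)
        return finished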

fractal_server/app/runner/executors/slurm/_slurm_config.py
@@ -18,9 +18,9 @@ from typing import Optional
 from typing import Union
 
 from pydantic import BaseModel
-from pydantic import Extra
+from pydantic import ConfigDict
 from pydantic import Field
-from pydantic.error_wrappers import ValidationError
+from pydantic import ValidationError
 
 from .....config import get_settings
 from .....logger import set_logger
@@ -37,7 +37,7 @@ class SlurmConfigError(ValueError):
     pass
 
 
-class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
+class _SlurmConfigSet(BaseModel):
     """
     Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` for the default/gpu
     SLURM config. Only used as part of `SlurmConfigFile`.
@@ -54,19 +54,21 @@ class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
     extra_lines:
     """
 
-    partition: Optional[str]
-    cpus_per_task: Optional[int]
-    mem: Optional[Union[int, str]]
-    constraint: Optional[str]
-    gres: Optional[str]
-    time: Optional[str]
-    account: Optional[str]
-    extra_lines: Optional[list[str]]
-    pre_submission_commands: Optional[list[str]]
-    gpus: Optional[str]
+    model_config = ConfigDict(extra="forbid")
 
+    partition: Optional[str] = None
+    cpus_per_task: Optional[int] = None
+    mem: Optional[Union[int, str]] = None
+    constraint: Optional[str] = None
+    gres: Optional[str] = None
+    time: Optional[str] = None
+    account: Optional[str] = None
+    extra_lines: Optional[list[str]] = None
+    pre_submission_commands: Optional[list[str]] = None
+    gpus: Optional[str] = None
 
-class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
+
+class _BatchingConfigSet(BaseModel):
     """
     Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` to configure the
     batching strategy (that is, how to combine several tasks in a single SLURM
@@ -83,6 +85,8 @@ class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
     max_num_jobs:
     """
 
+    model_config = ConfigDict(extra="forbid")
+
     target_cpus_per_job: int
     max_cpus_per_job: int
     target_mem_per_job: Union[int, str]
@@ -91,7 +95,7 @@ class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
     max_num_jobs: int
 
 
-class SlurmConfigFile(BaseModel, extra=Extra.forbid):
+class SlurmConfigFile(BaseModel):
     """
     Specifications for the content of `FRACTAL_SLURM_CONFIG_FILE`
 
@@ -136,10 +140,12 @@ class SlurmConfigFile(BaseModel, extra=Extra.forbid):
         directory.
     """
 
+    model_config = ConfigDict(extra="forbid")
+
     default_slurm_config: _SlurmConfigSet
-    gpu_slurm_config: Optional[_SlurmConfigSet]
+    gpu_slurm_config: Optional[_SlurmConfigSet] = None
     batching_config: _BatchingConfigSet
-    user_local_exports: Optional[dict[str, str]]
+    user_local_exports: Optional[dict[str, str]] = None
 
 
 def load_slurm_config_file(
@@ -196,7 +202,7 @@ def load_slurm_config_file(
     return obj
 
 
-class SlurmConfig(BaseModel, extra=Extra.forbid):
+class SlurmConfig(BaseModel):
     """
     Abstraction for SLURM parameters
 
@@ -247,6 +253,8 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
     command.
     """
 
+    model_config = ConfigDict(extra="forbid")
+
     # Required SLURM parameters (note that the integer attributes are those
     # that will need to scale up with the number of parallel tasks per job)
     partition: str
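The `_slurm_config.py` hunks above are a textbook Pydantic v1-to-v2 migration: the `extra=Extra.forbid` class keyword becomes `model_config = ConfigDict(extra="forbid")`, and every `Optional` field gains an explicit `= None` (in v2, `Optional` no longer implies a default). A minimal sketch of the resulting behavior, with a stand-in model rather than the real `_SlurmConfigSet`:

    from typing import Optional
    from pydantic import BaseModel, ConfigDict, ValidationError

    class DemoConfigSet(BaseModel):  # stand-in for _SlurmConfigSet
        model_config = ConfigDict(extra="forbid")  # replaces Extra.forbid
        partition: Optional[str] = None  # v2 requires the explicit default
        cpus_per_task: Optional[int] = None

    DemoConfigSet(partition="main")  # OK; cpus_per_task defaults to None
    try:
        DemoConfigSet(partion="main")  # note the typo in the key
    except ValidationError as e:
        # extra="forbid" rejects unknown keys, so a malformed
        # FRACTAL_SLURM_CONFIG_FILE fails fast at load time.
        print(e)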

fractal_server/app/runner/executors/slurm/ssh/__init__.py
@@ -1,3 +0,0 @@
-from .executor import SlurmExecutor
-
-__all__ = ["SlurmExecutor"]

fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py
@@ -1,10 +1,8 @@
 import os
+import threading
 import time
 import traceback
 from itertools import count
-from typing import Callable
-
-from cfut import FileWaitThread
 
 from ......logger import set_logger
 from fractal_server.app.runner.exceptions import JobExecutionError
@@ -12,35 +10,46 @@ from fractal_server.app.runner.exceptions import JobExecutionError
 logger = set_logger(__name__)
 
 
-class FractalSlurmWaitThread(FileWaitThread):
+class FractalSlurmSSHWaitThread(threading.Thread):
     """
-    Overrides the original clusterfutures.FileWaitThread, so that:
-
-    1. Each jobid in the waiting list is associated to a tuple of filenames,
-    rather than a single one.
-    2. In the `check` method, we avoid output-file existence checks (which
-    would require `sudo -u user ls` calls), and we rather check for the
-    existence of the shutdown file. All the logic to check whether a job is
-    complete is deferred to the `cfut.slurm.jobs_finished` function.
-    3. There are additional attributes (...).
-
-    This class is based on clusterfutures 0.5. Original Copyright: 2022
-    Adrian Sampson, released under the MIT licence
+    Thread that monitors a pool of SLURM jobs
+
+    This class is a custom re-implementation of the waiting thread class from:
+
+    > clusterfutures <https://github.com/sampsyo/clusterfutures>
+    > Original Copyright
+    > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
+    > License: MIT
+
+    Attributes:
+        shutdown_file:
+        shutdown_callback:
+        slurm_poll_interval:
+        jobs_finished_callback:
+        active_job_ids:
+        shutdown:
+        lock:
     """
 
     shutdown_file: str
-    shutdown_callback: Callable
-    jobs_finished_callback: Callable
+    shutdown_callback: callable
     slurm_poll_interval = 30
+    jobs_finished_callback: callable
     active_job_ids: list[str]
+    shutdown: bool
+    _lock: threading.Lock
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, callback: callable, interval=1):
         """
         Init method
 
         This method is executed on the main thread.
         """
-        super().__init__(*args, **kwargs)
+        threading.Thread.__init__(self, daemon=True)
+        self.callback = callback
+        self.interval = interval
+        self._lock = threading.Lock()
+        self.shutdown = False
         self.active_job_ids = []
 
     def wait(self, *, job_id: str):
@@ -53,7 +62,7 @@ class FractalSlurmWaitThread(FileWaitThread):
             error_msg = "Cannot call `wait` method after executor shutdown."
             logger.warning(error_msg)
             raise JobExecutionError(info=error_msg)
-        with self.lock:
+        with self._lock:
             self.active_job_ids.append(job_id)
 
     def check_shutdown(self):
@@ -109,7 +118,7 @@ class FractalSlurmWaitThread(FileWaitThread):
                     pass
                 return
             if ind % skip == 0:
-                with self.lock:
+                with self._lock:
                     try:
                         self.check_jobs()
                     except Exception:  # nosec
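Taken together, these `_executor_wait_thread.py` hunks replace `cfut.FileWaitThread` with a plain `threading.Thread` that polls under a private lock and exits once a `shutdown` flag is set (the executor flips that flag in `_stop_and_join_wait_thread`, shown in a later hunk). The skeleton of the pattern, simplified and with an illustrative callback:

    import threading
    import time
    from itertools import count

    class PollingWaitThread(threading.Thread):
        # Simplified skeleton of the FractalSlurmSSHWaitThread pattern.

        def __init__(self, callback, interval=1):
            super().__init__(daemon=True)  # don't block interpreter exit
            self.callback = callback
            self.interval = interval
            self.shutdown = False
            self._lock = threading.Lock()
            self.active_job_ids: list[str] = []

        def wait(self, *, job_id: str):
            # Called from the main thread to register a job to monitor.
            with self._lock:
                self.active_job_ids.append(job_id)

        def run(self):
            skip = 5  # poll the jobs every skip * interval seconds
            for ind in count():
                if self.shutdown:
                    return
                if ind % skip == 0:
                    with self._lock:
                        self.callback(list(self.active_job_ids))
                time.sleep(self.interval)

    # Shutdown mirrors _stop_and_join_wait_thread:
    #     thread.shutdown = True; thread.join()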

fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py
@@ -1,8 +1,7 @@
+import uuid
 from pathlib import Path
 from typing import Optional
 
-from cfut.util import random_string
-
 from fractal_server.app.runner.executors.slurm._slurm_config import (
     SlurmConfig,
 )
@@ -88,7 +87,6 @@ class SlurmJob:
         self,
         num_tasks_tot: int,
         slurm_config: SlurmConfig,
-        workflow_task_file_prefix: Optional[str] = None,
         slurm_file_prefix: Optional[str] = None,
         wftask_file_prefixes: Optional[tuple[str, ...]] = None,
         single_task_submission: bool = False,
@@ -107,9 +105,7 @@ class SlurmJob:
             )
         else:
             self.wftask_file_prefixes = wftask_file_prefixes
-        self.workerids = tuple(
-            random_string() for i in range(self.num_tasks_tot)
-        )
+        self.workerids = tuple(uuid.uuid4() for i in range(self.num_tasks_tot))
         self.slurm_config = slurm_config
 
     def get_clean_output_pickle_files(self) -> tuple[str, ...]:
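Swapping `cfut.util.random_string()` for `uuid.uuid4()` keeps worker IDs unique without the extra dependency. Note that the tuple now holds `UUID` objects rather than strings; they stringify to their canonical form when interpolated, e.g. into hypothetical per-task file names:

    import uuid

    workerids = tuple(uuid.uuid4() for _ in range(3))
    print(f"{workerids[0]}_in.pickle")  # e.g. "9f1c...-..._in.pickle"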

fractal_server/app/runner/executors/slurm/ssh/executor.py
@@ -1,20 +1,9 @@
-# This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
-# Original Copyright
-# Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
-# License: MIT
-#
-# Modified by:
-# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
-# Tommaso Comparin <tommaso.comparin@exact-lab.it>
-# Marco Franzon <marco.franzon@exact-lab.it>
-#
-# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
-# University of Zurich
 import json
 import math
 import sys
 import threading
 import time
+from concurrent.futures import Executor
 from concurrent.futures import Future
 from concurrent.futures import InvalidStateError
 from copy import copy
@@ -25,18 +14,18 @@ from typing import Optional
 from typing import Sequence
 
 import cloudpickle
-from cfut import SlurmExecutor
 
 from ....filenames import SHUTDOWN_FILENAME
 from ....task_files import get_task_file_paths
 from ....task_files import TaskFiles
 from ....versions import get_versions
+from ..._job_states import STATES_FINISHED
 from ...slurm._slurm_config import SlurmConfig
 from .._batching import heuristics
 from ..utils_executors import get_pickle_file_path
 from ..utils_executors import get_slurm_file_path
 from ..utils_executors import get_slurm_script_file_path
-from ._executor_wait_thread import FractalSlurmWaitThread
+from ._executor_wait_thread import FractalSlurmSSHWaitThread
 from fractal_server.app.runner.components import _COMPONENT_KEY_
 from fractal_server.app.runner.compress_folder import compress_folder
 from fractal_server.app.runner.exceptions import JobExecutionError
@@ -48,25 +37,31 @@ from fractal_server.logger import set_logger
 from fractal_server.ssh._fabric import FractalSSH
 from fractal_server.syringe import Inject
 
+
 logger = set_logger(__name__)
 
 
-class FractalSlurmSSHExecutor(SlurmExecutor):
+class FractalSlurmSSHExecutor(Executor):
     """
-    FractalSlurmSSHExecutor (inherits from cfut.SlurmExecutor)
+    Executor to submit SLURM jobs via SSH
+
+    This class is a custom re-implementation of the SLURM executor from
+
+    > clusterfutures <https://github.com/sampsyo/clusterfutures>
+    > Original Copyright
+    > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
+    > License: MIT
 
-    FIXME: docstring
 
     Attributes:
         fractal_ssh: FractalSSH connection with custom lock
-        shutdown_file:
-        python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
-        wait_thread_cls: Class for waiting thread
-        keep_pickle_files:
         workflow_dir_local:
             Directory for both the cfut/SLURM and fractal-server files and logs
         workflow_dir_remote:
             Directory for both the cfut/SLURM and fractal-server files and logs
+        shutdown_file:
+        python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
+        wait_thread_cls: Class for waiting thread
         common_script_lines:
             Arbitrary script lines that will always be included in the
             sbatch script
@@ -83,11 +78,10 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
     shutdown_file: str
     python_remote: str
 
-    wait_thread_cls = FractalSlurmWaitThread
-    keep_pickle_files: bool
+    wait_thread_cls = FractalSlurmSSHWaitThread
 
     common_script_lines: list[str]
-    slurm_account: Optional[str]
+    slurm_account: Optional[str] = None
 
     jobs: dict[str, tuple[Future, SlurmJob]]
     map_jobid_to_slurm_files_local: dict[str, tuple[str, str, str]]
@@ -100,8 +94,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         # Folders and files
         workflow_dir_local: Path,
         workflow_dir_remote: Path,
-        # Runner options
-        keep_pickle_files: bool = False,
         # Monitoring options
         slurm_poll_interval: Optional[int] = None,
         # SLURM submission script options
120
112
  fractal_ssh:
121
113
  workflow_dir_local:
122
114
  workflow_dir_remote:
123
- keep_pickle_files:
124
115
  slurm_poll_interval:
125
116
  common_script_lines:
126
117
  slurm_account:
@@ -194,7 +185,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
194
185
  raise e
195
186
 
196
187
  # Set/initialize some more options
197
- self.keep_pickle_files = keep_pickle_files
198
188
  self.map_jobid_to_slurm_files_local = {}
199
189
 
200
190
  def _validate_common_script_lines(self):
@@ -901,12 +891,11 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                 pass
         for job_id in remaining_job_ids:
             self._cleanup(job_id)
-        if not self.keep_pickle_files:
-            for job in remaining_jobs:
-                for path in job.output_pickle_files_local:
-                    path.unlink()
-                for path in job.input_pickle_files_local:
-                    path.unlink()
+        for job in remaining_jobs:
+            for path in job.output_pickle_files_local:
+                path.unlink()
+            for path in job.input_pickle_files_local:
+                path.unlink()
 
     def _completion(self, job_ids: list[str]) -> None:
         """
@@ -1001,8 +990,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                         f"Future {future} (SLURM job ID: {job_id}) "
                         "was already cancelled."
                     )
-                    if not self.keep_pickle_files:
-                        in_path.unlink()
+                    in_path.unlink()
                     self._cleanup(job_id)
                     self._handle_remaining_jobs(
                         remaining_futures=remaining_futures,
@@ -1062,17 +1050,15 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                             remaining_job_ids=remaining_job_ids,
                         )
                         return
-                    if not self.keep_pickle_files:
-                        out_path.unlink()
+                    out_path.unlink()
                 except InvalidStateError:
                     logger.warning(
                         f"Future {future} (SLURM job ID: {job_id}) was "
                         "already cancelled, exit from "
                         "FractalSlurmSSHExecutor._completion."
                     )
-                    if not self.keep_pickle_files:
-                        out_path.unlink()
-                        in_path.unlink()
+                    out_path.unlink()
+                    in_path.unlink()
 
                     self._cleanup(job_id)
                     self._handle_remaining_jobs(
@@ -1082,8 +1068,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                     return
 
             # Clean up input pickle file
-            if not self.keep_pickle_files:
-                in_path.unlink()
+            in_path.unlink()
             self._cleanup(job_id)
             if job.single_task_submission:
                 future.set_result(outputs[0])
@@ -1170,7 +1155,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             Path(tarfile_path_local).unlink()
 
         t_1 = time.perf_counter()
-        logger.info("[_get_subfolder_sftp] End - " f"elapsed: {t_1-t_0:.3f} s")
+        logger.info(f"[_get_subfolder_sftp] End - elapsed: {t_1 - t_0:.3f} s")
 
     def _prepare_sbatch_script(
         self,
@@ -1210,8 +1195,10 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         script_lines = slurm_config.sort_script_lines(script_lines)
         logger.debug(script_lines)
 
-        # Always print output of `pwd`
-        script_lines.append('echo "Working directory (pwd): `pwd`"\n')
+        # Always print output of `uname -n` and `pwd`
+        script_lines.append(
+            'echo "Hostname: `uname -n`; current directory: `pwd`"\n'
+        )
 
         # Complete script preamble
         script_lines.append("\n")
@@ -1267,7 +1254,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         logger.debug("Executor shutdown: end")
 
     def _stop_and_join_wait_thread(self):
-        self.wait_thread.stop()
+        self.wait_thread.shutdown = True
         self.wait_thread.join()
 
     def __exit__(self, *args, **kwargs):
@@ -1304,8 +1291,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         (released under the MIT licence)
         """
 
-        from cfut.slurm import STATES_FINISHED
-
         logger.debug(
             f"[FractalSlurmSSHExecutor._jobs_finished] START ({job_ids=})"
         )
@@ -1396,6 +1381,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         t_end_handshake = time.perf_counter()
         logger.info(
             "[FractalSlurmSSHExecutor.ssh_handshake] END"
-            f" - elapsed: {t_end_handshake-t_start_handshake:.3f} s"
+            f" - elapsed: {t_end_handshake - t_start_handshake:.3f} s"
         )
         return remote_versions
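With `cfut` gone, `FractalSlurmSSHExecutor` now subclasses the standard-library `concurrent.futures.Executor` directly (see the import and class-statement hunks above). The base class only supplies `map`, `shutdown`, and context-manager behavior on top of whatever `submit` the subclass defines; a minimal sketch of that contract, not the fractal-server implementation (which submits SLURM jobs and resolves Futures from the wait thread):

    from concurrent.futures import Executor, Future

    class TrivialExecutor(Executor):
        # Smallest possible Executor: run the callable synchronously.

        def submit(self, fn, /, *args, **kwargs) -> Future:
            fut: Future = Future()
            try:
                fut.set_result(fn(*args, **kwargs))
            except Exception as exc:
                fut.set_exception(exc)
            return fut

    with TrivialExecutor() as pool:  # Executor provides __enter__/__exit__
        assert pool.submit(sum, [1, 2, 3]).result() == 6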

fractal_server/app/runner/executors/slurm/sudo/__init__.py
@@ -1,3 +0,0 @@
-from .executor import SlurmExecutor
-
-__all__ = ["SlurmExecutor"]

fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py
@@ -1,8 +1,7 @@
 from subprocess import run  # nosec
 
-from cfut.slurm import STATES_FINISHED
-
 from ......logger import set_logger
+from ..._job_states import STATES_FINISHED
 
 
 logger = set_logger(__name__)