fractal-server 2.12.1__py3-none-any.whl → 2.13.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/security.py +9 -12
  3. fractal_server/app/models/v2/__init__.py +4 -0
  4. fractal_server/app/models/v2/accounting.py +35 -0
  5. fractal_server/app/models/v2/dataset.py +2 -2
  6. fractal_server/app/models/v2/job.py +11 -9
  7. fractal_server/app/models/v2/task.py +2 -3
  8. fractal_server/app/models/v2/task_group.py +6 -2
  9. fractal_server/app/models/v2/workflowtask.py +15 -8
  10. fractal_server/app/routes/admin/v2/__init__.py +4 -0
  11. fractal_server/app/routes/admin/v2/accounting.py +108 -0
  12. fractal_server/app/routes/admin/v2/impersonate.py +35 -0
  13. fractal_server/app/routes/admin/v2/job.py +5 -13
  14. fractal_server/app/routes/admin/v2/task.py +1 -1
  15. fractal_server/app/routes/admin/v2/task_group.py +5 -13
  16. fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +3 -3
  17. fractal_server/app/routes/api/v2/dataset.py +4 -4
  18. fractal_server/app/routes/api/v2/images.py +11 -11
  19. fractal_server/app/routes/api/v2/project.py +2 -2
  20. fractal_server/app/routes/api/v2/status.py +1 -1
  21. fractal_server/app/routes/api/v2/submit.py +9 -6
  22. fractal_server/app/routes/api/v2/task.py +4 -2
  23. fractal_server/app/routes/api/v2/task_collection.py +3 -2
  24. fractal_server/app/routes/api/v2/task_group.py +4 -7
  25. fractal_server/app/routes/api/v2/workflow.py +3 -3
  26. fractal_server/app/routes/api/v2/workflow_import.py +3 -3
  27. fractal_server/app/routes/api/v2/workflowtask.py +3 -1
  28. fractal_server/app/routes/auth/_aux_auth.py +4 -1
  29. fractal_server/app/routes/auth/current_user.py +3 -5
  30. fractal_server/app/routes/auth/group.py +1 -1
  31. fractal_server/app/routes/auth/users.py +2 -4
  32. fractal_server/app/routes/aux/__init__.py +0 -20
  33. fractal_server/app/routes/aux/_runner.py +1 -1
  34. fractal_server/app/routes/aux/validate_user_settings.py +1 -2
  35. fractal_server/app/runner/executors/_job_states.py +13 -0
  36. fractal_server/app/runner/executors/slurm/_slurm_config.py +26 -18
  37. fractal_server/app/runner/executors/slurm/ssh/__init__.py +0 -3
  38. fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +31 -22
  39. fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +2 -5
  40. fractal_server/app/runner/executors/slurm/ssh/executor.py +21 -27
  41. fractal_server/app/runner/executors/slurm/sudo/__init__.py +0 -3
  42. fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +1 -2
  43. fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +37 -47
  44. fractal_server/app/runner/executors/slurm/sudo/executor.py +25 -24
  45. fractal_server/app/runner/v2/__init__.py +4 -9
  46. fractal_server/app/runner/v2/_local/__init__.py +3 -0
  47. fractal_server/app/runner/v2/_local/_local_config.py +5 -4
  48. fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py +4 -4
  49. fractal_server/app/runner/v2/_slurm_ssh/__init__.py +2 -0
  50. fractal_server/app/runner/v2/_slurm_sudo/__init__.py +4 -2
  51. fractal_server/app/runner/v2/deduplicate_list.py +1 -1
  52. fractal_server/app/runner/v2/runner.py +25 -10
  53. fractal_server/app/runner/v2/runner_functions.py +12 -11
  54. fractal_server/app/runner/v2/task_interface.py +15 -7
  55. fractal_server/app/schemas/_filter_validators.py +6 -3
  56. fractal_server/app/schemas/_validators.py +7 -5
  57. fractal_server/app/schemas/user.py +23 -18
  58. fractal_server/app/schemas/user_group.py +25 -11
  59. fractal_server/app/schemas/user_settings.py +31 -24
  60. fractal_server/app/schemas/v2/__init__.py +1 -0
  61. fractal_server/app/schemas/v2/accounting.py +18 -0
  62. fractal_server/app/schemas/v2/dataset.py +48 -35
  63. fractal_server/app/schemas/v2/dumps.py +16 -14
  64. fractal_server/app/schemas/v2/job.py +49 -29
  65. fractal_server/app/schemas/v2/manifest.py +32 -28
  66. fractal_server/app/schemas/v2/project.py +18 -8
  67. fractal_server/app/schemas/v2/task.py +86 -75
  68. fractal_server/app/schemas/v2/task_collection.py +41 -30
  69. fractal_server/app/schemas/v2/task_group.py +39 -20
  70. fractal_server/app/schemas/v2/workflow.py +24 -12
  71. fractal_server/app/schemas/v2/workflowtask.py +63 -61
  72. fractal_server/app/security/__init__.py +1 -1
  73. fractal_server/config.py +86 -73
  74. fractal_server/images/models.py +18 -12
  75. fractal_server/main.py +1 -1
  76. fractal_server/migrations/versions/af1ef1c83c9b_add_accounting_tables.py +57 -0
  77. fractal_server/tasks/v2/utils_background.py +2 -2
  78. fractal_server/tasks/v2/utils_database.py +1 -1
  79. {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/METADATA +9 -10
  80. {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/RECORD +83 -81
  81. fractal_server/app/runner/v2/_local_experimental/__init__.py +0 -121
  82. fractal_server/app/runner/v2/_local_experimental/_local_config.py +0 -108
  83. fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +0 -42
  84. fractal_server/app/runner/v2/_local_experimental/executor.py +0 -157
  85. {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/LICENSE +0 -0
  86. {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/WHEEL +0 -0
  87. {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/entry_points.txt +0 -0
@@ -62,7 +62,7 @@ async def create_project(
62
62
  project_name=project.name, user_id=user.id, db=db
63
63
  )
64
64
 
65
- db_project = ProjectV2(**project.dict())
65
+ db_project = ProjectV2(**project.model_dump())
66
66
  db_project.user_list.append(user)
67
67
 
68
68
  db.add(db_project)
@@ -106,7 +106,7 @@ async def update_project(
106
106
  project_name=project_update.name, user_id=user.id, db=db
107
107
  )
108
108
 
109
- for key, value in project_update.dict(exclude_unset=True).items():
109
+ for key, value in project_update.model_dump(exclude_unset=True).items():
110
110
  setattr(project, key, value)
111
111
 
112
112
  await db.commit()
@@ -154,7 +154,7 @@ async def get_workflowtask_status(
154
154
  if wf_task_status is None:
155
155
  # If a wftask ID was not found, ignore it and continue
156
156
  continue
157
- clean_workflow_tasks_status_dict[wf_task.id] = wf_task_status
157
+ clean_workflow_tasks_status_dict[str(wf_task.id)] = wf_task_status
158
158
  if wf_task_status == WorkflowTaskStatusTypeV2.FAILED:
159
159
  # Starting from the beginning of `workflow.task_list`, stop the
160
160
  # first time that you hit a failed job
@@ -165,11 +165,15 @@ async def apply_workflow(
165
165
  # The 'filters' field is not supported any more but still exists as a
166
166
  # database column, therefore we manually exclude it from dumps.
167
167
  dataset_dump=json.loads(
168
- dataset.json(exclude={"images", "history", "filters"})
168
+ dataset.model_dump_json(exclude={"images", "history", "filters"})
169
169
  ),
170
- workflow_dump=json.loads(workflow.json(exclude={"task_list"})),
171
- project_dump=json.loads(project.json(exclude={"user_list"})),
172
- **job_create.dict(),
170
+ workflow_dump=json.loads(
171
+ workflow.model_dump_json(exclude={"task_list"})
172
+ ),
173
+ project_dump=json.loads(
174
+ project.model_dump_json(exclude={"user_list"})
175
+ ),
176
+ **job_create.model_dump(),
173
177
  )
174
178
 
175
179
  db.add(job)
@@ -202,8 +206,6 @@ async def apply_workflow(
202
206
  # Define user-side job directory
203
207
  if FRACTAL_RUNNER_BACKEND == "local":
204
208
  WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
205
- elif FRACTAL_RUNNER_BACKEND == "local_experimental":
206
- WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
207
209
  elif FRACTAL_RUNNER_BACKEND == "slurm":
208
210
  WORKFLOW_DIR_REMOTE = cache_dir / WORKFLOW_DIR_LOCAL.name
209
211
  elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
@@ -237,6 +239,7 @@ async def apply_workflow(
237
239
  workflow_id=workflow.id,
238
240
  dataset_id=dataset.id,
239
241
  job_id=job.id,
242
+ user_id=user.id,
240
243
  user_settings=user_settings,
241
244
  worker_init=job.worker_init,
242
245
  slurm_user=user_settings.slurm_user,
@@ -107,7 +107,7 @@ async def patch_task(
107
107
  db_task = await _get_task_full_access(
108
108
  task_id=task_id, user_id=user.id, db=db
109
109
  )
110
- update = task_update.dict(exclude_unset=True)
110
+ update = task_update.model_dump(exclude_unset=True)
111
111
 
112
112
  # Forbid changes that set a previously unset command
113
113
  if db_task.type == "non_parallel" and "command_parallel" in update:
@@ -182,7 +182,8 @@ async def create_task(
182
182
  )
183
183
 
184
184
  # Add task
185
- db_task = TaskV2(**task.dict(), type=task_type)
185
+
186
+ db_task = TaskV2(**task.model_dump(exclude_unset=True), type=task_type)
186
187
  pkg_name = db_task.name
187
188
  await _verify_non_duplication_user_constraint(
188
189
  db=db, pkg_name=pkg_name, user_id=user.id, version=db_task.version
@@ -206,6 +207,7 @@ async def create_task(
206
207
  await db.commit()
207
208
  await db.refresh(db_task)
208
209
  await db.close()
210
+
209
211
  return db_task
210
212
 
211
213
 
@@ -13,7 +13,7 @@ from fastapi import Response
13
13
  from fastapi import status
14
14
  from fastapi import UploadFile
15
15
  from pydantic import BaseModel
16
- from pydantic import root_validator
16
+ from pydantic import model_validator
17
17
  from pydantic import ValidationError
18
18
  from sqlmodel import select
19
19
 
@@ -68,7 +68,8 @@ class CollectionRequestData(BaseModel):
68
68
  file: Optional[UploadFile] = None
69
69
  origin: TaskGroupV2OriginEnum
70
70
 
71
- @root_validator(pre=True)
71
+ @model_validator(mode="before")
72
+ @classmethod
72
73
  def validate_data(cls, values):
73
74
  file = values.get("file")
74
75
  package = values.get("task_collect").package
@@ -1,4 +1,3 @@
1
- from datetime import datetime
2
1
  from typing import Optional
3
2
 
4
3
  from fastapi import APIRouter
@@ -6,6 +5,7 @@ from fastapi import Depends
6
5
  from fastapi import HTTPException
7
6
  from fastapi import Response
8
7
  from fastapi import status
8
+ from pydantic.types import AwareDatetime
9
9
  from sqlmodel import or_
10
10
  from sqlmodel import select
11
11
 
@@ -23,7 +23,6 @@ from fractal_server.app.routes.auth import current_active_user
23
23
  from fractal_server.app.routes.auth._aux_auth import (
24
24
  _verify_user_belongs_to_group,
25
25
  )
26
- from fractal_server.app.routes.aux import _raise_if_naive_datetime
27
26
  from fractal_server.app.schemas.v2 import TaskGroupActivityActionV2
28
27
  from fractal_server.app.schemas.v2 import TaskGroupActivityStatusV2
29
28
  from fractal_server.app.schemas.v2 import TaskGroupActivityV2Read
@@ -43,13 +42,11 @@ async def get_task_group_activity_list(
43
42
  pkg_name: Optional[str] = None,
44
43
  status: Optional[TaskGroupActivityStatusV2] = None,
45
44
  action: Optional[TaskGroupActivityActionV2] = None,
46
- timestamp_started_min: Optional[datetime] = None,
45
+ timestamp_started_min: Optional[AwareDatetime] = None,
47
46
  user: UserOAuth = Depends(current_active_user),
48
47
  db: AsyncSession = Depends(get_async_db),
49
48
  ) -> list[TaskGroupActivityV2Read]:
50
49
 
51
- _raise_if_naive_datetime(timestamp_started_min)
52
-
53
50
  stm = select(TaskGroupActivityV2).where(
54
51
  TaskGroupActivityV2.user_id == user.id
55
52
  )
@@ -223,7 +220,7 @@ async def patch_task_group(
223
220
  db=db,
224
221
  )
225
222
  if (
226
- "user_group_id" in task_group_update.dict(exclude_unset=True)
223
+ "user_group_id" in task_group_update.model_dump(exclude_unset=True)
227
224
  and task_group_update.user_group_id != task_group.user_group_id
228
225
  ):
229
226
  await _verify_non_duplication_group_constraint(
@@ -232,7 +229,7 @@ async def patch_task_group(
232
229
  version=task_group.version,
233
230
  user_group_id=task_group_update.user_group_id,
234
231
  )
235
- for key, value in task_group_update.dict(exclude_unset=True).items():
232
+ for key, value in task_group_update.model_dump(exclude_unset=True).items():
236
233
  if (key == "user_group_id") and (value is not None):
237
234
  await _verify_user_belongs_to_group(
238
235
  user_id=user.id, user_group_id=value, db=db
@@ -82,7 +82,7 @@ async def create_workflow(
82
82
  name=workflow.name, project_id=project_id, db=db
83
83
  )
84
84
 
85
- db_workflow = WorkflowV2(project_id=project_id, **workflow.dict())
85
+ db_workflow = WorkflowV2(project_id=project_id, **workflow.model_dump())
86
86
  db.add(db_workflow)
87
87
  await db.commit()
88
88
  await db.refresh(db_workflow)
@@ -149,7 +149,7 @@ async def update_workflow(
149
149
  name=patch.name, project_id=project_id, db=db
150
150
  )
151
151
 
152
- for key, value in patch.dict(exclude_unset=True).items():
152
+ for key, value in patch.model_dump(exclude_unset=True).items():
153
153
  if key == "reordered_workflowtask_ids":
154
154
  current_workflowtask_ids = [
155
155
  wftask.id for wftask in workflow.task_list
@@ -262,7 +262,7 @@ async def export_worfklow(
262
262
  wf_task_list = []
263
263
  for wftask in workflow.task_list:
264
264
  task_group = await db.get(TaskGroupV2, wftask.task.taskgroupv2_id)
265
- wf_task_list.append(wftask.dict())
265
+ wf_task_list.append(wftask.model_dump())
266
266
  wf_task_list[-1]["task"] = dict(
267
267
  pkg_name=task_group.pkg_name,
268
268
  version=task_group.version,
@@ -321,7 +321,7 @@ async def import_workflow(
321
321
  detail=f"Could not find a task matching with {wf_task.task}.",
322
322
  )
323
323
  new_wf_task = WorkflowTaskCreateV2(
324
- **wf_task.dict(exclude_none=True, exclude={"task"})
324
+ **wf_task.model_dump(exclude_none=True, exclude={"task"})
325
325
  )
326
326
  list_wf_tasks.append(new_wf_task)
327
327
  list_task_ids.append(task_id)
@@ -336,7 +336,7 @@ async def import_workflow(
336
336
  # Create new Workflow
337
337
  db_workflow = WorkflowV2(
338
338
  project_id=project_id,
339
- **workflow_import.dict(exclude_none=True, exclude={"task_list"}),
339
+ **workflow_import.model_dump(exclude_none=True, exclude={"task_list"}),
340
340
  )
341
341
  db.add(db_workflow)
342
342
  await db.commit()
@@ -345,7 +345,7 @@ async def import_workflow(
345
345
  # Insert task into the workflow
346
346
  for ind, new_wf_task in enumerate(list_wf_tasks):
347
347
  await _workflow_insert_task(
348
- **new_wf_task.dict(),
348
+ **new_wf_task.model_dump(),
349
349
  workflow_id=db_workflow.id,
350
350
  task_id=list_task_ids[ind],
351
351
  db=db,
@@ -281,7 +281,9 @@ async def update_workflowtask(
281
281
  ),
282
282
  )
283
283
 
284
- for key, value in workflow_task_update.dict(exclude_unset=True).items():
284
+ for key, value in workflow_task_update.model_dump(
285
+ exclude_unset=True
286
+ ).items():
285
287
  if key == "args_parallel":
286
288
  # Get default arguments via a Task property method
287
289
  actual_args = deepcopy(value)
@@ -58,11 +58,14 @@ async def _get_single_user_with_groups(
58
58
  group_ids_names.insert(0, default_group)
59
59
  else:
60
60
  pass
61
+ oauth_accounts = [
62
+ oauth_account.model_dump() for oauth_account in user.oauth_accounts
63
+ ]
61
64
 
62
65
  return UserRead(
63
66
  **user.model_dump(),
64
67
  group_ids_names=group_ids_names,
65
- oauth_accounts=user.oauth_accounts,
68
+ oauth_accounts=oauth_accounts,
66
69
  )
67
70
 
68
71
 
@@ -57,14 +57,14 @@ async def patch_current_user(
57
57
  Note: a user cannot patch their own password (as enforced within the
58
58
  `UserUpdateStrict` schema).
59
59
  """
60
- update = UserUpdate(**user_update.dict(exclude_unset=True))
60
+ update = UserUpdate(**user_update.model_dump(exclude_unset=True))
61
61
 
62
62
  # NOTE: here it would be relevant to catch an `InvalidPasswordException`
63
63
  # (from `fastapi_users.exceptions`), if we were to allow users change
64
64
  # their own password
65
65
 
66
66
  user = await user_manager.update(update, current_user, safe=True)
67
- validated_user = schemas.model_validate(UserOAuth, user)
67
+ validated_user = schemas.model_validate(UserOAuth, user.model_dump())
68
68
 
69
69
  patched_user = await db.get(
70
70
  UserOAuth, validated_user.id, populate_existing=True
@@ -82,7 +82,6 @@ async def get_current_user_settings(
82
82
  current_user: UserOAuth = Depends(current_active_user),
83
83
  db: AsyncSession = Depends(get_async_db),
84
84
  ) -> UserSettingsReadStrict:
85
-
86
85
  verify_user_has_settings(current_user)
87
86
  user_settings = await db.get(UserSettings, current_user.user_settings_id)
88
87
  return user_settings
@@ -96,13 +95,12 @@ async def patch_current_user_settings(
96
95
  current_user: UserOAuth = Depends(current_active_user),
97
96
  db: AsyncSession = Depends(get_async_db),
98
97
  ) -> UserSettingsReadStrict:
99
-
100
98
  verify_user_has_settings(current_user)
101
99
  current_user_settings = await db.get(
102
100
  UserSettings, current_user.user_settings_id
103
101
  )
104
102
 
105
- for k, v in settings_update.dict(exclude_unset=True).items():
103
+ for k, v in settings_update.model_dump(exclude_unset=True).items():
106
104
  setattr(current_user_settings, k, v)
107
105
 
108
106
  db.add(current_user_settings)
@@ -194,7 +194,7 @@ async def patch_user_settings_bulk(
194
194
  .where(LinkUserGroup.group_id == group_id)
195
195
  )
196
196
  settings_list = res.scalars().all()
197
- update = settings_update.dict(exclude_unset=True)
197
+ update = settings_update.model_dump(exclude_unset=True)
198
198
  for settings in settings_list:
199
199
  for k, v in update.items():
200
200
  setattr(settings, k, v)
@@ -75,7 +75,7 @@ async def patch_user(
75
75
  safe=False,
76
76
  request=None,
77
77
  )
78
- validated_user = schemas.model_validate(UserOAuth, user)
78
+ validated_user = schemas.model_validate(UserOAuth, user.model_dump())
79
79
  patched_user = await db.get(
80
80
  UserOAuth, validated_user.id, populate_existing=True
81
81
  )
@@ -139,7 +139,6 @@ async def set_user_groups(
139
139
  superuser: UserOAuth = Depends(current_active_superuser),
140
140
  db: AsyncSession = Depends(get_async_db),
141
141
  ) -> UserRead:
142
-
143
142
  # Preliminary check that all objects exist in the db
144
143
  user = await _user_or_404(user_id=user_id, db=db)
145
144
  target_group_ids = user_update.group_ids
@@ -209,7 +208,6 @@ async def get_user_settings(
209
208
  superuser: UserOAuth = Depends(current_active_superuser),
210
209
  db: AsyncSession = Depends(get_async_db),
211
210
  ) -> UserSettingsRead:
212
-
213
211
  user = await _user_or_404(user_id=user_id, db=db)
214
212
  verify_user_has_settings(user)
215
213
  user_settings = await db.get(UserSettings, user.user_settings_id)
@@ -229,7 +227,7 @@ async def patch_user_settings(
229
227
  verify_user_has_settings(user)
230
228
  user_settings = await db.get(UserSettings, user.user_settings_id)
231
229
 
232
- for k, v in settings_update.dict(exclude_unset=True).items():
230
+ for k, v in settings_update.model_dump(exclude_unset=True).items():
233
231
  setattr(user_settings, k, v)
234
232
 
235
233
  db.add(user_settings)
@@ -1,20 +0,0 @@
1
- from datetime import datetime
2
- from typing import Optional
3
-
4
- from fastapi import HTTPException
5
- from fastapi import status
6
-
7
-
8
- def _raise_if_naive_datetime(*timestamps: tuple[Optional[datetime]]) -> None:
9
- """
10
- Raise 422 if any not-null argument is a naive `datetime` object:
11
- https://docs.python.org/3/library/datetime.html#determining-if-an-object-is-aware-or-naive
12
- """
13
- for timestamp in filter(None, timestamps):
14
- if (timestamp.tzinfo is None) or (
15
- timestamp.tzinfo.utcoffset(timestamp) is None
16
- ):
17
- raise HTTPException(
18
- status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
19
- detail=f"{timestamp=} is naive. You must provide a timezone.",
20
- )
@@ -6,7 +6,7 @@ from ....syringe import Inject
6
6
 
7
7
 
8
8
  def _backend_supports_shutdown(backend: str) -> bool:
9
- if backend in ["slurm", "slurm_ssh", "local_experimental"]:
9
+ if backend in ["slurm", "slurm_ssh"]:
10
10
  return True
11
11
  else:
12
12
  return False
@@ -1,6 +1,5 @@
1
1
  from fastapi import HTTPException
2
2
  from fastapi import status
3
- from pydantic import BaseModel
4
3
  from pydantic import ValidationError
5
4
 
6
5
  from fractal_server.app.db import AsyncSession
@@ -55,7 +54,7 @@ async def validate_user_settings(
55
54
  UserSettingsValidationModel = SlurmSudoUserSettings
56
55
  else:
57
56
  # For other backends, we don't validate anything
58
- UserSettingsValidationModel = BaseModel
57
+ return user_settings
59
58
 
60
59
  try:
61
60
  UserSettingsValidationModel(**user_settings.model_dump())
@@ -0,0 +1,13 @@
1
+ # https://slurm.schedmd.com/squeue.html#lbAG
2
+ STATES_FINISHED = {
3
+ "BOOT_FAIL",
4
+ "CANCELLED",
5
+ "COMPLETED",
6
+ "DEADLINE",
7
+ "FAILED",
8
+ "NODE_FAIL",
9
+ "OUT_OF_MEMORY",
10
+ "PREEMPTED",
11
+ "SPECIAL_EXIT",
12
+ "TIMEOUT",
13
+ }
@@ -18,9 +18,9 @@ from typing import Optional
18
18
  from typing import Union
19
19
 
20
20
  from pydantic import BaseModel
21
- from pydantic import Extra
21
+ from pydantic import ConfigDict
22
22
  from pydantic import Field
23
- from pydantic.error_wrappers import ValidationError
23
+ from pydantic import ValidationError
24
24
 
25
25
  from .....config import get_settings
26
26
  from .....logger import set_logger
@@ -37,7 +37,7 @@ class SlurmConfigError(ValueError):
37
37
  pass
38
38
 
39
39
 
40
- class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
40
+ class _SlurmConfigSet(BaseModel):
41
41
  """
42
42
  Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` for the default/gpu
43
43
  SLURM config. Only used as part of `SlurmConfigFile`.
@@ -54,19 +54,21 @@ class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
54
54
  extra_lines:
55
55
  """
56
56
 
57
- partition: Optional[str]
58
- cpus_per_task: Optional[int]
59
- mem: Optional[Union[int, str]]
60
- constraint: Optional[str]
61
- gres: Optional[str]
62
- time: Optional[str]
63
- account: Optional[str]
64
- extra_lines: Optional[list[str]]
65
- pre_submission_commands: Optional[list[str]]
66
- gpus: Optional[str]
57
+ model_config = ConfigDict(extra="forbid")
67
58
 
59
+ partition: Optional[str] = None
60
+ cpus_per_task: Optional[int] = None
61
+ mem: Optional[Union[int, str]] = None
62
+ constraint: Optional[str] = None
63
+ gres: Optional[str] = None
64
+ time: Optional[str] = None
65
+ account: Optional[str] = None
66
+ extra_lines: Optional[list[str]] = None
67
+ pre_submission_commands: Optional[list[str]] = None
68
+ gpus: Optional[str] = None
68
69
 
69
- class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
70
+
71
+ class _BatchingConfigSet(BaseModel):
70
72
  """
71
73
  Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` to configure the
72
74
  batching strategy (that is, how to combine several tasks in a single SLURM
@@ -83,6 +85,8 @@ class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
83
85
  max_num_jobs:
84
86
  """
85
87
 
88
+ model_config = ConfigDict(extra="forbid")
89
+
86
90
  target_cpus_per_job: int
87
91
  max_cpus_per_job: int
88
92
  target_mem_per_job: Union[int, str]
@@ -91,7 +95,7 @@ class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
91
95
  max_num_jobs: int
92
96
 
93
97
 
94
- class SlurmConfigFile(BaseModel, extra=Extra.forbid):
98
+ class SlurmConfigFile(BaseModel):
95
99
  """
96
100
  Specifications for the content of `FRACTAL_SLURM_CONFIG_FILE`
97
101
 
@@ -136,10 +140,12 @@ class SlurmConfigFile(BaseModel, extra=Extra.forbid):
136
140
  directory.
137
141
  """
138
142
 
143
+ model_config = ConfigDict(extra="forbid")
144
+
139
145
  default_slurm_config: _SlurmConfigSet
140
- gpu_slurm_config: Optional[_SlurmConfigSet]
146
+ gpu_slurm_config: Optional[_SlurmConfigSet] = None
141
147
  batching_config: _BatchingConfigSet
142
- user_local_exports: Optional[dict[str, str]]
148
+ user_local_exports: Optional[dict[str, str]] = None
143
149
 
144
150
 
145
151
  def load_slurm_config_file(
@@ -196,7 +202,7 @@ def load_slurm_config_file(
196
202
  return obj
197
203
 
198
204
 
199
- class SlurmConfig(BaseModel, extra=Extra.forbid):
205
+ class SlurmConfig(BaseModel):
200
206
  """
201
207
  Abstraction for SLURM parameters
202
208
 
@@ -247,6 +253,8 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
247
253
  command.
248
254
  """
249
255
 
256
+ model_config = ConfigDict(extra="forbid")
257
+
250
258
  # Required SLURM parameters (note that the integer attributes are those
251
259
  # that will need to scale up with the number of parallel tasks per job)
252
260
  partition: str
@@ -1,3 +0,0 @@
1
- from .executor import SlurmExecutor
2
-
3
- __all__ = ["SlurmExecutor"]
@@ -1,10 +1,8 @@
1
1
  import os
2
+ import threading
2
3
  import time
3
4
  import traceback
4
5
  from itertools import count
5
- from typing import Callable
6
-
7
- from cfut import FileWaitThread
8
6
 
9
7
  from ......logger import set_logger
10
8
  from fractal_server.app.runner.exceptions import JobExecutionError
@@ -12,35 +10,46 @@ from fractal_server.app.runner.exceptions import JobExecutionError
12
10
  logger = set_logger(__name__)
13
11
 
14
12
 
15
- class FractalSlurmWaitThread(FileWaitThread):
13
+ class FractalSlurmSSHWaitThread(threading.Thread):
16
14
  """
17
- Overrides the original clusterfutures.FileWaitThread, so that:
18
-
19
- 1. Each jobid in the waiting list is associated to a tuple of filenames,
20
- rather than a single one.
21
- 2. In the `check` method, we avoid output-file existence checks (which
22
- would require `sudo -u user ls` calls), and we rather check for the
23
- existence of the shutdown file. All the logic to check whether a job is
24
- complete is deferred to the `cfut.slurm.jobs_finished` function.
25
- 3. There are additional attributes (...).
26
-
27
- This class is based on clusterfutures 0.5. Original Copyright: 2022
28
- Adrian Sampson, released under the MIT licence
15
+ Thread that monitors a pool of SLURM jobs
16
+
17
+ This class is a custom re-implementation of the waiting thread class from:
18
+
19
+ > clusterfutures <https://github.com/sampsyo/clusterfutures>
20
+ > Original Copyright
21
+ > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
22
+ > License: MIT
23
+
24
+ Attributes:
25
+ shutdown_file:
26
+ shutdown_callback:
27
+ slurm_poll_interval:
28
+ jobs_finished_callback:
29
+ active_job_ids:
30
+ shutdown:
31
+ lock:
29
32
  """
30
33
 
31
34
  shutdown_file: str
32
- shutdown_callback: Callable
33
- jobs_finished_callback: Callable
35
+ shutdown_callback: callable
34
36
  slurm_poll_interval = 30
37
+ jobs_finished_callback: callable
35
38
  active_job_ids: list[str]
39
+ shutdown: bool
40
+ _lock: threading.Lock
36
41
 
37
- def __init__(self, *args, **kwargs):
42
+ def __init__(self, callback: callable, interval=1):
38
43
  """
39
44
  Init method
40
45
 
41
46
  This method is executed on the main thread.
42
47
  """
43
- super().__init__(*args, **kwargs)
48
+ threading.Thread.__init__(self, daemon=True)
49
+ self.callback = callback
50
+ self.interval = interval
51
+ self._lock = threading.Lock()
52
+ self.shutdown = False
44
53
  self.active_job_ids = []
45
54
 
46
55
  def wait(self, *, job_id: str):
@@ -53,7 +62,7 @@ class FractalSlurmWaitThread(FileWaitThread):
53
62
  error_msg = "Cannot call `wait` method after executor shutdown."
54
63
  logger.warning(error_msg)
55
64
  raise JobExecutionError(info=error_msg)
56
- with self.lock:
65
+ with self._lock:
57
66
  self.active_job_ids.append(job_id)
58
67
 
59
68
  def check_shutdown(self):
@@ -109,7 +118,7 @@ class FractalSlurmWaitThread(FileWaitThread):
109
118
  pass
110
119
  return
111
120
  if ind % skip == 0:
112
- with self.lock:
121
+ with self._lock:
113
122
  try:
114
123
  self.check_jobs()
115
124
  except Exception: # nosec
@@ -1,8 +1,7 @@
1
+ import uuid
1
2
  from pathlib import Path
2
3
  from typing import Optional
3
4
 
4
- from cfut.util import random_string
5
-
6
5
  from fractal_server.app.runner.executors.slurm._slurm_config import (
7
6
  SlurmConfig,
8
7
  )
@@ -106,9 +105,7 @@ class SlurmJob:
106
105
  )
107
106
  else:
108
107
  self.wftask_file_prefixes = wftask_file_prefixes
109
- self.workerids = tuple(
110
- random_string() for i in range(self.num_tasks_tot)
111
- )
108
+ self.workerids = tuple(uuid.uuid4() for i in range(self.num_tasks_tot))
112
109
  self.slurm_config = slurm_config
113
110
 
114
111
  def get_clean_output_pickle_files(self) -> tuple[str, ...]: