fractal-server 2.7.0a11__py3-none-any.whl → 2.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/user_settings.py +1 -0
  3. fractal_server/app/models/v2/task.py +16 -2
  4. fractal_server/app/routes/admin/v2/task_group.py +7 -0
  5. fractal_server/app/routes/api/v2/dataset.py +39 -6
  6. fractal_server/app/routes/api/v2/task.py +4 -6
  7. fractal_server/app/routes/api/v2/task_collection.py +17 -44
  8. fractal_server/app/routes/api/v2/task_collection_custom.py +5 -4
  9. fractal_server/app/schemas/user_settings.py +18 -0
  10. fractal_server/app/schemas/v2/__init__.py +1 -0
  11. fractal_server/app/schemas/v2/dataset.py +5 -3
  12. fractal_server/app/schemas/v2/task_collection.py +20 -4
  13. fractal_server/app/schemas/v2/task_group.py +8 -1
  14. fractal_server/app/security/__init__.py +8 -1
  15. fractal_server/config.py +8 -28
  16. fractal_server/migrations/versions/19eca0dd47a9_user_settings_project_dir.py +39 -0
  17. fractal_server/migrations/versions/8e8f227a3e36_update_taskv2_post_2_7_0.py +42 -0
  18. fractal_server/tasks/utils.py +0 -31
  19. fractal_server/tasks/v1/background_operations.py +11 -11
  20. fractal_server/tasks/v1/endpoint_operations.py +5 -5
  21. fractal_server/tasks/v1/utils.py +2 -2
  22. fractal_server/tasks/v2/collection_local.py +357 -0
  23. fractal_server/tasks/v2/{background_operations_ssh.py → collection_ssh.py} +108 -102
  24. fractal_server/tasks/v2/templates/_1_create_venv.sh +0 -8
  25. fractal_server/tasks/v2/templates/{_2_upgrade_pip.sh → _2_preliminary_pip_operations.sh} +2 -1
  26. fractal_server/tasks/v2/templates/_3_pip_install.sh +22 -1
  27. fractal_server/tasks/v2/templates/_5_pip_show.sh +5 -5
  28. fractal_server/tasks/v2/utils_background.py +209 -0
  29. fractal_server/tasks/v2/utils_package_names.py +77 -0
  30. fractal_server/tasks/v2/{utils.py → utils_python_interpreter.py} +0 -26
  31. fractal_server/tasks/v2/utils_templates.py +59 -0
  32. fractal_server/utils.py +48 -3
  33. {fractal_server-2.7.0a11.dist-info → fractal_server-2.8.0.dist-info}/METADATA +14 -17
  34. {fractal_server-2.7.0a11.dist-info → fractal_server-2.8.0.dist-info}/RECORD +38 -35
  35. fractal_server/data_migrations/2_7_0.py +0 -323
  36. fractal_server/tasks/v2/_venv_pip.py +0 -193
  37. fractal_server/tasks/v2/background_operations.py +0 -456
  38. /fractal_server/{tasks/v2/endpoint_operations.py → app/routes/api/v2/_aux_functions_task_collection.py} +0 -0
  39. {fractal_server-2.7.0a11.dist-info → fractal_server-2.8.0.dist-info}/LICENSE +0 -0
  40. {fractal_server-2.7.0a11.dist-info → fractal_server-2.8.0.dist-info}/WHEEL +0 -0
  41. {fractal_server-2.7.0a11.dist-info → fractal_server-2.8.0.dist-info}/entry_points.txt +0 -0
@@ -1,456 +0,0 @@
1
- """
2
- The main function exported from this module is `background_collect_pip`, which
3
- is used as a background task for the task-collection endpoint.
4
- """
5
- import json
6
- from pathlib import Path
7
- from shutil import rmtree as shell_rmtree
8
- from tempfile import TemporaryDirectory
9
- from typing import Optional
10
- from typing import Union
11
- from zipfile import ZipFile
12
-
13
- from sqlalchemy.orm import Session as DBSyncSession
14
- from sqlalchemy.orm.attributes import flag_modified
15
- from sqlmodel import select
16
-
17
- from ..utils import get_collection_freeze_v2
18
- from ..utils import get_collection_log_v2
19
- from ..utils import get_collection_path
20
- from ..utils import get_log_path
21
- from .database_operations import create_db_tasks_and_update_task_group
22
- from fractal_server.app.db import get_sync_db
23
- from fractal_server.app.models.v2 import CollectionStateV2
24
- from fractal_server.app.models.v2 import TaskGroupV2
25
- from fractal_server.app.schemas.v2 import CollectionStatusV2
26
- from fractal_server.app.schemas.v2 import TaskCreateV2
27
- from fractal_server.app.schemas.v2 import TaskReadV2
28
- from fractal_server.app.schemas.v2.manifest import ManifestV2
29
- from fractal_server.logger import get_logger
30
- from fractal_server.logger import reset_logger_handlers
31
- from fractal_server.logger import set_logger
32
- from fractal_server.tasks.v2._venv_pip import _create_venv_install_package_pip
33
- from fractal_server.tasks.v2.utils import get_python_interpreter_v2
34
- from fractal_server.utils import execute_command
35
-
36
-
37
def _set_collection_state_data_status(
    *,
    state_id: int,
    new_status: CollectionStatusV2,
    logger_name: str,
    db: DBSyncSession,
):
    """
    Store a new status in the `data` attribute of a collection state.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        new_status: Value to store under `data["status"]`.
        logger_name: Name of the logger used for the debug message.
        db: Synchronous database session; the update is committed.
    """
    get_logger(logger_name).debug(
        f"{state_id=} - set state.data['status'] to {new_status}"
    )
    state = db.get(CollectionStateV2, state_id)
    state.data["status"] = CollectionStatusV2(new_status)
    # `data` is mutated in place: mark the attribute as modified so that
    # the commit persists the change.
    flag_modified(state, "data")
    db.commit()
50
-
51
-
52
def _set_collection_state_data_log(
    *,
    state_id: int,
    new_log: str,
    logger_name: str,
    db: DBSyncSession,
):
    """
    Store a new log in the `data` attribute of a collection state.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        new_log: Text to store under `data["log"]`.
        logger_name: Name of the logger used for the debug message.
        db: Synchronous database session; the update is committed.
    """
    get_logger(logger_name).debug(f"{state_id=} - set state.data['log']")
    state = db.get(CollectionStateV2, state_id)
    state.data["log"] = new_log
    # `data` is mutated in place: mark the attribute as modified so that
    # the commit persists the change.
    flag_modified(state, "data")
    db.commit()
65
-
66
-
67
def _set_collection_state_data_info(
    *,
    state_id: int,
    new_info: str,
    logger_name: str,
    db: DBSyncSession,
):
    """
    Store a new info message in the `data` attribute of a collection state.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        new_info: Text to store under `data["info"]`.
        logger_name: Name of the logger used for the debug message.
        db: Synchronous database session; the update is committed.
    """
    get_logger(logger_name).debug(f"{state_id=} - set state.data['info']")
    state = db.get(CollectionStateV2, state_id)
    state.data["info"] = new_info
    # `data` is mutated in place: mark the attribute as modified so that
    # the commit persists the change.
    flag_modified(state, "data")
    db.commit()
80
-
81
-
82
def _handle_failure(
    state_id: int,
    log_file_path: Path,
    logger_name: str,
    exception: Exception,
    db: DBSyncSession,
    task_group_id: int,
    path: Optional[Path] = None,
):
    """
    Record a failed task collection and clean up after it.

    The function performs these steps, in order:

    1. Set the collection-state status to `CollectionStatusV2.FAIL`.
    2. Copy the content of the log file into the collection-state log.
    3. Store the original error in the collection-state info.
    4. Remove `path` from disk, if it was provided and it exists.
    5. Detach all `CollectionStateV2` rows from the task group and delete
       the `TaskGroupV2` row.
    6. Reset the handlers of the logger.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        log_file_path: Log file whose content is copied into the state.
        logger_name: Name of the logger to use.
        exception: The error that made the collection fail.
        db: Synchronous database session.
        task_group_id: ID of the `TaskGroupV2` row to delete.
        path: Folder to remove; it is only required to trigger the folder
            deletion, and `None` means that nothing is removed from disk.
    """

    logger = get_logger(logger_name)
    logger.error(f"Task collection failed. Original error: {str(exception)}")

    _set_collection_state_data_status(
        state_id=state_id,
        new_status=CollectionStatusV2.FAIL,
        logger_name=logger_name,
        db=db,
    )

    # `read_text` closes the file, unlike a bare `open().read()`
    new_log = log_file_path.read_text()
    _set_collection_state_data_log(
        state_id=state_id,
        new_log=new_log,
        logger_name=logger_name,
        db=db,
    )
    # For backwards-compatibility, we also set state.data["info"]
    _set_collection_state_data_info(
        state_id=state_id,
        new_info=f"Original error: {exception}",
        logger_name=logger_name,
        db=db,
    )
    # Delete corrupted package dir
    if path is not None and Path(path).exists():
        logger.info(f"Now delete temporary folder {path}")
        shell_rmtree(path)
        logger.info("Temporary folder deleted")

    # Delete TaskGroupV2 object / and apply cascade operation to FKs
    logger.info(f"Now delete TaskGroupV2 with {task_group_id=}")
    logger.info("Start of CollectionStateV2 cascade operations.")
    stm = select(CollectionStateV2).where(
        CollectionStateV2.taskgroupv2_id == task_group_id
    )
    res = db.execute(stm)
    collection_states = res.scalars().all()
    for collection_state in collection_states:
        logger.info(
            f"Setting CollectionStateV2[{collection_state.id}].taskgroupv2_id "
            "to None."
        )
        collection_state.taskgroupv2_id = None
        db.add(collection_state)
    logger.info("End of CollectionStateV2 cascade operations.")
    task_group = db.get(TaskGroupV2, task_group_id)
    if task_group is None:
        # Nothing to delete: avoid calling `db.delete(None)`, but still
        # commit the updates of the collection states.
        logger.warning(
            f"TaskGroupV2 with {task_group_id=} not found, skip deletion"
        )
        db.commit()
    else:
        db.delete(task_group)
        db.commit()
        logger.info(f"TaskGroupV2 with {task_group_id=} deleted")

    reset_logger_handlers(logger)
    return
148
-
149
-
150
def _prepare_tasks_metadata(
    *,
    package_manifest: ManifestV2,
    python_bin: Path,
    package_root: Path,
    package_version: Optional[str] = None,
) -> list[TaskCreateV2]:
    """
    Build the list of tasks to create, starting from a package manifest.

    Args:
        package_manifest: Manifest providing the task list, the authors and
            the args-schema information.
        python_bin: Interpreter placed at the beginning of task commands.
        package_root: Folder that the task executables are relative to.
        package_version: If set, used as the `version` of every task.

    Returns:
        One `TaskCreateV2` object per task of the manifest.
    """

    def _command(executable) -> str:
        # Task commands have the form `<interpreter> <absolute script path>`
        script = package_root / executable
        return f"{python_bin.as_posix()} {script.as_posix()}"

    tasks = []
    for manifest_task in package_manifest.task_list:
        # Attributes that do not depend on the task executables
        extra = {}
        if package_version is not None:
            extra["version"] = package_version
        if package_manifest.has_args_schemas:
            extra["args_schema_version"] = package_manifest.args_schema_version
        # Commands, only set for the executables that the task defines
        if manifest_task.executable_non_parallel is not None:
            extra["command_non_parallel"] = _command(
                manifest_task.executable_non_parallel
            )
        if manifest_task.executable_parallel is not None:
            extra["command_parallel"] = _command(
                manifest_task.executable_parallel
            )
        # Executables are replaced by commands in the new object
        manifest_attributes = manifest_task.dict(
            exclude={
                "executable_non_parallel",
                "executable_parallel",
            }
        )
        tasks.append(
            TaskCreateV2(
                **manifest_attributes,
                **extra,
                authors=package_manifest.authors,
            )
        )
    return tasks
200
-
201
-
202
def _check_task_files_exist(task_list: list[TaskCreateV2]) -> None:
    """
    Check that the modules listed in task commands point to existing files.

    The second whitespace-separated token of each command is taken as the
    path of the task script.

    Args:
        task_list: Tasks whose `command_non_parallel` and `command_parallel`
            attributes are checked; `None` commands are skipped.

    Raises:
        ValueError: If a command has fewer than two tokens, so that no
            script path can be extracted from it.
        FileNotFoundError: If the script path of a command does not exist.
    """
    for _task in task_list:
        for command_type in ("command_non_parallel", "command_parallel"):
            command = getattr(_task, command_type)
            if command is None:
                continue
            tokens = command.split()
            if len(tokens) < 2:
                # Fail with an explicit message rather than an IndexError
                raise ValueError(
                    f"Task `{_task.name}` has `{command_type}` "
                    f"with no script path: `{command}`."
                )
            _task_path = tokens[1]
            if not Path(_task_path).exists():
                raise FileNotFoundError(
                    f"Task `{_task.name}` has `{command_type}` "
                    f"pointing to missing file `{_task_path}`."
                )
223
-
224
-
225
async def _download_package(
    *,
    python_version: str,
    pkg_name: str,
    version: str,
    dest: Union[str, Path],
) -> Path:
    """
    Download package to destination and return wheel-file path.

    The package is downloaded by running `pip download --no-deps`, and the
    file path is extracted from the first line of the command output that
    includes "Saved".

    Args:
        python_version: Version used to select the Python interpreter that
            runs `pip`.
        pkg_name: Name of the package to download.
        version: Version of the package to download.
        dest: Folder that `pip` downloads the package to.

    Returns:
        Path of the downloaded file, as reported in the `pip` output.

    Raises:
        RuntimeError: If the `pip` output includes no "Saved" line.
    """
    python_bin = get_python_interpreter_v2(python_version=python_version)
    pip = f"{python_bin} -m pip"
    package_and_version = f"{pkg_name}=={version}"
    cmd = f"{pip} download --no-deps {package_and_version} -d {dest}"
    stdout = await execute_command(command=cmd)
    # Use a default value for `next`: a StopIteration raised within a
    # coroutine would only surface as an uninformative RuntimeError.
    pkg_file = next(
        (line.split()[-1] for line in stdout.split("\n") if "Saved" in line),
        None,
    )
    if pkg_file is None:
        raise RuntimeError(
            f"Could not find the downloaded-file path in the output of "
            f"`{cmd}`. Original output:\n{stdout}"
        )
    return Path(pkg_file)
244
-
245
-
246
def _load_manifest_from_wheel(
    wheel_file_path: str,
    logger_name: str,
) -> ManifestV2:
    """
    Extract the Fractal manifest from a wheel file on disk.

    Args:
        wheel_file_path: Path of the wheel file to open.
        logger_name: Name of the logger used for error messages.

    Returns:
        The manifest, as a `ManifestV2` object.

    Raises:
        ValueError: If the wheel has no `__FRACTAL_MANIFEST__.json` member
            or if the manifest version is not "2".
    """
    logger = get_logger(logger_name)

    with ZipFile(wheel_file_path) as wheel:
        try:
            # First archive member whose name includes the manifest name
            manifest_name = next(
                member
                for member in wheel.namelist()
                if "__FRACTAL_MANIFEST__.json" in member
            )
        except StopIteration:
            msg = (
                f"{wheel_file_path} does not include __FRACTAL_MANIFEST__.json"
            )
            logger.error(msg)
            raise ValueError(msg)
        with wheel.open(manifest_name) as manifest_file:
            manifest_dict = json.load(manifest_file)

    manifest_version = str(manifest_dict["manifest_version"])
    if manifest_version != "2":
        msg = f"Manifest version {manifest_version=} not supported"
        logger.error(msg)
        raise ValueError(msg)
    return ManifestV2(**manifest_dict)
278
-
279
-
280
async def _get_package_manifest(
    *,
    task_group: TaskGroupV2,
    logger_name: str,
) -> ManifestV2:
    """
    Load the manifest of the package associated to a task group.

    If the task group has a `wheel_path`, the manifest is read from that
    wheel file. Otherwise the package is first downloaded to a temporary
    folder and the manifest is read from the downloaded file.

    Args:
        task_group: Task group providing `wheel_path`, `python_version`,
            `pkg_name` and `version`.
        logger_name: Name of the logger passed to the manifest loader.

    Returns:
        The package manifest.
    """
    local_wheel = task_group.wheel_path
    if local_wheel is not None:
        # Read package manifest from the existing wheel file
        return _load_manifest_from_wheel(
            wheel_file_path=local_wheel,
            logger_name=logger_name,
        )

    with TemporaryDirectory() as tmpdir:
        # Download the package file to the temporary folder
        downloaded = await _download_package(
            python_version=task_group.python_version,
            pkg_name=task_group.pkg_name,
            version=task_group.version,
            dest=tmpdir,
        )
        # Read the manifest while the temporary folder still exists
        return _load_manifest_from_wheel(
            wheel_file_path=downloaded.as_posix(),
            logger_name=logger_name,
        )
308
-
309
-
310
async def background_collect_pip(
    *,
    state_id: int,
    task_group: TaskGroupV2,
) -> None:
    """
    Set up a venv, install a package and collect its tasks.

    This function (executed as a background task) goes through the steps of
    the automated collection of a Python task package:

    1. Create the task-group folder, which must not exist yet.
    2. Get the package manifest.
    3. Create the venv and run `pip install`.
    4. Prepare the task list and write it to the database.
    5. Write the collection file and update the collection state.

    Any failure is handled by `_handle_failure`; for failures that take
    place after the folder creation, the task-group folder is passed to it
    for removal.

    Args:
        state_id: ID of the `CollectionStateV2` row tracking this collection.
        task_group: The task group that tasks are collected for.
    """
    logger_name = (
        f"{task_group.user_id}-{task_group.pkg_name}-{task_group.version}"
    )

    try:
        Path(task_group.path).mkdir(parents=True, exist_ok=False)
    except FileExistsError as e:
        set_logger(
            logger_name=logger_name,
            log_file_path=get_log_path(Path(task_group.path)),
        )
        existing_dir_log = get_log_path(Path(task_group.path))
        with next(get_sync_db()) as db:
            _handle_failure(
                state_id=state_id,
                log_file_path=existing_dir_log,
                logger_name=logger_name,
                exception=e,
                db=db,
                path=None,  # Do not remove an existing path
                task_group_id=task_group.id,
            )
        return

    logger = set_logger(
        logger_name=logger_name,
        log_file_path=get_log_path(Path(task_group.path)),
    )

    # Log all task-group attributes
    logger.debug("START")
    for key, value in task_group.model_dump().items():
        logger.debug(f"task_group.{key}: {value}")

    with next(get_sync_db()) as db:

        def _update_status(new_status: CollectionStatusV2) -> None:
            # Shorthand for the status updates of this collection
            _set_collection_state_data_status(
                state_id=state_id,
                new_status=new_status,
                logger_name=logger_name,
                db=db,
            )

        try:
            # Block 1: get and validate manifest
            pkg_manifest = await _get_package_manifest(
                task_group=task_group,
                logger_name=logger_name,
            )

            # Block 2: create venv and run pip install
            logger.debug("installing - START")
            _update_status(CollectionStatusV2.INSTALLING)
            python_bin, package_root = await _create_venv_install_package_pip(
                task_group=task_group,
                logger_name=logger_name,
            )
            logger.debug("installing - END")

            # Block 3: create task metadata and create database entries
            logger.debug("collecting - START")
            _update_status(CollectionStatusV2.COLLECTING)
            logger.debug("collecting - prepare tasks and update db - START")
            task_list = _prepare_tasks_metadata(
                package_manifest=pkg_manifest,
                package_version=task_group.version,
                package_root=package_root,
                python_bin=python_bin,
            )
            _check_task_files_exist(task_list=task_list)

            # From here on, `task_group` is the object returned by the
            # database operation
            task_group = create_db_tasks_and_update_task_group(
                task_list=task_list,
                task_group_id=task_group.id,
                db=db,
            )

            logger.debug("collecting - prepare tasks and update db - END")
            logger.debug("collecting - END")

            # Block 4: finalize (write collection files, write metadata to DB)
            logger.debug("finalising - START")
            collection_path = get_collection_path(Path(task_group.path))
            collection_state = db.get(CollectionStateV2, state_id)
            collection_state.data["task_list"] = [
                TaskReadV2(**task.model_dump()).dict()
                for task in task_group.task_list
            ]
            collection_state.data["log"] = get_collection_log_v2(
                Path(task_group.path)
            )
            collection_state.data["freeze"] = get_collection_freeze_v2(
                Path(task_group.path)
            )
            with collection_path.open("w") as f:
                json.dump(collection_state.data, f, indent=2)

            flag_modified(collection_state, "data")
            db.commit()
            logger.debug("finalising - END")

        except Exception as e:
            failure_log = get_log_path(Path(task_group.path))
            _handle_failure(
                state_id=state_id,
                log_file_path=failure_log,
                logger_name=logger_name,
                exception=e,
                db=db,
                path=task_group.path,
                task_group_id=task_group.id,
            )
            return

        logger.debug("Task-collection status: OK")
        logger.info("Background task collection completed successfully")
        _update_status(CollectionStatusV2.OK)
        reset_logger_handlers(logger)