fractal-server 2.7.1__py3-none-any.whl → 2.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/user_settings.py +1 -0
- fractal_server/app/models/v2/task.py +15 -0
- fractal_server/app/routes/api/v2/dataset.py +39 -6
- fractal_server/app/routes/api/v2/task.py +2 -5
- fractal_server/app/routes/api/v2/task_collection.py +14 -42
- fractal_server/app/routes/api/v2/task_collection_custom.py +3 -3
- fractal_server/app/schemas/_validators.py +1 -1
- fractal_server/app/schemas/user_settings.py +18 -0
- fractal_server/app/schemas/v2/dataset.py +6 -4
- fractal_server/app/schemas/v2/task_collection.py +31 -12
- fractal_server/migrations/versions/19eca0dd47a9_user_settings_project_dir.py +39 -0
- fractal_server/string_tools.py +10 -3
- fractal_server/tasks/utils.py +0 -31
- fractal_server/tasks/v1/background_operations.py +11 -11
- fractal_server/tasks/v1/endpoint_operations.py +5 -5
- fractal_server/tasks/v1/utils.py +2 -2
- fractal_server/tasks/v2/collection_local.py +357 -0
- fractal_server/tasks/v2/{background_operations_ssh.py → collection_ssh.py} +108 -102
- fractal_server/tasks/v2/templates/_1_create_venv.sh +0 -8
- fractal_server/tasks/v2/templates/_2_preliminary_pip_operations.sh +2 -2
- fractal_server/tasks/v2/templates/_3_pip_install.sh +22 -1
- fractal_server/tasks/v2/templates/_5_pip_show.sh +5 -5
- fractal_server/tasks/v2/utils_background.py +209 -0
- fractal_server/tasks/v2/utils_package_names.py +77 -0
- fractal_server/tasks/v2/{utils.py → utils_python_interpreter.py} +0 -26
- fractal_server/tasks/v2/utils_templates.py +59 -0
- fractal_server/utils.py +48 -3
- {fractal_server-2.7.1.dist-info → fractal_server-2.8.1.dist-info}/METADATA +11 -8
- {fractal_server-2.7.1.dist-info → fractal_server-2.8.1.dist-info}/RECORD +34 -31
- fractal_server/tasks/v2/_venv_pip.py +0 -198
- fractal_server/tasks/v2/background_operations.py +0 -456
- /fractal_server/{tasks/v2/endpoint_operations.py → app/routes/api/v2/_aux_functions_task_collection.py} +0 -0
- {fractal_server-2.7.1.dist-info → fractal_server-2.8.1.dist-info}/LICENSE +0 -0
- {fractal_server-2.7.1.dist-info → fractal_server-2.8.1.dist-info}/WHEEL +0 -0
- {fractal_server-2.7.1.dist-info → fractal_server-2.8.1.dist-info}/entry_points.txt +0 -0
@@ -1,456 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
The main function exported from this module is `background_collect_pip`, which
|
3
|
-
is used as a background task for the task-collection endpoint.
|
4
|
-
"""
|
5
|
-
import json
|
6
|
-
from pathlib import Path
|
7
|
-
from shutil import rmtree as shell_rmtree
|
8
|
-
from tempfile import TemporaryDirectory
|
9
|
-
from typing import Optional
|
10
|
-
from typing import Union
|
11
|
-
from zipfile import ZipFile
|
12
|
-
|
13
|
-
from sqlalchemy.orm import Session as DBSyncSession
|
14
|
-
from sqlalchemy.orm.attributes import flag_modified
|
15
|
-
from sqlmodel import select
|
16
|
-
|
17
|
-
from ..utils import get_collection_freeze_v2
|
18
|
-
from ..utils import get_collection_log_v2
|
19
|
-
from ..utils import get_collection_path
|
20
|
-
from ..utils import get_log_path
|
21
|
-
from .database_operations import create_db_tasks_and_update_task_group
|
22
|
-
from fractal_server.app.db import get_sync_db
|
23
|
-
from fractal_server.app.models.v2 import CollectionStateV2
|
24
|
-
from fractal_server.app.models.v2 import TaskGroupV2
|
25
|
-
from fractal_server.app.schemas.v2 import CollectionStatusV2
|
26
|
-
from fractal_server.app.schemas.v2 import TaskCreateV2
|
27
|
-
from fractal_server.app.schemas.v2 import TaskReadV2
|
28
|
-
from fractal_server.app.schemas.v2.manifest import ManifestV2
|
29
|
-
from fractal_server.logger import get_logger
|
30
|
-
from fractal_server.logger import reset_logger_handlers
|
31
|
-
from fractal_server.logger import set_logger
|
32
|
-
from fractal_server.tasks.v2._venv_pip import _create_venv_install_package_pip
|
33
|
-
from fractal_server.tasks.v2.utils import get_python_interpreter_v2
|
34
|
-
from fractal_server.utils import execute_command
|
35
|
-
|
36
|
-
|
37
|
-
def _set_collection_state_data_status(
    *,
    state_id: int,
    new_status: CollectionStatusV2,
    logger_name: str,
    db: DBSyncSession,
):
    """
    Write a new status into the `data` of a `CollectionStateV2` row.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        new_status: Status stored under `data["status"]`, after being
            passed through `CollectionStatusV2(...)`.
        logger_name: Name of the logger that receives the debug message.
        db: Synchronous database session; the update is committed here.
    """
    get_logger(logger_name).debug(
        f"{state_id=} - set state.data['status'] to {new_status}"
    )
    state = db.get(CollectionStateV2, state_id)
    state.data["status"] = CollectionStatusV2(new_status)
    # `data` was changed in place: mark it as modified before committing.
    flag_modified(state, "data")
    db.commit()
|
50
|
-
|
51
|
-
|
52
|
-
def _set_collection_state_data_log(
    *,
    state_id: int,
    new_log: str,
    logger_name: str,
    db: DBSyncSession,
):
    """
    Write a new log into the `data` of a `CollectionStateV2` row.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        new_log: Text stored under `data["log"]`.
        logger_name: Name of the logger that receives the debug message.
        db: Synchronous database session; the update is committed here.
    """
    get_logger(logger_name).debug(f"{state_id=} - set state.data['log']")
    state = db.get(CollectionStateV2, state_id)
    state.data["log"] = new_log
    # `data` was changed in place: mark it as modified before committing.
    flag_modified(state, "data")
    db.commit()
|
65
|
-
|
66
|
-
|
67
|
-
def _set_collection_state_data_info(
    *,
    state_id: int,
    new_info: str,
    logger_name: str,
    db: DBSyncSession,
):
    """
    Write a new info message into the `data` of a `CollectionStateV2` row.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        new_info: Text stored under `data["info"]`.
        logger_name: Name of the logger that receives the debug message.
        db: Synchronous database session; the update is committed here.
    """
    get_logger(logger_name).debug(f"{state_id=} - set state.data['info']")
    state = db.get(CollectionStateV2, state_id)
    state.data["info"] = new_info
    # `data` was changed in place: mark it as modified before committing.
    flag_modified(state, "data")
    db.commit()
|
80
|
-
|
81
|
-
|
82
|
-
def _handle_failure(
    state_id: int,
    log_file_path: Path,
    logger_name: str,
    exception: Exception,
    db: DBSyncSession,
    task_group_id: int,
    path: Optional[Path] = None,
):
    """
    Record a failed task collection and clean up after it.

    The collection state is marked as failed and receives both the content
    of the log file and the original error message. The folder at `path`
    (if given and existing) is removed, and the `TaskGroupV2` row is deleted
    after detaching the `CollectionStateV2` rows that reference it.

    Note: `path` is only required to trigger the folder deletion.

    Args:
        state_id: ID of the `CollectionStateV2` row to update.
        log_file_path: Log file whose content is copied into the state.
        logger_name: Name of the logger used for reporting.
        exception: The error that made the collection fail.
        db: Synchronous database session.
        task_group_id: ID of the `TaskGroupV2` row to delete.
        path: Folder to remove; `None` means that nothing is removed.
    """

    logger = get_logger(logger_name)
    logger.error(f"Task collection failed. Original error: {str(exception)}")

    _set_collection_state_data_status(
        state_id=state_id,
        new_status=CollectionStatusV2.FAIL,
        logger_name=logger_name,
        db=db,
    )

    # `read_text` closes the file handle, unlike a bare `open().read()`
    new_log = log_file_path.read_text()
    _set_collection_state_data_log(
        state_id=state_id,
        new_log=new_log,
        logger_name=logger_name,
        db=db,
    )
    # For backwards-compatibility, we also set state.data["info"]
    _set_collection_state_data_info(
        state_id=state_id,
        new_info=f"Original error: {exception}",
        logger_name=logger_name,
        db=db,
    )
    # Delete corrupted package dir
    if path is not None and Path(path).exists():
        logger.info(f"Now delete temporary folder {path}")
        shell_rmtree(path)
        logger.info("Temporary folder deleted")

    # Delete TaskGroupV2 object / and apply cascade operation to FKs
    logger.info(f"Now delete TaskGroupV2 with {task_group_id=}")
    logger.info("Start of CollectionStateV2 cascade operations.")
    stm = select(CollectionStateV2).where(
        CollectionStateV2.taskgroupv2_id == task_group_id
    )
    res = db.execute(stm)
    collection_states = res.scalars().all()
    for collection_state in collection_states:
        logger.info(
            f"Setting CollectionStateV2[{collection_state.id}].taskgroupv2_id "
            "to None."
        )
        collection_state.taskgroupv2_id = None
        db.add(collection_state)
    logger.info("End of CollectionStateV2 cascade operations.")
    task_group = db.get(TaskGroupV2, task_group_id)
    db.delete(task_group)
    db.commit()
    logger.info(f"TaskGroupV2 with {task_group_id=} deleted")

    reset_logger_handlers(logger)
|
148
|
-
|
149
|
-
|
150
|
-
def _prepare_tasks_metadata(
    *,
    package_manifest: ManifestV2,
    python_bin: Path,
    package_root: Path,
    package_version: Optional[str] = None,
) -> list[TaskCreateV2]:
    """
    Build one `TaskCreateV2` object for each task listed in the manifest.

    Args:
        package_manifest: Manifest providing the task list, the authors and
            (when `has_args_schemas` is set) the `args_schema_version`.
        python_bin: Interpreter path, used as first token of each command.
        package_root: Folder that the task executables are relative to.
        package_version: If not `None`, set as `version` of every task.

    Returns:
        The list of `TaskCreateV2` objects, in manifest order.
    """

    def _command_for(executable) -> str:
        # A command is "<interpreter> <absolute path of the task script>"
        script_path = package_root / executable
        return f"{python_bin.as_posix()} {script_path.as_posix()}"

    prepared_tasks = []
    for manifest_task in package_manifest.task_list:
        extra = {}

        # Attributes which do not depend on the task executables
        if package_version is not None:
            extra["version"] = package_version
        if package_manifest.has_args_schemas:
            extra["args_schema_version"] = package_manifest.args_schema_version

        # Commands, only for the executables that the task defines
        if manifest_task.executable_non_parallel is not None:
            extra["command_non_parallel"] = _command_for(
                manifest_task.executable_non_parallel
            )
        if manifest_task.executable_parallel is not None:
            extra["command_parallel"] = _command_for(
                manifest_task.executable_parallel
            )

        # The executables are replaced by the commands set above
        base_attributes = manifest_task.dict(
            exclude={"executable_non_parallel", "executable_parallel"}
        )
        prepared_tasks.append(
            TaskCreateV2(
                **base_attributes,
                **extra,
                authors=package_manifest.authors,
            )
        )
    return prepared_tasks
|
200
|
-
|
201
|
-
|
202
|
-
def _check_task_files_exist(task_list: list[TaskCreateV2]) -> None:
    """
    Verify that the file named in each task command exists.

    For every task, the second whitespace-separated token of
    `command_non_parallel` and of `command_parallel` (when not `None`) is
    taken as the path of the task file.

    Args:
        task_list: Tasks to check.

    Raises:
        FileNotFoundError: If one of the task files does not exist.
    """
    for task in task_list:
        for attribute in ("command_non_parallel", "command_parallel"):
            command = getattr(task, attribute)
            if command is None:
                continue
            task_file = command.split()[1]
            if Path(task_file).exists():
                continue
            raise FileNotFoundError(
                f"Task `{task.name}` has `{attribute}` "
                f"pointing to missing file `{task_file}`."
            )
|
223
|
-
|
224
|
-
|
225
|
-
async def _download_package(
    *,
    python_version: str,
    pkg_name: str,
    version: str,
    dest: Union[str, Path],
) -> Path:
    """
    Download package to destination and return wheel-file path.

    The package is fetched with `pip download --no-deps`, and the file path
    is taken from the last token of the first output line containing
    "Saved".

    Args:
        python_version: Selects the interpreter used to run `pip`.
        pkg_name: Name of the package to download.
        version: Exact version of the package to download.
        dest: Destination folder passed to `pip download -d`.

    Returns:
        Path of the downloaded file, as reported by `pip`.

    Raises:
        RuntimeError: If the `pip` output has no line containing "Saved".
    """
    python_bin = get_python_interpreter_v2(python_version=python_version)
    pip = f"{python_bin} -m pip"
    package_and_version = f"{pkg_name}=={version}"
    # NOTE(review): the command is assembled from caller-provided strings;
    # confirm that they are validated upstream and that `execute_command`
    # does not run it through a shell.
    cmd = f"{pip} download --no-deps {package_and_version} -d {dest}"
    stdout = await execute_command(command=cmd)
    # Use a default value: a bare `next()` would raise `StopIteration`, which
    # a coroutine turns into an uninformative `RuntimeError`.
    pkg_file = next(
        (line.split()[-1] for line in stdout.split("\n") if "Saved" in line),
        None,
    )
    if pkg_file is None:
        raise RuntimeError(
            f"Could not find the downloaded file for {package_and_version} "
            f"in the output of `{cmd}`."
        )
    return Path(pkg_file)
|
244
|
-
|
245
|
-
|
246
|
-
def _load_manifest_from_wheel(
    wheel_file_path: str,
    logger_name: str,
) -> ManifestV2:
    """
    Read the Fractal manifest stored inside a wheel file.

    The manifest is the first archive member whose name contains
    `__FRACTAL_MANIFEST__.json`; only manifest version "2" is accepted.

    Args:
        wheel_file_path: Path of the wheel file on disk.
        logger_name: Name of the logger used to report errors.

    Returns:
        The manifest, as a `ManifestV2` object.

    Raises:
        ValueError: If the wheel has no manifest, or if the manifest version
            is not "2".
    """
    logger = get_logger(logger_name)

    with ZipFile(wheel_file_path) as archive:
        matching_members = (
            member
            for member in archive.namelist()
            if "__FRACTAL_MANIFEST__.json" in member
        )
        try:
            manifest_member = next(matching_members)
        except StopIteration:
            error_msg = (
                f"{wheel_file_path} does not include __FRACTAL_MANIFEST__.json"
            )
            logger.error(error_msg)
            raise ValueError(error_msg)
        with archive.open(manifest_member) as manifest_file:
            manifest_content = json.load(manifest_file)

    # This local name is part of the `{manifest_version=}` error message
    manifest_version = str(manifest_content["manifest_version"])
    if manifest_version != "2":
        error_msg = f"Manifest version {manifest_version=} not supported"
        logger.error(error_msg)
        raise ValueError(error_msg)
    return ManifestV2(**manifest_content)
|
278
|
-
|
279
|
-
|
280
|
-
async def _get_package_manifest(
    *,
    task_group: TaskGroupV2,
    logger_name: str,
) -> ManifestV2:
    """
    Return the manifest of the package described by a task group.

    If the task group has a `wheel_path`, the manifest is read from that
    wheel file. Otherwise the package is downloaded into a temporary folder
    and the manifest is read from the downloaded file.

    Args:
        task_group: Provides `wheel_path`, `python_version`, `pkg_name` and
            `version`.
        logger_name: Name of the logger used while loading the manifest.

    Returns:
        The package manifest.
    """
    local_wheel = task_group.wheel_path
    if local_wheel is not None:
        # Read package manifest from wheel file
        return _load_manifest_from_wheel(
            wheel_file_path=local_wheel,
            logger_name=logger_name,
        )

    with TemporaryDirectory() as download_dir:
        downloaded_wheel = await _download_package(
            python_version=task_group.python_version,
            pkg_name=task_group.pkg_name,
            version=task_group.version,
            dest=download_dir,
        )
        # The manifest must be read before the temporary folder is removed
        return _load_manifest_from_wheel(
            wheel_file_path=downloaded_wheel.as_posix(),
            logger_name=logger_name,
        )
|
308
|
-
|
309
|
-
|
310
|
-
async def background_collect_pip(
    *,
    state_id: int,
    task_group: TaskGroupV2,
) -> None:
    """
    Create a venv, install a task package and collect its tasks.

    This function is meant to run as a background task. Its steps are:

    1. Create the task-group folder (the collection fails if it exists).
    2. Get the package manifest.
    3. Create the venv and run `pip install`.
    4. Prepare the task metadata and create the tasks in the database.
    5. Write the collection file and store log/freeze/task list in the
       collection state.

    Any failure is handled by `_handle_failure`, which is given the log
    file, the folder to remove (none, if the folder already existed) and
    the ID of the task group.

    Args:
        state_id: ID of the `CollectionStateV2` row tracking the collection.
        task_group: Task group describing the package to collect.
    """
    logger_name = (
        f"{task_group.user_id}-{task_group.pkg_name}-{task_group.version}"
    )

    try:
        Path(task_group.path).mkdir(parents=True, exist_ok=False)
    except FileExistsError as exc:
        set_logger(
            logger_name=logger_name,
            log_file_path=get_log_path(Path(task_group.path)),
        )

        failure_log_path = get_log_path(Path(task_group.path))
        with next(get_sync_db()) as db:
            _handle_failure(
                state_id=state_id,
                log_file_path=failure_log_path,
                logger_name=logger_name,
                exception=exc,
                db=db,
                path=None,  # Do not remove an existing path
                task_group_id=task_group.id,
            )
        return

    logger = set_logger(
        logger_name=logger_name,
        log_file_path=get_log_path(Path(task_group.path)),
    )

    # Start
    logger.debug("START")
    for field_name, field_value in task_group.model_dump().items():
        logger.debug(f"task_group.{field_name}: {field_value}")

    with next(get_sync_db()) as db:
        try:
            # Block 1: get and validate manifest
            pkg_manifest = await _get_package_manifest(
                task_group=task_group,
                logger_name=logger_name,
            )

            # Block 2: create venv and run pip install
            logger.debug("installing - START")
            _set_collection_state_data_status(
                state_id=state_id,
                new_status=CollectionStatusV2.INSTALLING,
                logger_name=logger_name,
                db=db,
            )
            python_bin, package_root = await _create_venv_install_package_pip(
                task_group=task_group,
                logger_name=logger_name,
            )
            logger.debug("installing - END")

            # Block 3: create task metadata and create database entries
            logger.debug("collecting - START")
            _set_collection_state_data_status(
                state_id=state_id,
                new_status=CollectionStatusV2.COLLECTING,
                logger_name=logger_name,
                db=db,
            )
            logger.debug("collecting - prepare tasks and update db - START")
            task_list = _prepare_tasks_metadata(
                package_manifest=pkg_manifest,
                package_version=task_group.version,
                package_root=package_root,
                python_bin=python_bin,
            )
            _check_task_files_exist(task_list=task_list)

            # From here on, `task_group` is the object returned by the
            # database operation
            task_group = create_db_tasks_and_update_task_group(
                task_list=task_list,
                task_group_id=task_group.id,
                db=db,
            )

            logger.debug("collecting - prepare tasks and update db - END")
            logger.debug("collecting - END")

            # Block 4: finalize (write collection files, write metadata to DB)
            logger.debug("finalising - START")
            collection_path = get_collection_path(Path(task_group.path))
            state = db.get(CollectionStateV2, state_id)
            state.data["task_list"] = [
                TaskReadV2(**task.model_dump()).dict()
                for task in task_group.task_list
            ]
            state.data["log"] = get_collection_log_v2(Path(task_group.path))
            state.data["freeze"] = get_collection_freeze_v2(
                Path(task_group.path)
            )
            with collection_path.open("w") as collection_file:
                json.dump(state.data, collection_file, indent=2)

            flag_modified(state, "data")
            db.commit()
            logger.debug("finalising - END")

        except Exception as exc:
            _handle_failure(
                state_id=state_id,
                log_file_path=get_log_path(Path(task_group.path)),
                logger_name=logger_name,
                exception=exc,
                db=db,
                path=task_group.path,
                task_group_id=task_group.id,
            )
            return

        else:
            logger.debug("Task-collection status: OK")
            logger.info("Background task collection completed successfully")
            _set_collection_state_data_status(
                state_id=state_id,
                new_status=CollectionStatusV2.OK,
                logger_name=logger_name,
                db=db,
            )
            reset_logger_handlers(logger)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|