fractal-server 2.15.1__py3-none-any.whl → 2.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +2 -6
- fractal_server/app/routes/admin/v2/job.py +8 -1
- fractal_server/app/routes/api/v2/_aux_functions.py +58 -0
- fractal_server/app/routes/api/v2/history.py +62 -23
- fractal_server/app/routes/api/v2/workflow.py +13 -0
- fractal_server/app/routes/api/v2/workflowtask.py +10 -0
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +46 -4
- fractal_server/app/runner/executors/slurm_common/remote.py +9 -9
- fractal_server/app/runner/v2/_slurm_ssh.py +0 -13
- fractal_server/ssh/_fabric.py +6 -1
- fractal_server/tasks/v2/local/collect_pixi.py +1 -1
- fractal_server/tasks/v2/local/reactivate_pixi.py +1 -1
- fractal_server/tasks/v2/ssh/collect_pixi.py +1 -1
- fractal_server/tasks/v2/ssh/reactivate_pixi.py +1 -1
- {fractal_server-2.15.1.dist-info → fractal_server-2.15.3.dist-info}/METADATA +1 -1
- {fractal_server-2.15.1.dist-info → fractal_server-2.15.3.dist-info}/RECORD +20 -20
- {fractal_server-2.15.1.dist-info → fractal_server-2.15.3.dist-info}/LICENSE +0 -0
- {fractal_server-2.15.1.dist-info → fractal_server-2.15.3.dist-info}/WHEEL +0 -0
- {fractal_server-2.15.1.dist-info → fractal_server-2.15.3.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.15.1"
+__VERSION__ = "2.15.3"

fractal_server/app/db/__init__.py
CHANGED
@@ -45,13 +45,11 @@ class DB:
         settings = Inject(get_settings)
         settings.check_db()

-        engine_kwargs_async = {"pool_pre_ping": True}
-
         cls._engine_async = create_async_engine(
             settings.DATABASE_ASYNC_URL,
             echo=settings.DB_ECHO,
             future=True,
-            **engine_kwargs_async,
+            pool_pre_ping=True,
         )
         cls._async_session_maker = sessionmaker(
             cls._engine_async,
@@ -65,13 +63,11 @@ class DB:
         settings = Inject(get_settings)
         settings.check_db()

-        engine_kwargs_sync = {}
-
         cls._engine_sync = create_engine(
             settings.DATABASE_SYNC_URL,
             echo=settings.DB_ECHO,
             future=True,
-            **engine_kwargs_sync,
+            pool_pre_ping=True,
         )

         cls._sync_session_maker = sessionmaker(
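The net effect of the change above is that both engines now pass `pool_pre_ping=True` directly to the SQLAlchemy engine constructors instead of routing it through an intermediate kwargs dict (which the sync engine previously left empty). As a rough standalone sketch (placeholder SQLite URL, not fractal-server code), this is what the flag does on the SQLAlchemy side: each connection checked out of the pool is first "pinged", and stale connections are transparently replaced.

```python
# Standalone sketch, not fractal-server code: `pool_pre_ping=True` makes the
# connection pool test each connection on checkout and silently recycle it if
# the database has dropped it (e.g. after a restart or idle timeout).
from sqlalchemy import create_engine, text

# Placeholder in-memory URL, for illustration only.
engine = create_engine("sqlite://", pool_pre_ping=True, future=True)

with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())  # -> 1
```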

fractal_server/app/routes/admin/v2/job.py
CHANGED
@@ -156,8 +156,15 @@ async def update_job(
             detail=f"Cannot set job status to {job_update.status}",
         )

+    timestamp = get_timestamp()
     setattr(job, "status", job_update.status)
-    setattr(job, "end_timestamp", get_timestamp())
+    setattr(job, "end_timestamp", timestamp)
+    setattr(
+        job,
+        "log",
+        f"{job.log or ''}\nThis job was manually marked as "
+        f"'{JobStatusTypeV2.FAILED}' by an admin ({timestamp.isoformat()}).",
+    )
     await db.commit()
     await db.refresh(job)
     await db.close()

fractal_server/app/routes/api/v2/_aux_functions.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Literal

 from fastapi import HTTPException
 from fastapi import status
+from sqlalchemy.exc import MultipleResultsFound
 from sqlalchemy.orm.attributes import flag_modified
 from sqlmodel import select
 from sqlmodel.sql.expression import SelectOfScalar
@@ -19,6 +20,9 @@ from ....models.v2 import TaskV2
 from ....models.v2 import WorkflowTaskV2
 from ....models.v2 import WorkflowV2
 from ....schemas.v2 import JobStatusTypeV2
+from fractal_server.logger import set_logger
+
+logger = set_logger(__name__)


 async def _get_project_check_owner(
@@ -325,6 +329,24 @@ def _get_submitted_jobs_statement() -> SelectOfScalar:
     return stm


+async def _workflow_has_submitted_job(
+    workflow_id: int,
+    db: AsyncSession,
+) -> bool:
+
+    res = await db.execute(
+        select(JobV2.id)
+        .where(JobV2.status == JobStatusTypeV2.SUBMITTED)
+        .where(JobV2.workflow_id == workflow_id)
+        .limit(1)
+    )
+    submitted_jobs = res.scalar_one_or_none()
+    if submitted_jobs is not None:
+        return True
+
+    return False
+
+
 async def _workflow_insert_task(
     *,
     workflow_id: int,
@@ -481,3 +503,39 @@ async def _get_workflowtask_or_404(
         )
     else:
         return wftask
+
+
+async def _get_submitted_job_or_none(
+    *,
+    dataset_id: int,
+    workflow_id: int,
+    db: AsyncSession,
+) -> JobV2 | None:
+    """
+    Get the submitted job for given dataset/workflow, if any.
+
+    This function also handles the invalid branch where more than one job
+    is found.
+
+    Args:
+        dataset_id:
+        workflow_id:
+        db:
+    """
+    res = await db.execute(
+        _get_submitted_jobs_statement()
+        .where(JobV2.dataset_id == dataset_id)
+        .where(JobV2.workflow_id == workflow_id)
+    )
+    try:
+        return res.scalars().one_or_none()
+    except MultipleResultsFound as e:
+        error_msg = (
+            "Multiple running jobs found for "
+            f"{dataset_id=} and {workflow_id=}."
+        )
+        logger.error(f"{error_msg} Original error: {str(e)}.")
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=error_msg,
+        )
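Both new helpers lean on the SQLAlchemy result API: `_workflow_has_submitted_job` calls `scalar_one_or_none()` on a `LIMIT 1` query, while `_get_submitted_job_or_none` lets `one_or_none()` raise `MultipleResultsFound` when the database is in the invalid state of holding several submitted jobs for the same dataset/workflow pair. A self-contained sketch of that second pattern (toy `Job` model and in-memory database, not the real `JobV2`):

```python
# Toy model and in-memory database; only the one_or_none()/MultipleResultsFound
# pattern mirrors the helper above.
from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Job(Base):  # hypothetical stand-in for JobV2
    __tablename__ = "job"
    id = Column(Integer, primary_key=True)
    status = Column(String)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([Job(status="submitted"), Job(status="submitted")])
    session.commit()
    stm = select(Job).where(Job.status == "submitted")
    try:
        running = session.execute(stm).scalars().one_or_none()
    except MultipleResultsFound:
        # The API route maps this invalid state to a 422 response.
        print("Multiple running jobs found")
```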

fractal_server/app/routes/api/v2/history.py
CHANGED
@@ -1,5 +1,3 @@
-from copy import deepcopy
-
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi import HTTPException
@@ -9,6 +7,7 @@ from sqlmodel import func
 from sqlmodel import select

 from ._aux_functions import _get_dataset_check_owner
+from ._aux_functions import _get_submitted_job_or_none
 from ._aux_functions import _get_workflow_check_owner
 from ._aux_functions_history import _verify_workflow_and_dataset_access
 from ._aux_functions_history import get_history_run_or_404
@@ -72,6 +71,7 @@ async def get_workflow_tasks_statuses(
     user: UserOAuth = Depends(current_active_user),
     db: AsyncSession = Depends(get_async_db),
 ) -> JSONResponse:
+
     # Access control
     workflow = await _get_workflow_check_owner(
         project_id=project_id,
@@ -86,6 +86,19 @@
         db=db,
     )

+    running_job = await _get_submitted_job_or_none(
+        db=db,
+        dataset_id=dataset_id,
+        workflow_id=workflow_id,
+    )
+    if running_job is not None:
+        running_wftasks = workflow.task_list[
+            running_job.first_task_index : running_job.last_task_index + 1
+        ]
+        running_wftask_ids = [wft.id for wft in running_wftasks]
+    else:
+        running_wftask_ids = []
+
     response: dict[int, dict[str, int | str] | None] = {}
     for wftask in workflow.task_list:
         res = await db.execute(
@@ -95,17 +108,37 @@
             .order_by(HistoryRun.timestamp_started.desc())
             .limit(1)
         )
-
-
-
-
-
-
+        latest_run = res.scalar_one_or_none()
+
+        if latest_run is None:
+            if wftask.id in running_wftask_ids:
+                logger.debug(f"A1: No HistoryRun for {wftask.id=}.")
+                response[wftask.id] = dict(status=HistoryUnitStatus.SUBMITTED)
+            else:
+                logger.debug(f"A2: No HistoryRun for {wftask.id=}.")
+                response[wftask.id] = None
             continue
-
-
-
-
+        else:
+            if wftask.id in running_wftask_ids:
+                if latest_run.job_id == running_job.id:
+                    logger.debug(
+                        f"B1 for {wftask.id} and {latest_run.job_id=}."
+                    )
+                    response[wftask.id] = dict(status=latest_run.status)
+                else:
+                    logger.debug(
+                        f"B2 for {wftask.id} and {latest_run.job_id=}."
+                    )
+                    response[wftask.id] = dict(
+                        status=HistoryUnitStatus.SUBMITTED
+                    )
+            else:
+                logger.debug(f"C1: {wftask.id=} not in {running_wftask_ids=}.")
+                response[wftask.id] = dict(status=latest_run.status)
+
+        response[wftask.id][
+            "num_available_images"
+        ] = latest_run.num_available_images

         for target_status in HistoryUnitStatus:
             stm = (
@@ -122,18 +155,24 @@
             num_images = res.scalar()
             response[wftask.id][f"num_{target_status}_images"] = num_images

-
-
-
-
-
-
-
-
-
-
+    # Set `num_available_images=None` for cases where it would be
+    # smaller than `num_total_images`
+    values_to_skip = (None, {"status": HistoryUnitStatus.SUBMITTED})
+    response_update = {}
+    for wftask_id, status_value in response.items():
+        if status_value in values_to_skip:
+            # Skip cases where status has no image counters
+            continue
+        num_total_images = sum(
+            status_value[f"num_{target_status}_images"]
+            for target_status in HistoryUnitStatus
+        )
+        if num_total_images > status_value["num_available_images"]:
+            status_value["num_available_images"] = None
+        response_update[wftask_id] = status_value
+    response.update(response_update)

-    return JSONResponse(content=
+    return JSONResponse(content=response, status_code=200)


 @router.get("/project/{project_id}/status/run/")
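The status endpoint treats the slice `task_list[first_task_index : last_task_index + 1]` as the set of WorkflowTasks covered by the running job; the `+ 1` is needed because `last_task_index` is inclusive while Python slices are half-open. A toy illustration with made-up IDs:

```python
# Made-up WorkflowTask IDs; only the inclusive-end slicing convention matters.
task_ids = [11, 12, 13, 14, 15]
first_task_index, last_task_index = 1, 3  # taken from the submitted job

running_ids = task_ids[first_task_index : last_task_index + 1]
print(running_ids)  # [12, 13, 14]
```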

fractal_server/app/routes/api/v2/workflow.py
CHANGED
@@ -22,6 +22,7 @@ from ._aux_functions import _check_workflow_exists
 from ._aux_functions import _get_project_check_owner
 from ._aux_functions import _get_submitted_jobs_statement
 from ._aux_functions import _get_workflow_check_owner
+from ._aux_functions import _workflow_has_submitted_job
 from ._aux_functions_tasks import _add_warnings_to_workflow_tasks
 from fractal_server.app.models import UserOAuth
 from fractal_server.app.models.v2 import TaskGroupV2
@@ -146,6 +147,18 @@ async def update_workflow(

     for key, value in patch.model_dump(exclude_unset=True).items():
         if key == "reordered_workflowtask_ids":
+
+            if await _workflow_has_submitted_job(
+                workflow_id=workflow_id, db=db
+            ):
+                raise HTTPException(
+                    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                    detail=(
+                        "Cannot re-order WorkflowTasks while a Job is running "
+                        "for this Workflow."
+                    ),
+                )
+
             current_workflowtask_ids = [
                 wftask.id for wftask in workflow.task_list
             ]

fractal_server/app/routes/api/v2/workflowtask.py
CHANGED
@@ -10,6 +10,7 @@ from ....db import AsyncSession
 from ....db import get_async_db
 from ._aux_functions import _get_workflow_check_owner
 from ._aux_functions import _get_workflow_task_check_owner
+from ._aux_functions import _workflow_has_submitted_job
 from ._aux_functions import _workflow_insert_task
 from ._aux_functions_tasks import _check_type_filters_compatibility
 from ._aux_functions_tasks import _get_task_read_access
@@ -224,6 +225,15 @@ async def delete_workflowtask(
         db=db,
     )

+    if await _workflow_has_submitted_job(workflow_id=workflow_id, db=db):
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                "Cannot delete a WorkflowTask while a Job is running for this "
+                "Workflow."
+            ),
+        )
+
     # Delete WorkflowTask
     await db.delete(db_workflow_task)
     await db.commit()

fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py
CHANGED
@@ -137,6 +137,34 @@ class BaseSlurmRunner(BaseRunner):
     def run_squeue(self, *, job_ids: list[str], **kwargs) -> str:
         raise NotImplementedError("Implement in child class.")

+    def _is_squeue_error_recoverable(self, exception: BaseException) -> True:
+        """
+        Determine whether a `squeue` error is considered recoverable.
+
+        A _recoverable_ error is one which will disappear after some time,
+        without any specific action from the `fractal-server` side.
+
+        Note: if this function returns `True` for an error that does not
+        actually recover, this leads to an infinite loop where
+        `fractal-server` keeps polling `squeue` information forever.
+
+        More info at
+        https://github.com/fractal-analytics-platform/fractal-server/issues/2682
+
+        Args:
+            exception: The exception raised by `self.run_squeue`.
+        Returns:
+            Whether the error is considered recoverable.
+        """
+        str_exception = str(exception)
+        if (
+            "slurm_load_jobs" in str_exception
+            and "Socket timed out on send/recv operation" in str_exception
+        ):
+            return True
+        else:
+            return False
+
     def _get_finished_jobs(self, job_ids: list[str]) -> set[str]:
         # If there is no Slurm job to check, return right away
         if not job_ids:
@@ -161,12 +189,26 @@ class BaseSlurmRunner(BaseRunner):
                    {stdout.split()[0]: stdout.split()[1]}
                )
            except Exception as e:
-                logger.warning(
-                    "[_get_finished_jobs] `squeue` failed for "
-                    f"{job_id=}, mark job as completed. "
+                msg = (
+                    f"[_get_finished_jobs] `squeue` failed for {job_id=}. "
                     f"Original error: {str(e)}."
                 )
-                slurm_statuses.update({str(job_id): "COMPLETED"})
+                logger.warning(msg)
+                if self._is_squeue_error_recoverable(e):
+                    logger.warning(
+                        "[_get_finished_jobs] Recoverable `squeue` "
+                        f"error - mark {job_id=} as FRACTAL_UNDEFINED and"
+                        " retry later."
+                    )
+                    slurm_statuses.update(
+                        {str(job_id): "FRACTAL_UNDEFINED"}
+                    )
+                else:
+                    logger.warning(
+                        "[_get_finished_jobs] Non-recoverable `squeue`"
+                        f"error - mark {job_id=} as completed."
+                    )
+                    slurm_statuses.update({str(job_id): "COMPLETED"})

         # If a job is not in `squeue` output, mark it as completed.
         finished_jobs = {
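The decision logic added above can be summarized without any SLURM machinery: a recoverable `squeue` failure (currently only the `slurm_load_jobs ... Socket timed out` case) leaves the job in a placeholder state so that it is polled again, while any other failure marks it as completed so the runner does not loop forever. A minimal standalone sketch of that classification:

```python
# Standalone sketch of the classification above; not the BaseSlurmRunner class.
def is_recoverable(error_message: str) -> bool:
    return (
        "slurm_load_jobs" in error_message
        and "Socket timed out on send/recv operation" in error_message
    )


def status_after_squeue_failure(error_message: str) -> str:
    if is_recoverable(error_message):
        return "FRACTAL_UNDEFINED"  # keep the job ID and poll again later
    return "COMPLETED"  # give up on polling and treat the job as finished


print(status_after_squeue_failure(
    "slurm_load_jobs error: Socket timed out on send/recv operation"
))  # FRACTAL_UNDEFINED
print(status_after_squeue_failure("squeue: some other error"))  # COMPLETED
```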

fractal_server/app/runner/executors/slurm_common/remote.py
CHANGED
@@ -1,6 +1,5 @@
 import argparse
 import json
-import logging
 import os
 import sys

@@ -32,7 +31,6 @@ def worker(
     # Create output folder, if missing
     out_dir = os.path.dirname(out_fname)
     if not os.path.exists(out_dir):
-        logging.debug(f"_slurm.remote.worker: create {out_dir=}")
         os.mkdir(out_dir)

     # Execute the job and capture exceptions
@@ -40,10 +38,8 @@
     with open(in_fname) as f:
         input_data = json.load(f)

-    server_python_version = input_data["python_version"]
-    server_fractal_server_version = input_data["fractal_server_version"]
-
     # Fractal-server version must be identical
+    server_fractal_server_version = input_data["fractal_server_version"]
     worker_fractal_server_version = __VERSION__
     if worker_fractal_server_version != server_fractal_server_version:
         raise FractalVersionMismatch(
@@ -51,11 +47,16 @@
             f"{worker_fractal_server_version=}"
         )

-    #
-
+    # Get `worker_python_version` as a `list` since this is the type of
+    # `server_python_version` after a JSON dump/load round trip.
+    worker_python_version = list(sys.version_info[:3])
+
+    # Print a warning for Python version mismatch
+    server_python_version = input_data["python_version"]
     if worker_python_version != server_python_version:
         if worker_python_version[:2] != server_python_version[:2]:
-
+            print(
+                "WARNING: "
                 f"{server_python_version=} but {worker_python_version=}."
             )

@@ -116,7 +117,6 @@ if __name__ == "__main__":
         required=True,
     )
     parsed_args = parser.parse_args()
-    logging.debug(f"{parsed_args=}")

     kwargs = dict(
         in_fname=parsed_args.input_file,
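The reordering in `worker()` also documents why the Python version is compared as a list: the server serializes `sys.version_info[:3]` to JSON, and a JSON round trip turns the tuple into a list. A small standalone demonstration:

```python
# Standalone demonstration of the tuple-vs-list issue mentioned in the comment
# added above; not fractal-server code.
import json
import sys

server_python_version = json.loads(json.dumps(sys.version_info[:3]))
print(server_python_version)                                 # e.g. [3, 11, 9]
print(server_python_version == sys.version_info[:3])         # False (list vs tuple)
print(server_python_version == list(sys.version_info[:3]))   # True
```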

fractal_server/app/runner/v2/_slurm_ssh.py
CHANGED
@@ -20,7 +20,6 @@ from pathlib import Path
 from ....ssh._fabric import FractalSSH
 from ...models.v2 import DatasetV2
 from ...models.v2 import WorkflowV2
-from ..exceptions import JobExecutionError
 from ..executors.slurm_common.get_slurm_config import get_slurm_config
 from ..executors.slurm_ssh.runner import SlurmSSHRunner
 from ..set_start_and_last_task_index import set_start_and_last_task_index
@@ -64,18 +63,6 @@ def process_workflow(
     if isinstance(worker_init, str):
         worker_init = worker_init.split("\n")

-    # Create main remote folder
-    try:
-        fractal_ssh.mkdir(folder=str(workflow_dir_remote))
-        logger.info(f"Created {str(workflow_dir_remote)} via SSH.")
-    except Exception as e:
-        error_msg = (
-            f"Could not create {str(workflow_dir_remote)} via SSH.\n"
-            f"Original error: {str(e)}."
-        )
-        logger.error(error_msg)
-        raise JobExecutionError(info=error_msg)
-
     with SlurmSSHRunner(
         fractal_ssh=fractal_ssh,
         root_dir_local=workflow_dir_local,

fractal_server/ssh/_fabric.py
CHANGED
@@ -165,7 +165,11 @@ class FractalSSH:
             raise e

     def _run(
-        self,
+        self,
+        *args,
+        label: str,
+        lock_timeout: float | None = None,
+        **kwargs,
     ) -> Any:
         actual_lock_timeout = self.default_lock_timeout
         if lock_timeout is not None:
@@ -353,6 +357,7 @@ class FractalSSH:
                 label=f"run {cmd}",
                 lock_timeout=actual_lock_timeout,
                 hide=True,
+                in_stream=False,
             )
             t_1 = time.perf_counter()
             self.logger.info(
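`in_stream=False` is a Fabric/Invoke option: it disables mirroring of the local standard input into the remote command, which avoids stdin-related errors when the server process runs without a usable stdin (e.g. as a daemon). A hedged sketch of the call shape (hypothetical host and command, assumes `fabric` is installed; not fractal-server code):

```python
# Hedged sketch: hostname and command below are placeholders.
from fabric import Connection


def run_remote(host: str, cmd: str) -> str:
    with Connection(host) as connection:
        result = connection.run(cmd, hide=True, in_stream=False)
        return result.stdout


# Example (would require a reachable SSH host):
# print(run_remote("cluster.example.org", "hostname"))
```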

fractal_server/tasks/v2/local/collect_pixi.py
CHANGED
@@ -161,7 +161,7 @@ def collect_local_pixi(

         # Make task folder 755
         source_dir = Path(task_group.path, SOURCE_DIR_NAME).as_posix()
-        command = f"chmod 755 {source_dir}"
+        command = f"chmod -R 755 {source_dir}"
         execute_command_sync(
             command=command,
             logger_name=LOGGER_NAME,

fractal_server/tasks/v2/local/reactivate_pixi.py
CHANGED
@@ -145,7 +145,7 @@ def reactivate_local_pixi(

         # Make task folder 755
         source_dir = Path(task_group.path, SOURCE_DIR_NAME).as_posix()
-        command = f"chmod 755 {source_dir}"
+        command = f"chmod -R 755 {source_dir}"
         execute_command_sync(
             command=command,
             logger_name=LOGGER_NAME,

fractal_server/tasks/v2/ssh/collect_pixi.py
CHANGED
@@ -221,7 +221,7 @@ def collect_ssh_pixi(
         source_dir = Path(
             task_group.path, SOURCE_DIR_NAME
         ).as_posix()
-        fractal_ssh.run_command(cmd=f"chmod 755 {source_dir}")
+        fractal_ssh.run_command(cmd=f"chmod -R 755 {source_dir}")

         # Read and validate remote manifest file
         manifest_path_remote = (

fractal_server/tasks/v2/ssh/reactivate_pixi.py
CHANGED
@@ -196,7 +196,7 @@ def reactivate_ssh_pixi(
             activity.log = get_current_log(log_file_path)
             activity = add_commit_refresh(obj=activity, db=db)

-            fractal_ssh.run_command(cmd=f"chmod 755 {source_dir}")
+            fractal_ssh.run_command(cmd=f"chmod -R 755 {source_dir}")

             # Finalize (write metadata to DB)
             activity.status = TaskGroupActivityStatusV2.OK
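All four pixi lifecycle modules switch from `chmod 755` to `chmod -R 755`: without `-R` only the top-level source folder gets the new mode, while the recursive form also covers every file and subdirectory of the unpacked task source. A small standalone check (temporary directory, not the real task-group path; POSIX only):

```python
# Standalone check using a throwaway directory tree.
import subprocess
import tempfile
from pathlib import Path

source_dir = Path(tempfile.mkdtemp()) / "source_dir"
(source_dir / "nested").mkdir(parents=True)

subprocess.run(["chmod", "-R", "755", source_dir.as_posix()], check=True)
print(oct(source_dir.stat().st_mode & 0o777))               # 0o755
print(oct((source_dir / "nested").stat().st_mode & 0o777))  # 0o755
```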

{fractal_server-2.15.1.dist-info → fractal_server-2.15.3.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
-fractal_server/__init__.py,sha256=
+fractal_server/__init__.py,sha256=TohihjKSFnz5CZYMVH94PEv_l0OHohZckQFN39DhyqE,23
 fractal_server/__main__.py,sha256=rkM8xjY1KeS3l63irB8yCrlVobR-73uDapC4wvrIlxI,6957
 fractal_server/alembic.ini,sha256=MWwi7GzjzawI9cCAK1LW7NxIBQDUqD12-ptJoq5JpP0,3153
 fractal_server/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fractal_server/app/db/__init__.py,sha256=
+fractal_server/app/db/__init__.py,sha256=U2gwpNyy79iMsK1lg43LRl9z-MW8wiOaICJ7GGdA4yo,2814
 fractal_server/app/models/__init__.py,sha256=xJWiGAwpXmCpnFMC4c_HTqoUCzMOXrakoGLUH_uMvdA,415
 fractal_server/app/models/linkusergroup.py,sha256=3KkkE4QIUAlTrBAZs_tVy0pGvAxUAq6yOEjflct_z2M,678
 fractal_server/app/models/linkuserproject.py,sha256=hvaxh3Lkiy2uUCwB8gvn8RorCpvxSSdzWdCS_U1GL7g,315
@@ -23,21 +23,21 @@ fractal_server/app/routes/admin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 fractal_server/app/routes/admin/v2/__init__.py,sha256=_5lqb6-M8-fZqE1HRMep6pAFYRUKMxrvbZOKs-RXWkw,933
 fractal_server/app/routes/admin/v2/accounting.py,sha256=YPWwCWylXrJpV4bq_dJ3t6Kn5uuveTrFx-5w1wzfETU,3594
 fractal_server/app/routes/admin/v2/impersonate.py,sha256=gc4lshfEPFR6W2asH7aKu6hqE6chzusdhAUVV9p51eU,1131
-fractal_server/app/routes/admin/v2/job.py,sha256=
+fractal_server/app/routes/admin/v2/job.py,sha256=VcyXHYjieOKnTAi1NsiO_bK3A6UufUwX2lmWCwa4sa0,7585
 fractal_server/app/routes/admin/v2/project.py,sha256=MA_LdoEuSuisSGRO43TapMuJ080y5iaUGSAUgKuuKOg,1188
 fractal_server/app/routes/admin/v2/task.py,sha256=93QIbWZNnqaBhG9R9-RStDX2mpqRNN3G7BIb0KM-jeE,4312
 fractal_server/app/routes/admin/v2/task_group.py,sha256=biibAvMPD2w-267eyTm3wH2s3mITjiS5gYzwCCwmLbI,7099
 fractal_server/app/routes/admin/v2/task_group_lifecycle.py,sha256=2J3M9VXWD_0j9jRTZ5APuUXl9E-aVv0qF8K02vvcO3s,9150
 fractal_server/app/routes/api/__init__.py,sha256=B8l6PSAhR10iZqHEiyTat-_0tkeKdrCigIE6DJGP5b8,638
 fractal_server/app/routes/api/v2/__init__.py,sha256=D3sRRsqkmZO6kBxUjg40q0aRDsnuXI4sOOfn0xF9JsM,2820
-fractal_server/app/routes/api/v2/_aux_functions.py,sha256=
+fractal_server/app/routes/api/v2/_aux_functions.py,sha256=YU7yT9L6yc01VMWozXPnRcx0X0063rTylmeU6PKNyKM,14260
 fractal_server/app/routes/api/v2/_aux_functions_history.py,sha256=Z23xwvBaVEEQ5B-JsWZJpjj4_QqoXqHYONztnbAH6gw,4425
 fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py,sha256=GpKfw9yj01LmOAuNMTOreU1PFkCKpjK5oCt7_wp35-A,6741
 fractal_server/app/routes/api/v2/_aux_functions_task_version_update.py,sha256=WLDOYCnb6fnS5avKflyx6yN24Vo1n5kJk5ZyiKbzb8Y,1175
 fractal_server/app/routes/api/v2/_aux_functions_tasks.py,sha256=MNty8CBnTMPSAKE5gMT7tCY8QWpCQyhft_shq12hHpA,12208
 fractal_server/app/routes/api/v2/_aux_task_group_disambiguation.py,sha256=8x1_q9FyCzItnPmdSdLQuwUTy4B9xCsXscp97_lJcpM,4635
 fractal_server/app/routes/api/v2/dataset.py,sha256=6u4MFqJ3YZ0Zq6Xx8CRMrTPKW55ZaR63Uno21DqFr4Q,8889
-fractal_server/app/routes/api/v2/history.py,sha256=
+fractal_server/app/routes/api/v2/history.py,sha256=ErLqkJbhx9XzHL4KQvMraVAtD9WOmDNh5tZi5wmNkL0,17114
 fractal_server/app/routes/api/v2/images.py,sha256=TS1ltUhP0_SaViupdHrSh3MLDi5OVk-lOhE1VCVyZj0,7869
 fractal_server/app/routes/api/v2/job.py,sha256=8xRTwh_OCHmK9IfI_zUASa2ozewR0qu0zVBl_a4IvHw,6467
 fractal_server/app/routes/api/v2/pre_submission_checks.py,sha256=2jaaM5WJBTGpOWhm6a42JViT8j4X5hixltxIY1p-188,4936
@@ -51,9 +51,9 @@ fractal_server/app/routes/api/v2/task_collection_pixi.py,sha256=LS5xOYRRvI25TyvP
 fractal_server/app/routes/api/v2/task_group.py,sha256=Wmp5Rt6NQm8_EbdJyi3XOkTXxJTTd4MNIy0ja6K-ifA,9205
 fractal_server/app/routes/api/v2/task_group_lifecycle.py,sha256=-uS_z8E3__t_twEqhZOzcEcAxZsgnpg-c7Ya9RF3_bs,9998
 fractal_server/app/routes/api/v2/task_version_update.py,sha256=o8W_C0I84X0u8gAMnCvi8ChiVAKrb5WzUBuJLSuujCA,8235
-fractal_server/app/routes/api/v2/workflow.py,sha256=
+fractal_server/app/routes/api/v2/workflow.py,sha256=SfjegoVO4DaGmDD7OPhWNLkcvZhJKwNX4DTQAcVKk9Q,10699
 fractal_server/app/routes/api/v2/workflow_import.py,sha256=kOGDaCj0jCGK1WSYGbnUjtUg2U1YxUY9UMH-2ilqJg4,9027
-fractal_server/app/routes/api/v2/workflowtask.py,sha256=
+fractal_server/app/routes/api/v2/workflowtask.py,sha256=5_SQAG8ztDnaaRXwKalcO69HVpSl-QbrhiI7fCP3YRI,7924
 fractal_server/app/routes/auth/__init__.py,sha256=fao6CS0WiAjHDTvBzgBVV_bSXFpEAeDBF6Z6q7rRkPc,1658
 fractal_server/app/routes/auth/_aux_auth.py,sha256=UZgauY0V6mSqjte_sYI1cBl2h8bcbLaeWzgpl1jdJlk,4883
 fractal_server/app/routes/auth/current_user.py,sha256=EjkwMxUA0l6FLbDJdertHGnuOoSS-HEysmm6l5FkAlY,5903
@@ -81,9 +81,9 @@ fractal_server/app/runner/executors/slurm_common/__init__.py,sha256=47DEQpj8HBSa
 fractal_server/app/runner/executors/slurm_common/_batching.py,sha256=gbHZIxt90GjUwhB9_UInwVqpX-KdxRQMDeXzUagdL3U,8816
 fractal_server/app/runner/executors/slurm_common/_job_states.py,sha256=nuV-Zba38kDrRESOVB3gaGbrSPZc4q7YGichQaeqTW0,238
 fractal_server/app/runner/executors/slurm_common/_slurm_config.py,sha256=U9BONnnwn8eDqDevwUtFSBcvIsxvNgDHirhcQGJ9t9E,15947
-fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py,sha256=
+fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py,sha256=1Sh56lb7NERVtsBMvVs4K7nVHhMy_KDbwquPl1ub8vE,37937
 fractal_server/app/runner/executors/slurm_common/get_slurm_config.py,sha256=jhoFHauWJm55bIC_v7pFylbK8WgcRJemGu2OjUiRbpQ,7377
-fractal_server/app/runner/executors/slurm_common/remote.py,sha256=
+fractal_server/app/runner/executors/slurm_common/remote.py,sha256=LHK2Ram8X8q6jNSCxnnwKUwmSJMsyQyRem_VjH53qdw,3811
 fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py,sha256=K4SdJOKsUWzDlnkb8Ug_UmTx6nBMsTqn9_oKqwE4XDI,3520
 fractal_server/app/runner/executors/slurm_ssh/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/executors/slurm_ssh/run_subprocess.py,sha256=SyW6t4egvbiARph2YkFjc88Hj94fCamZVi50L7ph8VM,996
@@ -98,7 +98,7 @@ fractal_server/app/runner/shutdown.py,sha256=ViSNJyXWU_iWPSDOOMGNh_iQdUFrdPh_jvf
 fractal_server/app/runner/task_files.py,sha256=V_7aZhu6-c6Y-0XUe-5cZVDrdnXEJhp8pQoUMtx6ko0,4041
 fractal_server/app/runner/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/v2/_local.py,sha256=tTJgABK-zAZmmRzoie_MPNTXJx_zBAXiZiiWl1CC2qo,3035
-fractal_server/app/runner/v2/_slurm_ssh.py,sha256=
+fractal_server/app/runner/v2/_slurm_ssh.py,sha256=JlDngsVSOUNqEubDl-5jkIxQQmV4mhTqbMbJVY_rL6M,2840
 fractal_server/app/runner/v2/_slurm_sudo.py,sha256=Gvsh4tUlc1_3KdF3B7zEqs-YIntC_joLtTGSNFbKKSs,2939
 fractal_server/app/runner/v2/db_tools.py,sha256=du5dKhMMFMErQXbGIgu9JvO_vtMensodyPsyDeqz1yQ,3324
 fractal_server/app/runner/v2/deduplicate_list.py,sha256=IVTE4abBU1bUprFTkxrTfYKnvkNTanWQ-KWh_etiT08,645
@@ -185,7 +185,7 @@ fractal_server/migrations/versions/f384e1c0cf5d_drop_task_default_args_columns.p
 fractal_server/migrations/versions/fbce16ff4e47_new_history_items.py,sha256=TDWCaIoM0Q4SpRWmR9zr_rdp3lJXhCfBPTMhtrP5xYE,3950
 fractal_server/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/ssh/__init__.py,sha256=sVUmzxf7_DuXG1xoLQ1_00fo5NPhi2LJipSmU5EAkPs,124
-fractal_server/ssh/_fabric.py,sha256=
+fractal_server/ssh/_fabric.py,sha256=7fCxTYqkAOaTTm67trfYdYQenOsI4EfrRQoG6x3M5kk,25188
 fractal_server/string_tools.py,sha256=qLB5u6-4QxXPiZrUeWn_cEo47axj4OXFzDd47kNTIWw,1847
 fractal_server/syringe.py,sha256=3YJeIALH-wibuJ9R5VMNYUWh7x1-MkWT0SqGcWG5MY8,2795
 fractal_server/tasks/__init__.py,sha256=kadmVUoIghl8s190_Tt-8f-WBqMi8u8oU4Pvw39NHE8,23
@@ -194,19 +194,19 @@ fractal_server/tasks/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 fractal_server/tasks/v2/local/__init__.py,sha256=S842wRersYKBKjc7xbmj0ov8b5i1YuCHa2f_yYuxcaI,312
 fractal_server/tasks/v2/local/_utils.py,sha256=p2KJ4BvEwJxahICpzbvzrc5-ciLCFnLXWPCwdNGi-3Q,2495
 fractal_server/tasks/v2/local/collect.py,sha256=MQncScKbWv3g9lrjF8WOhzuEoTEOOgS02RqOJno5csI,11897
-fractal_server/tasks/v2/local/collect_pixi.py,sha256=
+fractal_server/tasks/v2/local/collect_pixi.py,sha256=coV9SqOf5rz2dgUFG7uVisPFS0xvXEubFwU7rb3QHe8,10753
 fractal_server/tasks/v2/local/deactivate.py,sha256=LoEs2TUoHQOq3JfxufW6zroXD-Xx_b-hLtdigEBi1JU,9732
 fractal_server/tasks/v2/local/deactivate_pixi.py,sha256=_ycvnLIZ8zUFB3fZbCzDlNudh-SSetl4UkyFrClCcUU,3494
 fractal_server/tasks/v2/local/reactivate.py,sha256=Q43DOadNeFyyfgNP67lUqaXmZsS6onv67XwxH_-5ANA,5756
-fractal_server/tasks/v2/local/reactivate_pixi.py,sha256=
+fractal_server/tasks/v2/local/reactivate_pixi.py,sha256=X1gdeuFGPtohjWEZ7OX2v8m6aI7Z93M-y64FqtYjApg,7320
 fractal_server/tasks/v2/ssh/__init__.py,sha256=vX5aIM9Hbn2T_cIP_LrZ5ekRqJzYm_GSfp-4Iv7kqeI,300
 fractal_server/tasks/v2/ssh/_utils.py,sha256=ktVH7psQSAhh353fVUe-BwiBZHzTdgXnR-Xv_vfuX0Y,3857
 fractal_server/tasks/v2/ssh/collect.py,sha256=M9gFD1h9Q1Z-BFQq65dI0vFs6HPCkKQzOkxaLddmChY,14334
-fractal_server/tasks/v2/ssh/collect_pixi.py,sha256=
+fractal_server/tasks/v2/ssh/collect_pixi.py,sha256=MYxHY5P69P7DdM4uC8FAsAoQBuqr8UJdDti0CPHAn_U,13801
 fractal_server/tasks/v2/ssh/deactivate.py,sha256=XAIy84cLT9MSTMiN67U-wfOjxMm5s7lmrGwhW0qp7BU,12439
 fractal_server/tasks/v2/ssh/deactivate_pixi.py,sha256=K0yK_NPUqhFMj6cp6G_0Kfn0Yo7oQux4kT5dFPulnos,4748
 fractal_server/tasks/v2/ssh/reactivate.py,sha256=NJIgMNFKaXMhbvK0iZOsMwMtsms6Boj9f8N4L01X9Bo,8271
-fractal_server/tasks/v2/ssh/reactivate_pixi.py,sha256=
+fractal_server/tasks/v2/ssh/reactivate_pixi.py,sha256=Vay6kfsrc5XKx2WJVTu_pLhgpuHZDdnrEB6Er8XchYo,9784
 fractal_server/tasks/v2/templates/1_create_venv.sh,sha256=PK0jdHKtQpda1zULebBaVPORt4t6V17wa4N1ohcj5ac,548
 fractal_server/tasks/v2/templates/2_pip_install.sh,sha256=jMJPQJXHKznO6fxOOXtFXKPdCmTf1VLLWj_JL_ZdKxo,1644
 fractal_server/tasks/v2/templates/3_pip_freeze.sh,sha256=JldREScEBI4cD_qjfX4UK7V4aI-FnX9ZvVNxgpSOBFc,168
@@ -230,8 +230,8 @@ fractal_server/types/validators/_workflow_task_arguments_validators.py,sha256=HL
 fractal_server/urls.py,sha256=QjIKAC1a46bCdiPMu3AlpgFbcv6a4l3ABcd5xz190Og,471
 fractal_server/utils.py,sha256=Vn35lApt1T1J8nc09sAVqd10Cy0sa3dLipcljI-hkuk,2185
 fractal_server/zip_tools.py,sha256=tqz_8f-vQ9OBRW-4OQfO6xxY-YInHTyHmZxU7U4PqZo,4885
-fractal_server-2.15.1.dist-info/LICENSE,sha256=
-fractal_server-2.15.1.dist-info/METADATA,sha256=
-fractal_server-2.15.1.dist-info/WHEEL,sha256=
-fractal_server-2.15.1.dist-info/entry_points.txt,sha256=
-fractal_server-2.15.1.dist-info/RECORD,,
+fractal_server-2.15.3.dist-info/LICENSE,sha256=QKAharUuhxL58kSoLizKJeZE3mTCBnX6ucmz8W0lxlk,1576
+fractal_server-2.15.3.dist-info/METADATA,sha256=-Z1TLhBsEtPcHcx4oq8Hij5f1mUfDyTiL3ttW1cBmTE,4243
+fractal_server-2.15.3.dist-info/WHEEL,sha256=7dDg4QLnNKTvwIDR9Ac8jJaAmBC_owJrckbC0jjThyA,88
+fractal_server-2.15.3.dist-info/entry_points.txt,sha256=8tV2kynvFkjnhbtDnxAqImL6HMVKsopgGfew0DOp5UY,58
+fractal_server-2.15.3.dist-info/RECORD,,

File without changes
File without changes
File without changes