fractal-server 1.3.0a2__py3-none-any.whl → 1.3.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/api/v1/dataset.py +21 -0
- fractal_server/app/api/v1/task.py +78 -16
- fractal_server/app/api/v1/workflow.py +36 -25
- fractal_server/app/models/job.py +3 -2
- fractal_server/app/models/project.py +4 -3
- fractal_server/app/models/security.py +2 -0
- fractal_server/app/models/state.py +2 -1
- fractal_server/app/models/task.py +20 -9
- fractal_server/app/models/workflow.py +34 -29
- fractal_server/app/runner/_common.py +13 -12
- fractal_server/app/runner/_slurm/executor.py +2 -1
- fractal_server/common/requirements.txt +1 -1
- fractal_server/common/schemas/__init__.py +2 -0
- fractal_server/common/schemas/applyworkflow.py +6 -7
- fractal_server/common/schemas/manifest.py +32 -15
- fractal_server/common/schemas/project.py +8 -10
- fractal_server/common/schemas/state.py +3 -4
- fractal_server/common/schemas/task.py +28 -97
- fractal_server/common/schemas/task_collection.py +101 -0
- fractal_server/common/schemas/user.py +5 -0
- fractal_server/common/schemas/workflow.py +9 -11
- fractal_server/common/tests/test_manifest.py +36 -4
- fractal_server/common/tests/test_task.py +16 -0
- fractal_server/common/tests/test_task_collection.py +24 -0
- fractal_server/common/tests/test_user.py +12 -0
- fractal_server/main.py +3 -0
- fractal_server/migrations/versions/4c308bcaea2b_add_task_args_schema_and_task_args_.py +38 -0
- fractal_server/migrations/versions/{e8f4051440be_new_initial_schema.py → 50a13d6138fd_initial_schema.py} +18 -10
- fractal_server/migrations/versions/{fda995215ae9_drop_applyworkflow_overwrite_input.py → f384e1c0cf5d_drop_task_default_args_columns.py} +9 -10
- fractal_server/tasks/collection.py +180 -115
- {fractal_server-1.3.0a2.dist-info → fractal_server-1.3.0a3.dist-info}/METADATA +2 -1
- {fractal_server-1.3.0a2.dist-info → fractal_server-1.3.0a3.dist-info}/RECORD +36 -34
- {fractal_server-1.3.0a2.dist-info → fractal_server-1.3.0a3.dist-info}/WHEEL +1 -1
- fractal_server/migrations/versions/bb1cca2acc40_add_applyworkflow_end_timestamp.py +0 -31
- {fractal_server-1.3.0a2.dist-info → fractal_server-1.3.0a3.dist-info}/LICENSE +0 -0
- {fractal_server-1.3.0a2.dist-info → fractal_server-1.3.0a3.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "1.3.0a2"
+__VERSION__ = "1.3.0a3"
fractal_server/app/api/v1/dataset.py
CHANGED
@@ -5,10 +5,12 @@ from fastapi import Depends
 from fastapi import HTTPException
 from fastapi import Response
 from fastapi import status
+from sqlmodel import or_
 from sqlmodel import select
 
 from ...db import AsyncSession
 from ...db import get_db
+from ...models import ApplyWorkflow
 from ...models import Dataset
 from ...models import DatasetCreate
 from ...models import DatasetRead
@@ -124,6 +126,25 @@ async def delete_dataset(
         db=db,
     )
     dataset = output["dataset"]
+
+    # Check that no ApplyWorkflow is in relationship with the current Dataset
+    stm = select(ApplyWorkflow).filter(
+        or_(
+            ApplyWorkflow.input_dataset_id == dataset_id,
+            ApplyWorkflow.output_dataset_id == dataset_id,
+        )
+    )
+    res = await db.execute(stm)
+    job = res.scalars().first()
+    if job:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                f"Cannot remove dataset {dataset_id}: "
+                f"it's still linked to job {job.id}."
+            ),
+        )
+
     await db.delete(dataset)
     await db.commit()
     await db.close()
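Note: for reference, a minimal standalone sketch of the guard added to `delete_dataset` above, assuming an async SQLModel session `db` and an integer `dataset_id` (the helper name is hypothetical and only for illustration, not part of the package):

```python
from sqlmodel import or_, select

from fractal_server.app.models import ApplyWorkflow


async def assert_dataset_is_not_linked_to_jobs(db, dataset_id: int) -> None:
    # Mirror the new delete_dataset check: refuse deletion while any job
    # still references the dataset as input or output.
    stm = select(ApplyWorkflow).filter(
        or_(
            ApplyWorkflow.input_dataset_id == dataset_id,
            ApplyWorkflow.output_dataset_id == dataset_id,
        )
    )
    res = await db.execute(stm)
    job = res.scalars().first()
    if job is not None:
        raise ValueError(
            f"Cannot remove dataset {dataset_id}: it's still linked to job {job.id}."
        )
```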
fractal_server/app/api/v1/task.py
CHANGED
@@ -41,12 +41,13 @@ from ...db import get_db
 from ...db import get_sync_db
 from ...models import State
 from ...models import Task
-from ...security import current_active_superuser
 from ...security import current_active_user
 from ...security import User
 
 router = APIRouter()
 
+logger = set_logger(__name__)
+
 
 async def _background_collect_pip(
     state_id: int,
@@ -178,7 +179,6 @@ async def collect_tasks_pip(
     response: Response,
     user: User = Depends(current_active_user),
     db: AsyncSession = Depends(get_db),
-    public: bool = True,
 ) -> StateRead:  # State[TaskCollectStatus]
     """
     Task collection endpoint
@@ -192,7 +192,7 @@ async def collect_tasks_pip(
     # Validate payload as _TaskCollectPip, which has more strict checks than
     # TaskCollectPip
     try:
-        task_pkg = _TaskCollectPip(**task_collect.dict())
+        task_pkg = _TaskCollectPip(**task_collect.dict(exclude_unset=True))
     except ValidationError as e:
         raise HTTPException(
             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
@@ -201,6 +201,7 @@ async def collect_tasks_pip(
 
     with TemporaryDirectory() as tmpdir:
         try:
+            # Copy or download the package wheel file to tmpdir
            if task_pkg.is_local_package:
                shell_copy(task_pkg.package_path.as_posix(), tmpdir)
                pkg_path = Path(tmpdir) / task_pkg.package_path.name
@@ -208,10 +209,12 @@ async def collect_tasks_pip(
                 pkg_path = await download_package(
                     task_pkg=task_pkg, dest=tmpdir
                 )
-
-
-
-            task_pkg.
+            # Read package info from wheel file, and override the ones coming
+            # from the request body
+            pkg_info = inspect_package(pkg_path)
+            task_pkg.package_name = pkg_info["pkg_name"]
+            task_pkg.package_version = pkg_info["pkg_version"]
+            task_pkg.package_manifest = pkg_info["pkg_manifest"]
             task_pkg.check()
         except Exception as e:
             raise HTTPException(
@@ -220,14 +223,28 @@ async def collect_tasks_pip(
             )
 
     try:
-
-        venv_path = create_package_dir_pip(task_pkg=task_pkg, user=pkg_user)
+        venv_path = create_package_dir_pip(task_pkg=task_pkg)
     except FileExistsError:
-        venv_path = create_package_dir_pip(
-            task_pkg=task_pkg, user=pkg_user, create=False
-        )
+        venv_path = create_package_dir_pip(task_pkg=task_pkg, create=False)
         try:
             task_collect_status = get_collection_data(venv_path)
+            for task in task_collect_status.task_list:
+                db_task = await db.get(Task, task.id)
+                if (
+                    (not db_task)
+                    or db_task.source != task.source
+                    or db_task.name != task.name
+                ):
+                    await db.close()
+                    raise HTTPException(
+                        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                        detail=(
+                            "Cannot collect package. Folder already exists, "
+                            f"but task {task.id} does not exists or it does "
+                            f"not have the expected source ({task.source}) or "
+                            f"name ({task.name})."
+                        ),
+                    )
         except FileNotFoundError as e:
             await db.close()
             raise HTTPException(
@@ -344,19 +361,42 @@ async def get_task(
 async def patch_task(
     task_id: int,
     task_update: TaskUpdate,
-    user: User = Depends(
+    user: User = Depends(current_active_user),
     db: AsyncSession = Depends(get_db),
 ) -> Optional[TaskRead]:
     """
-    Edit a specific task
+    Edit a specific task (restricted to superusers and task owner)
     """
+
     if task_update.source:
         raise HTTPException(
             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
             detail="patch_task endpoint cannot set `source`",
         )
 
+    # Retrieve task from database
     db_task = await db.get(Task, task_id)
+
+    # This check constitutes a preliminary version of access control:
+    # if the current user is not a superuser and differs from the task owner
+    # (including when `owner is None`), we raise an 403 HTTP Exception.
+    if not user.is_superuser:
+        if db_task.owner is None:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail=("Only a superuser can edit a task with `owner=None`."),
+            )
+        else:
+            owner = user.username or user.slurm_user
+            if owner != db_task.owner:
+                raise HTTPException(
+                    status_code=status.HTTP_403_FORBIDDEN,
+                    detail=(
+                        f"Current user ({owner}) cannot modify task "
+                        f"({task_id}) with different owner ({db_task.owner})."
+                    ),
+                )
+
     update = task_update.dict(exclude_unset=True)
     for key, value in update.items():
         if isinstance(value, str):
@@ -386,14 +426,36 @@ async def create_task(
     """
     Create a new task
     """
+    # Set task.owner attribute
+    if user.username:
+        owner = user.username
+    elif user.slurm_user:
+        owner = user.slurm_user
+    else:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                "Cannot add a new task because current user does not "
+                "have `username` or `slurm_user` attributes."
+            ),
+        )
+
+    # Prepend owner to task.source
+    task.source = f"{owner}:{task.source}"
+
+    # Verify that source is not already in use (note: this check is only useful
+    # to provide a user-friendly error message, but `task.source` uniqueness is
+    # already guaranteed by a constraint in the table definition).
     stm = select(Task).where(Task.source == task.source)
     res = await db.execute(stm)
     if res.scalars().all():
         raise HTTPException(
             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=f
+            detail=f'Task source "{task.source}" already in use',
         )
-
+
+    # Add task
+    db_task = Task(**task.dict(), owner=owner)
     db.add(db_task)
     await db.commit()
     await db.refresh(db_task)
fractal_server/app/api/v1/workflow.py
CHANGED
@@ -20,8 +20,11 @@ from fastapi import Response
 from fastapi import status
 from sqlmodel import select
 
+from ....logger import close_logger
+from ....logger import set_logger
 from ...db import AsyncSession
 from ...db import get_db
+from ...models import ApplyWorkflow
 from ...models import Task
 from ...models import Workflow
 from ...models import WorkflowCreate
@@ -181,6 +184,19 @@ async def delete_workflow(
         project_id=project_id, workflow_id=workflow_id, user_id=user.id, db=db
     )
 
+    # Check that no ApplyWorkflow is in relationship with the current Workflow
+    stm = select(ApplyWorkflow).where(ApplyWorkflow.workflow_id == workflow_id)
+    res = await db.execute(stm)
+    job = res.scalars().first()
+    if job:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                f"Cannot remove workflow {workflow_id}: "
+                f"it's still linked to job {job.id}."
+            ),
+        )
+
     await db.delete(workflow)
     await db.commit()
 
@@ -203,6 +219,18 @@ async def export_worfklow(
     workflow = await _get_workflow_check_owner(
         project_id=project_id, workflow_id=workflow_id, user_id=user.id, db=db
     )
+    # Emit a warning when exporting a workflow with custom tasks
+    logger = set_logger(None)
+    for wftask in workflow.task_list:
+        if wftask.task.owner is not None:
+            logger.warning(
+                f"Custom tasks (like the one with id={wftask.task.id} and "
+                f'source="{wftask.task.source}") are not meant to be '
+                "portable; re-importing this workflow may not work as "
+                "expected."
+            )
+    close_logger(logger)
+
     await db.close()
     return workflow
 
@@ -237,37 +265,21 @@ async def import_workflow(
     )
 
     # Check that all required tasks are available
-    # NOTE: by now we go through the pair (source, name), but later on we may
-    # combine them into source -- see issue #293.
     tasks = [wf_task.task for wf_task in workflow.task_list]
-
+    source_to_id = {}
     for task in tasks:
         source = task.source
-
-        if not (source, name) in sourcename_to_id.keys():
+        if source not in source_to_id.keys():
             stm = select(Task).where(Task.source == source)
             tasks_by_source = (await db.execute(stm)).scalars().all()
-            if
+            if len(tasks_by_source) != 1:
                 raise HTTPException(
                     status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                    detail=(
-
-
-            stm = (
-                select(Task)
-                .where(Task.source == source)
-                .where(Task.name == name)
+                    detail=(
+                        f"Found {len(tasks_by_source)} tasks with {source=}."
+                    ),
                 )
-
-            if len(current_task) != 1:
-                raise HTTPException(
-                    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                    detail=(
-                        f"Found {len(current_task)} tasks with "
-                        f"{name =} and {source=}."
-                    ),
-                )
-            sourcename_to_id[(source, name)] = current_task[0].id
+            source_to_id[source] = tasks_by_source[0].id
 
     # Create new Workflow (with empty task_list)
     db_workflow = Workflow(
@@ -283,8 +295,7 @@ async def import_workflow(
     for _, wf_task in enumerate(workflow.task_list):
         # Identify task_id
         source = wf_task.task.source
-
-        task_id = sourcename_to_id[(source, name)]
+        task_id = source_to_id[source]
         # Prepare new_wf_task
         new_wf_task = WorkflowTaskCreate(
             **wf_task.dict(exclude_none=True),
fractal_server/app/models/job.py
CHANGED
@@ -6,8 +6,9 @@ from sqlalchemy import Column
 from sqlalchemy.types import DateTime
 from sqlmodel import Field
 from sqlmodel import Relationship
+from sqlmodel import SQLModel
 
-from ...common.schemas import
+from ...common.schemas import _ApplyWorkflowBase
 from ...utils import get_timestamp
 from .project import Dataset
 from .workflow import Workflow
@@ -38,7 +39,7 @@ class JobStatusType(str, Enum):
     FAILED = "failed"
 
 
-class ApplyWorkflow(
+class ApplyWorkflow(_ApplyWorkflowBase, SQLModel, table=True):
     """
     Represent a workflow run
 
fractal_server/app/models/project.py
CHANGED
@@ -5,6 +5,7 @@ from sqlalchemy import Column
 from sqlalchemy.types import JSON
 from sqlmodel import Field
 from sqlmodel import Relationship
+from sqlmodel import SQLModel
 
 from ...common.schemas.project import _DatasetBase
 from ...common.schemas.project import _ProjectBase
@@ -14,7 +15,7 @@ from .security import UserOAuth as User
 from .workflow import Workflow
 
 
-class Dataset(_DatasetBase, table=True):
+class Dataset(_DatasetBase, SQLModel, table=True):
     """
     Represent a dataset
 
@@ -49,7 +50,7 @@ class Dataset(_DatasetBase, table=True):
         return [r.path for r in self.resource_list]
 
 
-class Project(_ProjectBase, table=True):
+class Project(_ProjectBase, SQLModel, table=True):
     id: Optional[int] = Field(default=None, primary_key=True)
 
     user_list: list[User] = Relationship(
@@ -82,6 +83,6 @@ class Project(_ProjectBase, table=True):
     )
 
 
-class Resource(_ResourceBase, table=True):
+class Resource(_ResourceBase, SQLModel, table=True):
     id: Optional[int] = Field(default=None, primary_key=True)
     dataset_id: int = Field(foreign_key="dataset.id")
fractal_server/app/models/security.py
CHANGED
@@ -62,6 +62,8 @@ class UserOAuth(SQLModel, table=True):
 
     slurm_user: Optional[str]
     cache_dir: Optional[str]
+    username: Optional[str]
+
     oauth_accounts: list["OAuthAccount"] = Relationship(
         back_populates="user",
         sa_relationship_kwargs={"lazy": "selectin", "cascade": "all, delete"},
fractal_server/app/models/state.py
CHANGED
@@ -6,12 +6,13 @@ from sqlalchemy import Column
 from sqlalchemy.types import DateTime
 from sqlalchemy.types import JSON
 from sqlmodel import Field
+from sqlmodel import SQLModel
 
 from ...common.schemas import _StateBase
 from ...utils import get_timestamp
 
 
-class State(_StateBase, table=True):
+class State(_StateBase, SQLModel, table=True):
     """
     Store arbitrary data in the database
 
fractal_server/app/models/task.py
CHANGED
@@ -4,33 +4,44 @@ from typing import Optional
 from sqlalchemy import Column
 from sqlalchemy.types import JSON
 from sqlmodel import Field
+from sqlmodel import SQLModel
 
 from ...common.schemas.task import _TaskBase
 
 
-class Task(_TaskBase, table=True):
+class Task(_TaskBase, SQLModel, table=True):
     """
     Task model
 
     Attributes:
         id: Primary key
-        command:
-        input_type:
-        output_type:
-
-
+        command: Executable command
+        input_type: Expected type of input `Dataset`
+        output_type: Expected type of output `Dataset`
+        meta:
+            Additional metadata related to execution (e.g. computational
+            resources)
         source: inherited from `_TaskBase`
         name: inherited from `_TaskBase`
+        args_schema: JSON schema of task arguments
+        args_schema_version:
+            label pointing at how the JSON schema of task arguments was
+            generated
     """
 
     id: Optional[int] = Field(default=None, primary_key=True)
+    name: str
     command: str
+    source: str = Field(unique=True)
     input_type: str
     output_type: str
-    default_args: Optional[dict[str, Any]] = Field(
-        sa_column=Column(JSON), default={}
-    )
     meta: Optional[dict[str, Any]] = Field(sa_column=Column(JSON), default={})
+    owner: Optional[str] = None
+    version: Optional[str] = None
+    args_schema: Optional[dict[str, Any]] = Field(
+        sa_column=Column(JSON), default=None
+    )
+    args_schema_version: Optional[str]
 
     @property
     def parallelization_level(self) -> Optional[str]:
fractal_server/app/models/workflow.py
CHANGED
@@ -1,3 +1,5 @@
+import json
+import logging
 from typing import Any
 from typing import Optional
 from typing import Union
@@ -8,6 +10,7 @@ from sqlalchemy.ext.orderinglist import ordering_list
 from sqlalchemy.types import JSON
 from sqlmodel import Field
 from sqlmodel import Relationship
+from sqlmodel import SQLModel
 
 from ...common.schemas.workflow import _WorkflowBase
 from ...common.schemas.workflow import _WorkflowTaskBase
@@ -15,7 +18,7 @@ from ..db import AsyncSession
 from .task import Task
 
 
-class WorkflowTask(_WorkflowTaskBase, table=True):
+class WorkflowTask(_WorkflowTaskBase, SQLModel, table=True):
     """
     A Task as part of a Workflow
 
@@ -36,8 +39,7 @@ class WorkflowTask(_WorkflowTaskBase, table=True):
         meta:
             Additional parameters useful for execution
         args:
-
-            `WorkflowTask.task.args`
+            Task arguments
         task:
             `Task` object associated with the current `WorkflowTask`
 
@@ -81,15 +83,6 @@ class WorkflowTask(_WorkflowTaskBase, table=True):
         )
         return value
 
-    @property
-    def arguments(self):
-        """
-        Override default arguments
-        """
-        out = self.task.default_args.copy()
-        out.update(self.args or {})
-        return out
-
     @property
     def is_parallel(self) -> bool:
         return self.task.is_parallel
@@ -108,23 +101,8 @@ class WorkflowTask(_WorkflowTaskBase, table=True):
         res.update(self.meta or {})
         return res
 
-    def assemble_args(self, extra: dict[str, Any] = None) -> dict:
-        """
-        Merge of `extra` arguments and `self.arguments`.
 
-
-            full_args:
-                A dictionary consisting of the merge of `extra` and
-                `self.arguments`.
-        """
-        full_args = {}
-        if extra:
-            full_args.update(extra)
-        full_args.update(self.arguments)
-        return full_args
-
-
-class Workflow(_WorkflowBase, table=True):
+class Workflow(_WorkflowBase, SQLModel, table=True):
     """
     Workflow
 
@@ -172,7 +150,34 @@ class Workflow(_WorkflowBase, table=True):
         """
         if order is None:
             order = len(self.task_list)
-
+
+        # Get task from db, extract the JSON Schema for its arguments (if any),
+        # read default values and set them in default_args
+        db_task = await db.get(Task, task_id)
+        default_args = {}
+        if db_task.args_schema is not None:
+            try:
+                properties = db_task.args_schema["properties"]
+                for prop_name, prop_schema in properties.items():
+                    default_value = prop_schema.get("default", None)
+                    if default_value:
+                        default_args[prop_name] = default_value
+            except KeyError as e:
+                logging.warning(
+                    "Cannot set default_args from args_schema="
+                    f"{json.dumps(db_task.args_schema)}\n"
+                    f"Original KeyError: {str(e)}"
+                )
+        # Override default_args with args
+        actual_args = default_args.copy()
+        if args is not None:
+            for k, v in args.items():
+                actual_args[k] = v
+        if not actual_args:
+            actual_args = None
+
+        # Create DB entry
+        wf_task = WorkflowTask(task_id=task_id, args=actual_args, meta=meta)
         db.add(wf_task)
         self.task_list.insert(order, wf_task)
         self.task_list.reorder()  # type: ignore
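Note: a standalone sketch of the default-extraction step that `Workflow.insert_task` now performs, assuming a standard JSON Schema `properties` mapping as input (illustrative helper, not part of the package):

```python
import json
import logging


def default_args_from_schema(args_schema: dict) -> dict:
    # Collect per-property defaults from a JSON Schema, skipping falsy defaults
    # (same behaviour as the insert_task changes above).
    default_args = {}
    try:
        for prop_name, prop_schema in args_schema["properties"].items():
            default_value = prop_schema.get("default", None)
            if default_value:
                default_args[prop_name] = default_value
    except KeyError as e:
        logging.warning(
            f"Cannot set default_args from args_schema={json.dumps(args_schema)}\n"
            f"Original KeyError: {str(e)}"
        )
    return default_args
```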
fractal_server/app/runner/_common.py
CHANGED
@@ -187,10 +187,10 @@ def call_single_task(
     Call a single task
 
     This assembles the runner arguments (input_paths, output_path, ...) and
-
-    message or index in the dummy task), writes them to file, call the
-    executable command passing the arguments file as an input and
-    the output.
+    wftask arguments (i.e., arguments that are specific to the WorkflowTask,
+    such as message or index in the dummy task), writes them to file, call the
+    task executable command passing the arguments file as an input and
+    assembles the output.
 
     **Note**: This function is directly submitted to a
     `concurrent.futures`-compatible executor, as in
@@ -205,7 +205,7 @@ def call_single_task(
     Args:
         wftask:
             The workflow task to be called. This includes task specific
-            arguments via the
+            arguments via the wftask.args attribute.
         task_pars:
             The parameters required to run the task which are not specific to
             the task, e.g., I/O paths.
@@ -238,11 +238,12 @@ def call_single_task(
         task_order=wftask.order,
     )
 
-    #
-
-
-
-
+    # write args file (by assembling task_pars and wftask.args)
+    write_args_file(
+        task_pars.dict(),
+        wftask.args or {},
+        path=task_files.args,
+    )
 
     # assemble full command
     cmd = (
@@ -341,10 +342,10 @@ def call_single_parallel_task(
         component=component,
     )
 
-    #
+    # write args file (by assembling task_pars, wftask.args and component)
     write_args_file(
         task_pars.dict(),
-        wftask.
+        wftask.args or {},
         dict(component=component),
         path=task_files.args,
    )
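Note: `write_args_file` is an existing helper in the runner module whose implementation is not shown in this diff. As a rough, assumption-laden sketch, it presumably merges the given dictionaries in order and dumps the result as a JSON args file at `path`, along these lines:

```python
import json
from pathlib import Path
from typing import Any


def write_args_file(*dictionaries: dict[str, Any], path: Path) -> None:
    # Sketch only: merge left-to-right (later dictionaries override earlier
    # keys) and write the result as a JSON args file.
    merged: dict[str, Any] = {}
    for d in dictionaries:
        merged.update(d)
    with path.open("w") as f:
        json.dump(merged, f, indent=2)
```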
fractal_server/app/runner/_slurm/executor.py
CHANGED
@@ -743,7 +743,8 @@ class FractalSlurmExecutor(SlurmExecutor):
                 "the job started running, the SLURM out/err files "
                 "will be empty.\n"
                 "2. Some error occurred upon writing the file to disk "
-                "(e.g.
+                "(e.g. because there is not enough space on disk, or "
+                "due to an overloaded NFS filesystem). "
                 "Note that the server configuration has "
                 "FRACTAL_SLURM_OUTPUT_FILE_GRACE_TIME="
                 f"{settings.FRACTAL_SLURM_OUTPUT_FILE_GRACE_TIME} "
fractal_server/common/schemas/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from .manifest import *  # noqa: F403
 from .project import *  # noqa: F403
 from .state import *  # noqa: F403
 from .task import *  # noqa: F403
+from .task_collection import *  # noqa: F403
 from .user import *  # noqa: F403
 from .workflow import *  # noqa: F403
 
@@ -10,6 +11,7 @@ from .workflow import *  # noqa: F403
 __all__ = (
     project.__all__  # noqa: F405
     + task.__all__  # noqa: F405
+    + task_collection.__all__  # noqa: F405
     + workflow.__all__  # noqa: F405
     + applyworkflow.__all__  # noqa: F405
     + manifest.__all__  # noqa: F405