fractal-task-tools 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_task_tools/__init__.py +5 -0
- fractal_task_tools/_args_schemas.py +246 -0
- fractal_task_tools/_cli.py +97 -0
- fractal_task_tools/_cli_tools.py +86 -0
- fractal_task_tools/_create_manifest.py +163 -0
- fractal_task_tools/_deepdiff.py +68 -0
- fractal_task_tools/_descriptions.py +252 -0
- fractal_task_tools/_package_name_tools.py +27 -0
- fractal_task_tools/_pydantic_generatejsonschema.py +81 -0
- fractal_task_tools/_signature_constraints.py +143 -0
- fractal_task_tools/_task_arguments.py +75 -0
- fractal_task_tools/_task_docs.py +109 -0
- fractal_task_tools/_titles.py +100 -0
- fractal_task_tools/_union_types.py +55 -0
- fractal_task_tools/task_models.py +168 -0
- fractal_task_tools/task_wrapper.py +71 -0
- fractal_task_tools-0.2.0.dist-info/METADATA +97 -0
- fractal_task_tools-0.2.0.dist-info/RECORD +22 -0
- fractal_task_tools-0.2.0.dist-info/WHEEL +5 -0
- fractal_task_tools-0.2.0.dist-info/entry_points.txt +2 -0
- fractal_task_tools-0.2.0.dist-info/licenses/LICENSE +29 -0
- fractal_task_tools-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing import Callable
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import pydantic
|
|
10
|
+
from docstring_parser import parse as docparse
|
|
11
|
+
|
|
12
|
+
from ._descriptions import _get_class_attrs_descriptions
|
|
13
|
+
from ._descriptions import _get_function_args_descriptions
|
|
14
|
+
from ._descriptions import _insert_class_attrs_descriptions
|
|
15
|
+
from ._descriptions import _insert_function_args_descriptions
|
|
16
|
+
from ._pydantic_generatejsonschema import CustomGenerateJsonSchema
|
|
17
|
+
from ._signature_constraints import _extract_function
|
|
18
|
+
from ._signature_constraints import _validate_function_signature
|
|
19
|
+
from ._titles import _include_titles
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
_Schema = dict[str, Any]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _remove_attributes_from_descriptions(old_schema: _Schema) -> _Schema:
    """
    Keep only the short description of each `$defs` docstring.

    E.g. reduce
    ```
    'Custom class for Omero-channel window, based on OME-NGFF v0.4.\\n'
    '\\n'
    'Attributes:\\n'
    'min: Do not change. It will be set to `0` by default.\\n'
    'max: Do not change. It will be set according to bitdepth of the images\\n'
    '    by default (e.g. 65535 for 16 bit images).\\n'
    'start: Lower-bound rescaling value for visualization.\\n'
    'end: Upper-bound rescaling value for visualization.'
    ```
    to `'Custom class for Omero-channel window, based on OME-NGFF v0.4.\\n'`.

    Args:
        old_schema: The JSON Schema to process.

    Returns:
        A copy of `old_schema` where every `$defs` entry has a (short)
        `description` key. The input schema is not modified in place.
    """
    new_schema = old_schema.copy()
    if "$defs" in new_schema:
        # Rebuild the `$defs` mapping with per-definition copies, so that
        # the caller's nested dictionaries are not mutated (a plain
        # `dict.copy()` of the top level would still share them).
        new_defs = {}
        for name, definition in new_schema["$defs"].items():
            new_def = definition.copy()
            if "description" in new_def:
                # Keep only the first paragraph of the class docstring.
                parsed_docstring = docparse(new_def["description"])
                new_def["description"] = parsed_docstring.short_description
            elif "title" in new_def:
                title = new_def["title"]
                new_def["description"] = f"Missing description for {title}."
            else:
                new_def["description"] = "Missing description"
            new_defs[name] = new_def
        new_schema["$defs"] = new_defs
    logging.info("[_remove_attributes_from_descriptions] END")
    return new_schema
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _create_schema_for_function(function: Callable) -> _Schema:
    """
    Generate the "validation"-mode JSON Schema of a function's arguments.

    The schema is obtained from a pydantic *core* schema; which API builds
    that core schema depends on the installed pydantic version, since the
    relevant (private or experimental) pydantic internals changed at
    versions 2.9.0 and 2.11.0.

    Args:
        function: The function whose arguments are turned into a schema.
    """
    from packaging.version import parse

    if parse(pydantic.__version__) >= parse("2.11.0"):
        # pydantic >= 2.11: use the public-but-experimental
        # `generate_arguments_schema` API.
        from pydantic.experimental.arguments_schema import (
            generate_arguments_schema,
        )
        from pydantic import ConfigDict
        from pydantic.fields import FieldInfo, ComputedFieldInfo

        # NOTE: v2.12.0 modified the generated field titles. The function
        # `make_title` restores the `<2.12.0` behavior
        def make_title(name: str, info: FieldInfo | ComputedFieldInfo):
            return name.title().replace("_", " ").strip()

        core_schema = generate_arguments_schema(
            function,
            schema_type="arguments",
            config=ConfigDict(field_title_generator=make_title),
        )

    elif parse(pydantic.__version__) >= parse("2.9.0"):
        # pydantic 2.9 - 2.10: rely on private internals; no explicit
        # namespace argument is required on these versions.
        from pydantic._internal._config import ConfigWrapper  # noqa
        from pydantic._internal import _generate_schema  # noqa

        gen_core_schema = _generate_schema.GenerateSchema(
            ConfigWrapper(None),
            None,
        )
        core_schema = gen_core_schema.generate_schema(function)
        core_schema = gen_core_schema.clean_schema(core_schema)
    else:
        # pydantic < 2.9: private internals require the module globals of
        # `function` as an explicit namespace.
        from pydantic._internal._typing_extra import add_module_globals  # noqa
        from pydantic._internal import _generate_schema  # noqa
        from pydantic._internal._config import ConfigWrapper  # noqa

        namespace = add_module_globals(function, None)
        gen_core_schema = _generate_schema.GenerateSchema(
            ConfigWrapper(None), namespace
        )
        core_schema = gen_core_schema.generate_schema(function)
        core_schema = gen_core_schema.clean_schema(core_schema)

    # Turn the core schema into a JSON Schema, via the package-specific
    # generator (imported at module level as `CustomGenerateJsonSchema`).
    gen_json_schema = CustomGenerateJsonSchema()
    json_schema = gen_json_schema.generate(core_schema, mode="validation")
    return json_schema
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def create_schema_for_single_task(
    executable: str,
    package: Optional[str] = None,
    pydantic_models: Optional[list[tuple[str, str, str]]] = None,
    task_function: Optional[Callable] = None,
    verbose: bool = False,
) -> _Schema:
    """
    Main function to create a JSON Schema of task arguments.

    This function can be used in two ways:

    1. `task_function` argument is `None`, `package` is set, and `executable`
       is a path relative to that package.
    2. `task_function` argument is provided, `executable` is an absolute path
       to the function module, and `package` is `None`. This is useful for
       testing.

    Args:
        executable: Path to the module defining the task function (relative
            to `package` in usage 1, absolute in usage 2).
        package: Name of the package that contains the task (usage 1 only).
        pydantic_models: List of `(package_name, module_relative_path,
            class_name)` tuples for custom argument models whose attribute
            descriptions are injected into the schema.
        task_function: The task-function object (usage 2 only).
        verbose: Whether to log extra debugging information.

    Returns:
        JSON Schema of the task arguments.

    Raises:
        ValueError: If the `executable`/`package`/`task_function` combination
            matches neither of the two supported usages, or if
            `pydantic_models` contains duplicate class names.
    """

    DEFINITIONS_KEY = "$defs"

    logging.info("[create_schema_for_single_task] START")
    if task_function is None:
        usage = "1"
        # Usage 1 (standard)
        if package is None:
            raise ValueError(
                "Cannot call `create_schema_for_single_task` with "
                f"{task_function=} and {package=}. Exit."
            )
        if os.path.isabs(executable):
            raise ValueError(
                "Cannot call `create_schema_for_single_task` with "
                f"{task_function=} and absolute {executable=}. Exit."
            )
    else:
        usage = "2"
        # Usage 2 (testing)
        if package is not None:
            raise ValueError(
                "Cannot call `create_schema_for_single_task` with "
                f"{task_function=} and non-None {package=}. Exit."
            )
        if not os.path.isabs(executable):
            raise ValueError(
                "Cannot call `create_schema_for_single_task` with "
                f"{task_function=} and non-absolute {executable=}. Exit."
            )

    # Extract function from module
    if usage == "1":
        # Extract the function name (for the moment we assume the function has
        # the same name as the module)
        function_name = Path(executable).with_suffix("").name
        # Extract the function object
        task_function = _extract_function(
            package_name=package,
            module_relative_path=executable,
            function_name=function_name,
            verbose=verbose,
        )
    else:
        # The function object is already available, extract its name
        function_name = task_function.__name__

    if verbose:
        logging.info(f"[create_schema_for_single_task] {function_name=}")
        logging.info(f"[create_schema_for_single_task] {task_function=}")

    # Validate function signature against some custom constraints
    _validate_function_signature(task_function)

    # Create and clean up schema
    schema = _create_schema_for_function(task_function)
    schema = _remove_attributes_from_descriptions(schema)

    # Include titles for custom-model-typed arguments
    schema = _include_titles(
        schema, definitions_key=DEFINITIONS_KEY, verbose=verbose
    )

    # Include main title, derived from the function name, when the schema
    # does not provide one.
    if schema.get("title") is None:

        def to_camel_case(snake_str):
            return "".join(
                x.capitalize() for x in snake_str.lower().split("_")
            )

        schema["title"] = to_camel_case(task_function.__name__)

    # Include descriptions of function. Note: this function works both
    # for usages 1 or 2 (see docstring).
    function_args_descriptions = _get_function_args_descriptions(
        package_name=package,
        module_path=executable,
        function_name=function_name,
        verbose=verbose,
    )

    schema = _insert_function_args_descriptions(
        schema=schema, descriptions=function_args_descriptions
    )

    if pydantic_models is not None:
        # Check that model names are unique, since descriptions are matched
        # to `$defs` entries by class name only.
        pydantic_models_names = [item[2] for item in pydantic_models]
        duplicate_class_names = [
            name
            for name, count in Counter(pydantic_models_names).items()
            if count > 1
        ]
        if duplicate_class_names:
            pydantic_models_str = " " + "\n ".join(map(str, pydantic_models))
            raise ValueError(
                "Cannot parse docstrings for models with non-unique names "
                f"{duplicate_class_names}, in\n{pydantic_models_str}"
            )

        # Extract model-attribute descriptions and insert them into schema
        for package_name, module_relative_path, class_name in pydantic_models:
            attrs_descriptions = _get_class_attrs_descriptions(
                package_name=package_name,
                module_relative_path=module_relative_path,
                class_name=class_name,
            )
            schema = _insert_class_attrs_descriptions(
                schema=schema,
                class_name=class_name,
                descriptions=attrs_descriptions,
                definition_key=DEFINITIONS_KEY,
            )

    logging.info("[create_schema_for_single_task] END")
    return schema
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import argparse as ap
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from fractal_task_tools._cli_tools import check_manifest
|
|
5
|
+
from fractal_task_tools._cli_tools import write_manifest_to_file
|
|
6
|
+
from fractal_task_tools._create_manifest import create_manifest
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Top-level parser for the `fractal-manifest` console entry point.
main_parser = ap.ArgumentParser(
    description="`fractal-manifest` command-line interface",
    allow_abbrev=False,
)

subparsers = main_parser.add_subparsers(
    title="Available commands",
    dest="cmd",
)


create_manifest_parser = subparsers.add_parser(
    "create",
    description="Create new manifest file",
    allow_abbrev=False,
)

check_manifest_parser = subparsers.add_parser(
    "check",
    description="Check existing manifest file",
    allow_abbrev=False,
)


# Arguments shared by both subcommands.
for subparser in (create_manifest_parser, check_manifest_parser):
    subparser.add_argument(
        "--package",
        type=str,
        help="Example: 'fractal_tasks_core'",
        required=True,
    )
    subparser.add_argument(
        "--task-list-path",
        type=str,
        help=(
            "Dot-separated path to the `task_list.py` module, "
            "relative to the package root (default value: 'dev.task_list')."
        ),
        default="dev.task_list",
        required=False,
    )

# NOTE: this was previously `type=bool`, which evaluates any non-empty
# value (including the literal string "False") as True. A `store_true`
# flag implements the documented default-False boolean behavior.
check_manifest_parser.add_argument(
    "--ignore-keys-order",
    action="store_true",
    help=(
        "Ignore the order of dictionary keys when comparing manifests "
        "(default value: False)."
    ),
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _parse_arguments(sys_argv: list[str] | None = None) -> ap.Namespace:
    """
    Parse CLI arguments through the module-level `main_parser`.

    Arguments:
        sys_argv: If set, overrides `sys.argv` (useful for testing).
    """
    argv = list(sys.argv) if sys_argv is None else sys_argv
    # Skip the program name (first element), as argparse expects.
    return main_parser.parse_args(argv[1:])
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def main():
    """
    Entry point of the `fractal-manifest` CLI.

    Builds the manifest for the requested package and then either writes it
    to disk (`create` subcommand) or compares it against the existing
    on-disk manifest (`check` subcommand).
    """
    args = _parse_arguments()
    if args.cmd not in ("create", "check"):
        # No (known) subcommand was selected: nothing to do.
        return

    # Both subcommands start from a freshly generated manifest.
    manifest = create_manifest(
        raw_package_name=args.package,
        task_list_path=args.task_list_path,
    )

    if args.cmd == "create":
        write_manifest_to_file(
            raw_package_name=args.package,
            manifest=manifest,
        )
    else:
        check_manifest(
            raw_package_name=args.package,
            manifest=manifest,
            ignore_keys_order=args.ignore_keys_order,
        )
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from importlib import import_module
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fractal_task_tools._create_manifest import MANIFEST_FILENAME
|
|
9
|
+
from fractal_task_tools._deepdiff import deepdiff
|
|
10
|
+
from fractal_task_tools._package_name_tools import normalize_package_name
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def write_manifest_to_file(
    *,
    raw_package_name: str,
    manifest: dict,
) -> None:
    """
    Write the manifest to its canonical location inside the package.

    The target path is `<package root>/__FRACTAL_MANIFEST__.json`, where the
    package root is resolved by importing the (normalized) package.

    Arguments:
        raw_package_name: Package name as provided on the CLI; it is
            normalized before being imported.
        manifest: The manifest object (a JSON-serializable dictionary).
    """
    logging.info("[write_manifest_to_file] START")

    package_name = normalize_package_name(raw_package_name)
    logging.info(f"[write_manifest_to_file] {package_name=}")

    # Resolve the on-disk package root via the imported module's location.
    imported_package = import_module(package_name)
    package_root_dir = Path(imported_package.__file__).parent
    manifest_path = (package_root_dir / MANIFEST_FILENAME).as_posix()
    logging.info(f"[write_manifest_to_file] {os.getcwd()=}")
    logging.info(f"[write_manifest_to_file] {package_root_dir=}")
    logging.info(f"[write_manifest_to_file] {manifest_path=}")

    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)
        # Keep a trailing newline, so the file is POSIX-friendly and stable
        # under common editors/linters.
        f.write("\n")

    logging.info("[write_manifest_to_file] END")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def check_manifest(
    *,
    raw_package_name: str,
    manifest: dict,
    ignore_keys_order: bool,
) -> None:
    """
    Compare a newly generated manifest with the on-disk one.

    Loads `<package root>/__FRACTAL_MANIFEST__.json` and compares it with
    `manifest`; on any difference the process exits with a non-zero status.

    Arguments:
        raw_package_name: Package name as provided on the CLI; it is
            normalized before being imported.
        manifest: The newly generated manifest object (a JSON-serializable
            dictionary).
        ignore_keys_order: Whether to ignore dictionary-key order in the
            comparison.
    """

    package_name = normalize_package_name(raw_package_name)
    logging.info(f"[check_manifest] {package_name=}")

    # Resolve the on-disk package root via the imported module's location.
    imported_package = import_module(package_name)
    package_root_dir = Path(imported_package.__file__).parent
    manifest_path = (package_root_dir / MANIFEST_FILENAME).as_posix()
    logging.info(f"[check_manifest] {os.getcwd()=}")
    logging.info(f"[check_manifest] {package_root_dir=}")
    logging.info(f"[check_manifest] {manifest_path=}")

    with open(manifest_path, "r") as f:
        old_manifest = json.load(f)
    if manifest == old_manifest:
        logging.info("[check_manifest] On-disk manifest is up to date.")
    else:
        logging.error("[check_manifest] On-disk manifest is not up to date.")
        # `deepdiff` raises ValueError at the first difference; use it to
        # log a precise diff location before exiting.
        try:
            deepdiff(
                old_object=old_manifest,
                new_object=manifest,
                path="manifest",
                ignore_keys_order=ignore_keys_order,
            )
        except ValueError as e:
            logging.error(str(e))
            sys.exit("New/old manifests differ")

    logging.info("[check_manifest] END")
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generate JSON schemas for task arguments and combine them into a manifest.
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
from importlib import import_module
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from ._args_schemas import create_schema_for_single_task
|
|
9
|
+
from ._package_name_tools import normalize_package_name
|
|
10
|
+
from ._task_arguments import validate_arguments
|
|
11
|
+
from ._task_docs import create_docs_info
|
|
12
|
+
from ._task_docs import read_docs_info_from_file
|
|
13
|
+
from .task_models import _BaseTask
|
|
14
|
+
|
|
15
|
+
ARGS_SCHEMA_VERSION = "pydantic_v2"
|
|
16
|
+
MANIFEST_FILENAME = "__FRACTAL_MANIFEST__.json"
|
|
17
|
+
MANIFEST_VERSION = "2"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def create_manifest(
    *,
    raw_package_name: str,
    task_list_path: str,
) -> dict[str, Any]:
    """
    Create the package manifest based on a `task_list.py` module.

    For each task declared in the module's `TASK_LIST`, this builds a task
    dictionary, autogenerates the JSON Schemas of its arguments, and sets
    its `docs_info`/`docs_link` entries.

    Arguments:
        raw_package_name:
            The name of the package. Note that this name must be importable
            (after normalization).
        task_list_path:
            Relative path to the `task_list.py` module, with respect to the
            package root (example `dev.task_list`).

    Returns:
        Task-package manifest.

    Raises:
        ValueError: If `task_list_path` is not a dot-separated module path.
        AttributeError: If the task-list module defines no `TASK_LIST`.
    """

    # Preliminary validation: the path must be a dotted module path,
    # not a filesystem path.
    if "/" in task_list_path or task_list_path.endswith(".py"):
        raise ValueError(
            f"Invalid {task_list_path=} (valid example: `dev.task_list`)."
        )

    # Normalize package name
    package_name = normalize_package_name(raw_package_name)

    logging.info(f"Start generating a new manifest for {package_name}")

    # Prepare an empty manifest
    manifest = dict(
        manifest_version=MANIFEST_VERSION,
        task_list=[],
        has_args_schemas=True,
        args_schema_version=ARGS_SCHEMA_VERSION,
        authors=None,
    )

    # Import the task-list module
    task_list_module = import_module(f"{package_name}.{task_list_path}")

    # Load TASK_LIST (mandatory; raises AttributeError if missing)
    TASK_LIST: list[_BaseTask] = getattr(task_list_module, "TASK_LIST")

    # Load INPUT_MODELS (optional; falls back to an empty list)
    try:
        INPUT_MODELS = getattr(task_list_module, "INPUT_MODELS")
    except AttributeError:
        INPUT_MODELS = []
        logging.warning(
            "No `INPUT_MODELS` found in task_list module. Setting it to `[]`."
        )

    # Load AUTHORS (optional; manifest["authors"] stays None if missing)
    try:
        manifest["authors"] = getattr(task_list_module, "AUTHORS")
    except AttributeError:
        logging.warning("No `AUTHORS` found in task_list module.")

    # Load DOCS_LINK (optional; falls back to None)
    try:
        DOCS_LINK = getattr(task_list_module, "DOCS_LINK")
        # Transform empty string into None
        if DOCS_LINK == "":
            DOCS_LINK = None
            logging.warning(
                "`DOCS_LINK=" "` transformed into `DOCS_LINK=None`."
            )
    except AttributeError:
        DOCS_LINK = None
        logging.warning("No `DOCS_LINK` found in task_list module.")

    # Loop over TASK_LIST, and append the proper task dictionaries
    # to manifest["task_list"]
    for task_obj in TASK_LIST:
        # Convert Pydantic object to dictionary; the excluded keys are
        # re-added selectively below, only when they are not None.
        task_dict = task_obj.model_dump(
            exclude={
                "meta_init",
                "executable_init",
                "meta",
                "executable",
            },
            exclude_unset=True,
        )
        task_dict["type"] = task_obj.type

        # Copy some properties from `task_obj` to `task_dict`
        if task_obj.executable_non_parallel is not None:
            task_dict[
                "executable_non_parallel"
            ] = task_obj.executable_non_parallel
        if task_obj.executable_parallel is not None:
            task_dict["executable_parallel"] = task_obj.executable_parallel
        if task_obj.meta_non_parallel is not None:
            task_dict["meta_non_parallel"] = task_obj.meta_non_parallel
        if task_obj.meta_parallel is not None:
            task_dict["meta_parallel"] = task_obj.meta_parallel

        # Autogenerate JSON Schemas for non-parallel/parallel task arguments
        for kind in ["non_parallel", "parallel"]:
            executable = task_dict.get(f"executable_{kind}")
            if executable is not None:
                logging.info(f"[{executable}] START")
                schema = create_schema_for_single_task(
                    executable,
                    package=package_name,
                    pydantic_models=INPUT_MODELS,
                )

                # Enforce task-type-specific argument constraints.
                validate_arguments(
                    task_type=task_obj.type,
                    schema=schema,
                    executable_kind=kind,
                )

                logging.info(f"[{executable}] END (new schema)")
                task_dict[f"args_schema_{kind}"] = schema

        # Compute and set `docs_info`: either autogenerated from the task
        # docstrings, or read from a file when prefixed with "file:".
        docs_info = task_dict.get("docs_info")
        if docs_info is None:
            docs_info = create_docs_info(
                executable_non_parallel=task_obj.executable_non_parallel,
                executable_parallel=task_obj.executable_parallel,
                package=package_name,
            )
        elif docs_info.startswith("file:"):
            docs_info = read_docs_info_from_file(
                docs_info=docs_info,
                task_list_path=task_list_module.__file__,
            )
        if docs_info is not None:
            task_dict["docs_info"] = docs_info

        # Set `docs_link`
        if DOCS_LINK is not None:
            task_dict["docs_link"] = DOCS_LINK

        # Append task
        manifest["task_list"].append(task_dict)
    return manifest
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# JSON-compatible value types accepted by `deepdiff`.
ValidType = Union[list, dict, str, int, float, bool, None]

# Safety bound against overly deep (or pathological) nested structures.
MAX_RECURSION_LEVEL = 20


def deepdiff(
    *,
    old_object: ValidType,
    new_object: ValidType,
    path: str,
    ignore_keys_order: bool,
    recursion_level: int = 1,
):
    """
    Recursively compare two JSON-like objects, raising on the first difference.

    Arguments:
        old_object: Reference object.
        new_object: Object compared against `old_object`.
        path: Human-readable location of the current comparison, used as a
            prefix of error messages (e.g. `manifest['task_list'][0]`).
        ignore_keys_order: Whether dictionary keys may appear in any order.
        recursion_level: Current recursion depth (internal).

    Raises:
        ValueError: If the two objects differ (in type, keys, length or
            value), if an unsupported type is found, or if the recursion
            exceeds `MAX_RECURSION_LEVEL`.
    """
    # `type(...) is` comparisons are deliberate: e.g. `True` (bool) must not
    # compare equal in type to `1` (int), which isinstance would allow.
    if type(old_object) is not type(new_object):
        raise ValueError(
            f"[{path}] Type difference:\n"
            f"\tOld: {type(old_object)}\n\tNew: {type(new_object)}"
        )

    if type(old_object) not in [list, dict, str, int, float, bool, type(None)]:
        raise ValueError(f"[{path}] Invalid type {type(old_object)}, exit.")

    if recursion_level > MAX_RECURSION_LEVEL:
        raise ValueError(f"Reached {MAX_RECURSION_LEVEL=}. Exit.")

    if type(old_object) is dict:
        old_keys = list(old_object.keys())
        new_keys = list(new_object.keys())
        if ignore_keys_order:
            old_keys = sorted(old_keys)
            new_keys = sorted(new_keys)
        if old_keys != new_keys:
            raise ValueError(
                f"[{path}] Dictionaries have different keys:\n"
                f"\tOld: {old_keys}\n\tNew: {new_keys}"
            )

        for key, value_a in old_object.items():
            deepdiff(
                old_object=value_a,
                new_object=new_object[key],
                path=f"{path}['{key}']",
                ignore_keys_order=ignore_keys_order,
                recursion_level=recursion_level + 1,
            )
    elif type(old_object) is list:
        if len(old_object) != len(new_object):
            # FIX: use the bracketed `[{path}]` prefix and `Old: ` spacing,
            # consistent with the other error messages in this function.
            raise ValueError(
                f"[{path}] Lists have different lengths:\n"
                f"\tOld: {len(old_object)}\n\tNew: {len(new_object)}"
            )
        for ind, item_a in enumerate(old_object):
            deepdiff(
                old_object=item_a,
                new_object=new_object[ind],
                path=f"{path}[{ind}]",
                ignore_keys_order=ignore_keys_order,
                recursion_level=recursion_level + 1,
            )
    else:
        if old_object != new_object:
            # FIX: bracketed `[{path}]` prefix, consistent with the rest.
            raise ValueError(
                f"[{path}] Values are different:\n"
                f"\tOld: '{old_object}'\n\tNew: '{new_object}'"
            )
|