PyPI - genelastic - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

genelastic 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

genelastic/api/.env +4 -0
genelastic/api/cli_start_api.py +2 -2
genelastic/api/errors.py +52 -0
genelastic/api/extends/example.py +0 -6
genelastic/api/extends/example.yml +0 -20
genelastic/api/routes.py +313 -181
genelastic/api/server.py +8 -3
genelastic/api/specification.yml +343 -181
genelastic/common/__init__.py +0 -44
genelastic/common/cli.py +48 -0
genelastic/common/elastic.py +374 -46
genelastic/common/exceptions.py +34 -2
genelastic/common/server.py +9 -1
genelastic/common/types.py +1 -14
genelastic/import_data/__init__.py +0 -27
genelastic/import_data/checker.py +99 -0
genelastic/import_data/checker_observer.py +13 -0
genelastic/import_data/cli/__init__.py +0 -0
genelastic/import_data/cli/cli_check.py +136 -0
genelastic/import_data/{cli_gen_data.py → cli/gen_data.py} +4 -4
genelastic/import_data/cli/import_data.py +346 -0
genelastic/import_data/cli/info.py +247 -0
genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
genelastic/import_data/cli/validate.py +146 -0
genelastic/import_data/collect.py +185 -0
genelastic/import_data/constants.py +136 -11
genelastic/import_data/import_bundle.py +102 -59
genelastic/import_data/import_bundle_factory.py +70 -149
genelastic/import_data/importers/__init__.py +0 -0
genelastic/import_data/importers/importer_base.py +131 -0
genelastic/import_data/importers/importer_factory.py +85 -0
genelastic/import_data/importers/importer_types.py +223 -0
genelastic/import_data/logger.py +2 -1
genelastic/import_data/models/__init__.py +0 -0
genelastic/import_data/models/analyses.py +178 -0
genelastic/import_data/models/analysis.py +144 -0
genelastic/import_data/models/data_file.py +110 -0
genelastic/import_data/models/process.py +45 -0
genelastic/import_data/models/processes.py +84 -0
genelastic/import_data/models/tags.py +170 -0
genelastic/import_data/models/unique_list.py +109 -0
genelastic/import_data/models/validate.py +26 -0
genelastic/import_data/patterns.py +90 -0
genelastic/import_data/random_bundle.py +10 -8
genelastic/import_data/resolve.py +157 -0
genelastic/ui/.env +1 -0
genelastic/ui/cli_start_ui.py +4 -2
genelastic/ui/routes.py +289 -42
genelastic/ui/static/cea-cnrgh.ico +0 -0
genelastic/ui/static/cea.ico +0 -0
genelastic/ui/static/layout.ico +0 -0
genelastic/ui/static/novaseq6000.png +0 -0
genelastic/ui/static/style.css +430 -0
genelastic/ui/static/ui.js +458 -0
genelastic/ui/templates/analyses.html +96 -9
genelastic/ui/templates/analysis_detail.html +44 -0
genelastic/ui/templates/bi_process_detail.html +129 -0
genelastic/ui/templates/bi_processes.html +114 -9
genelastic/ui/templates/explorer.html +356 -0
genelastic/ui/templates/home.html +205 -2
genelastic/ui/templates/layout.html +148 -29
genelastic/ui/templates/version.html +19 -7
genelastic/ui/templates/wet_process_detail.html +131 -0
genelastic/ui/templates/wet_processes.html +114 -9
genelastic-0.9.0.dist-info/METADATA +686 -0
genelastic-0.9.0.dist-info/RECORD +76 -0
genelastic-0.9.0.dist-info/WHEEL +4 -0
genelastic-0.9.0.dist-info/entry_points.txt +10 -0
genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
genelastic/import_data/analyses.py +0 -69
genelastic/import_data/analysis.py +0 -205
genelastic/import_data/bi_process.py +0 -27
genelastic/import_data/bi_processes.py +0 -49
genelastic/import_data/cli_import.py +0 -379
genelastic/import_data/cli_info.py +0 -256
genelastic/import_data/cli_validate.py +0 -54
genelastic/import_data/data_file.py +0 -87
genelastic/import_data/filename_pattern.py +0 -57
genelastic/import_data/tags.py +0 -123
genelastic/import_data/wet_process.py +0 -28
genelastic/import_data/wet_processes.py +0 -53
genelastic-0.8.0.dist-info/METADATA +0 -109
genelastic-0.8.0.dist-info/RECORD +0 -52
genelastic-0.8.0.dist-info/WHEEL +0 -5
genelastic-0.8.0.dist-info/entry_points.txt +0 -8
genelastic-0.8.0.dist-info/top_level.txt +0 -1

genelastic/import_data/constants.py CHANGED Viewed

@@ -5,20 +5,145 @@ This module contains genelastic constants.
 import typing
-ALLOWED_CATEGORIES: typing.Final[list[str]] = ["vcf", "cov"]
+import schema
+ALLOWED_EXTENSIONS: typing.Final[list[str]] = [
+    "vcf",
+    "cov",
+    "json",
+    "yml",
+    "yaml",
+]
 BUNDLE_CURRENT_VERSION = 3
-DEFAULT_TAG_REGEX = "[^_-]+"
-DEFAULT_TAG_PREFIX = "%"
-DEFAULT_TAG_SUFFIX = ""
+DEFAULT_TAG_REGEX = "[^_]+"
+DEFAULT_TAG_DELIMITER_START = "%"
+DEFAULT_TAG_DELIMITER_END = ""
 DEFAULT_TAG2FIELD: typing.Final[dict[str, dict[str, str]]] = {
-    "%S": {"field": "sample_name", "regex": DEFAULT_TAG_REGEX},
-    "%F": {"field": "source", "regex": DEFAULT_TAG_REGEX},
-    "%W": {"field": "wet_process", "regex": DEFAULT_TAG_REGEX},
-    "%B": {"field": "bi_process", "regex": DEFAULT_TAG_REGEX},
-    "%D": {"field": "cov_depth", "regex": DEFAULT_TAG_REGEX},
-    "%A": {"field": "barcode", "regex": DEFAULT_TAG_REGEX},
-    "%R": {"field": "reference_genome", "regex": DEFAULT_TAG_REGEX},
+    "S": {"field": "sample_name", "regex": DEFAULT_TAG_REGEX},
+    "F": {"field": "source", "regex": DEFAULT_TAG_REGEX},
+    "W": {"field": "wet_process", "regex": DEFAULT_TAG_REGEX},
+    "B": {"field": "bi_process", "regex": DEFAULT_TAG_REGEX},
+    "D": {"field": "cov_depth", "regex": DEFAULT_TAG_REGEX},
+    "A": {"field": "barcode", "regex": DEFAULT_TAG_REGEX},
+    "R": {"field": "reference_genome", "regex": DEFAULT_TAG_REGEX},
 }
+TOOLS_SUFFIX_RE = r"_(?P<tool>[a-zA-Z0-9]+)-(?P<version>\d+(?:-\d+){0,2})(?!-)"
+"""
+Regular expression to extract individual tool-version metadata pairs from a
+validated ``.metrics`` suffix in filenames.
+- Captures exactly one tool-version pair, where:
+  - ``tool`` is an alphanumeric identifier (letters and digits),
+  - ``version`` consists of 1 to 3 numeric components separated by hyphens
+    (e.g., '1', '1-0', '1-0-0'),
+- Uses named capture groups (``tool`` and ``version``) to extract data,
+- The negative lookahead ``(?!-)`` ensures the version does not end with a
+  hyphen,
+- Intended for extracting all matching pairs after the ``.metrics`` prefix has
+  been validated.
+"""
+_METRICS_SUFFIX_RE = r"(?:\.metrics(?:_[a-zA-Z0-9]+-\d+(?:-\d+){0,2}(?!-))*)?"
+"""
+Regular expression to match and validate the entire optional ``.metrics``
+suffix in filenames.
+- Matches zero or one occurrence of:
+  - A literal ``.metrics`` prefix, which must be the first suffix in the
+    filename,
+  - Followed optionally by zero or more tool-version pairs, each starting with
+    an underscore ``_`` and matching the same format as ``TOOLS_SUFFIX_RE``,
+- Validates that the whole suffix structure is correct (including optional
+  presence),
+- Ensures that when present, the suffix starts with ``.metrics`` and is
+  correctly formatted,
+- Does not extract individual tool-version pairs; its role is to validate the
+  suffix as a whole.
+"""
+_EXTENSIONS_SUFFIX_RE = rf"\.(?P<ext>{'|'.join(ALLOWED_EXTENSIONS)})(\.gz)?"
+"""
+Regular expression for matching allowed file extensions with optional gzip
+compression.
+This regex matches the file extension suffixes for files belonging to
+a set of predefined allowed extensions, specified in the ``ALLOWED_EXTENSIONS``
+list.
+The pattern matches:
+- a dot (``.``) followed by one of the allowed extensions,
+- optionally, a second extension ``.gz`` indicating gzip compression.
+Examples of matched suffixes: ``.vcf``, ``.cov``, ``.json``, ``.vcf.gz``,
+``.json.gz``.
+"""
+FILE_SUFFIXES_RE = rf"{_METRICS_SUFFIX_RE}{_EXTENSIONS_SUFFIX_RE}"
+"""Regex used to validate the suffix part of a filename.
+It matches an optional metrics suffix (containing tool-version metadata),
+immediately followed by a required allowed file extension suffix
+(possibly compressed with .gz).
+This regex is the combination of ``_METRICS_SUFFIX_RE`` and
+``_EXTENSIONS_SUFFIX_RE``.
+"""
+QC_METRICS_SCHEMA = schema.Schema(
+    {
+        "id": str,
+        "genome_coverage_size": float,
+        "genome_coverage_percent": float,
+        "n50": int,
+        "larger_contig": int,
+        "iqr": int,
+        "outlier_percent": float,
+        "mean_depth": float,
+        "mean_duplicat_percent": float,
+        "fold_regions_percents": {
+            "5": float,
+            "10": float,
+            "20": float,
+            "30": float,
+            "40": float,
+        },
+    }
+)
+SV_METRICS_SCHEMA = schema.Schema(
+    {
+        "metadata_mandatory": [{str: schema.Or(str, int, float, bool)}],
+        schema.Optional("metadata_optional"): [
+            {str: schema.Or(str, int, float, bool)}
+        ],
+        "regions": [
+            {
+                "name": str,
+                "bed": str,
+                "results": [
+                    {
+                        "svtype": str,
+                        "size": str,
+                        "FP_query": int,
+                        "TP_truth": int,
+                        "TP_query": int,
+                        "FN_truth": int,
+                        "total_truth": int,
+                        "total_query": int,
+                        "precision": schema.Or(int, float),
+                        "recall": schema.Or(int, float),
+                        "f1": schema.Or(int, float),
+                    }
+                ],
+            }
+        ],
+    }
+)

genelastic/import_data/import_bundle.py CHANGED Viewed

@@ -6,50 +6,87 @@ This module provides functionality for importing data bundles.
 import logging
 import sys
 import typing
+from pathlib import Path
-from genelastic.common import BundleDict
-from .analyses import Analyses
-from .bi_processes import BioInfoProcesses
-from .constants import BUNDLE_CURRENT_VERSION
-from .data_file import DataFile
-from .tags import Tags
-from .wet_processes import WetProcesses
+from genelastic.common.cli import log_subsection
+from genelastic.common.types import BundleDict
+from genelastic.import_data.models.analyses import Analyses
+from genelastic.import_data.models.process import BioInfoProcess, WetProcess
+from genelastic.import_data.models.processes import Processes
+from genelastic.import_data.models.tags import Tags
 logger = logging.getLogger("genelastic")
+def resolve_data_path(bundle_file: Path, data_path: Path | None) -> Path:
+    """Resolves the data path relative to the given bundle file if necessary.
+    If ``data_path`` is:
+    - Absolute: it is returned as-is,
+    - Relative: it is resolved relative to the parent of ``bundle_file``,
+    - None: considered as the current directory (``.``) and resolved
+      accordingly.
+    :param bundle_file: Path to the bundle file used for resolution context.
+    :param data_path: Optional path to the data directory or file.
+    :return: An absolute Path object pointing to the resolved data location.
+    """
+    resolved_data_path = data_path if data_path else Path()
+    if not resolved_data_path.is_absolute():
+        resolved_data_path = Path(
+            bundle_file.parent / resolved_data_path
+        ).resolve()
+    return resolved_data_path
 class ImportBundle:
     """Class for handling an import bundle description."""
-    def __init__(  # noqa: C901
-        self, x: typing.Sequence[BundleDict], *, check: bool = False
+    def __init__(
+        self,
+        x: typing.Sequence[BundleDict],
+        *,
+        multi_match: bool = False,
+        check: bool = False,
     ) -> None:
+        self._documents = x
+        self._custom_tags_doc: (
+            dict[str, dict[str, str | dict[str, str]]] | None
+        ) = None
         analyses: list[BundleDict] = []
         wet_processes: list[BundleDict] = []
         bi_processes: list[BundleDict] = []
-        tags = Tags(x)
+        self._search_custom_tags()
+        tags = (
+            Tags.from_dict(self._custom_tags_doc)
+            if self._custom_tags_doc
+            else Tags()
+        )
         # Loop on dicts
         for d in x:
-            # Check version
-            if "version" not in d:
-                msg = "No version inside YAML document."
-                raise RuntimeError(msg)
-            if int(d["version"]) != BUNDLE_CURRENT_VERSION:
-                raise RuntimeError
             # Gather all analyses
             if "analyses" in d and d["analyses"] is not None:
                 # Copy some bundle properties into each analysis
                 for analysis in d["analyses"]:
-                    for key in ["bundle_file", "root_dir"]:
-                        if key in d:
-                            analysis[key] = d[key]
+                    bundle_file = d["bundle_file"]
-                    # Add the tags to use.
+                    analysis["bundle_file"] = bundle_file
                     analysis["tags"] = tags
+                    analysis["multi_match"] = multi_match
+                    # Resolve data path
+                    data_path = (
+                        Path(analysis["data_path"])
+                        if "data_path" in analysis
+                        else None
+                    )
+                    analysis["data_path"] = resolve_data_path(
+                        bundle_file, data_path
+                    )
                 analyses.extend(d["analyses"])
             # If some wet processes are defined, copy the bundle file path into each of them.
@@ -65,18 +102,32 @@ class ImportBundle:
                 bi_processes.extend(d["bi_processes"])
         # Instantiate all objects
-        self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(
-            wet_processes
+        log_subsection("Loading wet processes...")
+        self._wet_processes = Processes.from_dicts(wet_processes, WetProcess)
+        logger.info(
+            "=> %s wet process(es) loaded from bundle(s).",
+            len(self._wet_processes),
+        )
+        log_subsection("Loading bioinformatics processes...")
+        self._bi_processes = Processes.from_dicts(bi_processes, BioInfoProcess)
+        logger.info(
+            "=> %s bioinformatics process(es) loaded from bundle(s).",
+            len(self._bi_processes),
         )
-        self._bi_processes: BioInfoProcesses = (
-            BioInfoProcesses.from_array_of_dicts(bi_processes)
+        log_subsection("Loading analyses...")
+        self._analyses = Analyses.from_dicts(analyses)
+        logger.info(
+            "=> %s analysis(es) loaded from bundle(s).", len(self._analyses)
         )
-        self._analyses: Analyses = Analyses.from_array_of_dicts(analyses)
+        logger.info("")
         if check:
-            self.check_referenced_processes()
+            self._check_referenced_processes()
-    def check_referenced_processes(self) -> None:
+    def _check_referenced_processes(self) -> None:
         """Check if wet and bi processes referenced inside each analysis are defined.
         If one of the processes is not defined, the program exits.
         """
@@ -85,8 +136,7 @@ class ImportBundle:
             if (
                 analysis_wet_process
-                and analysis_wet_process
-                not in self._wet_processes.get_process_ids()
+                and analysis_wet_process not in self._wet_processes
             ):
                 sys.exit(
                     f"Analysis at index {index} in file {analysis.bundle_file} "
@@ -97,48 +147,41 @@ class ImportBundle:
             if (
                 analysis_bi_process
-                and analysis_bi_process
-                not in self._bi_processes.get_process_ids()
+                and analysis_bi_process not in self._bi_processes
             ):
                 sys.exit(
                     f"Analysis at index {index} in file {analysis.bundle_file} "
                     f"is referencing an undefined bi process: {analysis_bi_process}"
                 )
+    def _search_custom_tags(self) -> None:
+        docs_with_custom_tags = [d for d in self._documents if "tags" in d]
+        # Only one 'tags' redefinition is allowed across all the documents.
+        if len(docs_with_custom_tags) > 1:
+            bundle_files = sorted(
+                [str(d["bundle_file"]) for d in docs_with_custom_tags]
+            )
+            msg = (
+                f"Only one 'tags' key should be defined across all documents, "
+                f"but multiple were found : {', '.join(bundle_files)}"
+            )
+            raise RuntimeError(msg)
+        if len(docs_with_custom_tags) == 1:
+            self._custom_tags_doc = docs_with_custom_tags[0]
     @property
     def analyses(self) -> Analyses:
         """The analyses."""
         return self._analyses
     @property
-    def wet_processes(self) -> WetProcesses:
+    def wet_processes(self) -> Processes:
         """The wet processes."""
         return self._wet_processes
     @property
-    def bi_processes(self) -> BioInfoProcesses:
+    def bi_processes(self) -> Processes:
         """The bi processes."""
         return self._bi_processes
-    def get_nb_files(self, cat: str | None = None) -> int:
-        """Get the number of files in a category."""
-        files = self.get_files(cat)
-        return len(files)
-    def get_files(self, cat: str | None = None) -> list[DataFile]:
-        """Returns all files of a category."""
-        files: list[DataFile] = []
-        # Loop on all analyses
-        for analysis in self.analyses:
-            files += analysis.get_data_files(cat)
-        return files
-    def get_nb_matched_files(self) -> int:
-        """Get the number of files that match the pattern."""
-        return sum(a.get_nb_files() for a in self.analyses)
-    def get_nb_unmatched_files(self) -> int:
-        """Get the number of files that do not match."""
-        return sum(len(a.get_unmatched_file_paths()) for a in self.analyses)

genelastic/import_data/import_bundle_factory.py CHANGED Viewed

@@ -1,52 +1,26 @@
 """ImportBundle factory module."""
 import logging
-import re
-import sys
 from pathlib import Path
+from typing import Any
 import schema
 import yaml
-from yaml.parser import ParserError
-from yaml.scanner import ScannerError
+from yaml import YAMLError
-from genelastic.common import BundleDict
+from genelastic.common.exceptions import (
+    ValidationError,
+    YAMLFileReadError,
+)
+from genelastic.common.types import BundleDict
 from .constants import BUNDLE_CURRENT_VERSION
 from .import_bundle import ImportBundle
+from .models.tags import Tags
 logger = logging.getLogger("genelastic")
-def validate_tag_char(s: str) -> bool:
-    """A tag should only contain one special character, excluding the following : (, ), ?, <, >."""
-    if len(s) > 1:
-        return False
-    return re.match(r"^[^\w()<>?]$", s) is not None
-def validate_field_chars(s: str) -> bool:
-    """Fields should only contain word characters.
-    A word character is a character a-z, A-Z, 0-9, including _ (underscore).
-    """
-    return re.match(r"^\w+$", s) is not None
-_SCHEMA_V1 = schema.Schema(
-    {"version": 1, schema.Optional("vcf_files"): schema.Or(None, [str])}
-)
-_SCHEMA_V2 = schema.Schema(
-    {
-        "version": 2,
-        schema.Optional("vcf"): {
-            schema.Optional("filename_pattern"): str,
-            "files": [str],
-        },
-    }
-)
 _SCHEMA_V3 = schema.Schema(
     {
         "version": 3,
@@ -54,8 +28,8 @@ _SCHEMA_V3 = schema.Schema(
             None,
             [
                 {
-                    schema.Optional("file_prefix"): str,
-                    schema.Optional("files"): [str],
+                    "file_prefix": str,
+                    schema.Optional("suffix"): str,
                     schema.Optional("sample_name"): str,
                     schema.Optional("source"): str,
                     schema.Optional("barcode"): str,
@@ -113,24 +87,24 @@ _SCHEMA_V3 = schema.Schema(
             ],
         ),
         schema.Optional("tags"): {
-            schema.Optional("format"): {
-                schema.Optional("prefix"): schema.And(
+            schema.Optional("delimiter"): {
+                schema.Optional("start"): schema.And(
                     str,
-                    validate_tag_char,
-                    error="Key 'prefix' should only contain one special character, "
+                    Tags.validate_tag_delimiter,
+                    error="Key 'delimiter.start' should only contain one special character, "
                     "excluding the following : (, ), ?, <, >.",
                 ),
-                schema.Optional("suffix"): schema.And(
+                schema.Optional("end"): schema.And(
                     str,
-                    validate_tag_char,
-                    error="Key 'suffix' should only contain one special character, "
+                    Tags.validate_tag_delimiter,
+                    error="Key 'delimiter.end' should only contain one special character, "
                     "excluding the following : (, ), ?, <, >.",
                 ),
             },
-            "match": {
+            schema.Optional("match"): {
                 schema.And(
                     str,
-                    validate_field_chars,
+                    Tags.validate_tag_name,
                     error="Tags listed under the 'match' key should only contain "
                     "word characters. A word character is a character "
                     "a-z, A-Z, 0-9, including _ (underscore).",
@@ -142,106 +116,81 @@ _SCHEMA_V3 = schema.Schema(
 def make_import_bundle_from_files(
-    files: list[Path], *, check: bool = False
+    files: list[Path], *, multi_match: bool = False, check: bool = False
 ) -> ImportBundle:
-    """Create an ImportBundle instance from a list of YAML files."""
-    all_documents = []
+    """Create an ImportBundle instance from a list of YAML files.
+    :raises YAMLFileReadError: If a YAML file cannot be read.
+    :raises ValidationError: If an import bundle is invalid.
+    :return: An ImportBundle instance.
+    """
+    all_docs = []
     for file in files:
         # Load documents stored in each file.
-        new_documents = load_import_bundle_file(file)
+        docs = load_yaml_file(file)
+        for doc in docs:
+            # Let schema handle structure/type/version validation.
+            validate_doc(doc)
-        for i, new_document in enumerate(new_documents):
-            # Upgrade each new document to the latest/current version.
-            if new_document["version"] != BUNDLE_CURRENT_VERSION:
-                new_documents[i] = upgrade_bundle_version(
-                    new_document, BUNDLE_CURRENT_VERSION
-                )
-            # Set the root directory path in each new document.
-            new_documents[i]["root_dir"] = str(file.parent)
             # Set the original bundle YAML file path in each new document.
-            new_documents[i]["bundle_file"] = str(file)
+            doc["bundle_file"] = Path(file).resolve()
-        all_documents.extend(new_documents)
+        all_docs.extend(docs)
     # Create bundle instance.
-    return ImportBundle(all_documents, check=check)
+    return ImportBundle(all_docs, multi_match=multi_match, check=check)
-def set_version(x: BundleDict) -> None:
-    """Set version number.
+def validate_doc(doc: Any) -> None:  # noqa: ANN401
+    """Validate a single YAML document against its versioned bundle schema.
-    Deduce the version number from the keys present inside the dictionary.
+    :param doc: Dictionary with a 'version' key indicating the schema to use.
+    :raises ValidationError: If validation fails.
     """
-    # Empty doc
-    if len(x) == 0:
-        x["version"] = BUNDLE_CURRENT_VERSION
+    bundle_version = None
-    # Wrong content in version field
-    elif "version" in x:
-        if not isinstance(x["version"], int):
-            msg = "Version must be an integer."
-            raise ValueError(msg)
+    if isinstance(doc, dict):
+        # If the document is a dict but lacks a version,
+        # assume current version.
+        if "version" not in doc:
+            doc["version"] = BUNDLE_CURRENT_VERSION
-    # Version 1
-    elif "vcf_files" in x or "cov_files" in x:
-        x["version"] = 1
+        bundle_version = doc["version"]
-    # Version 2
-    elif "vcf" in x and "filename_pattern" in x["vcf"]:
-        x["version"] = 2
-    # Latest version
-    else:
-        x["version"] = BUNDLE_CURRENT_VERSION
-def validate_doc(x: BundleDict) -> None:
-    """Validate the dictionary using its corresponding schema."""
     # Get schema
-    bundle_schema = globals().get("_SCHEMA_V" + str(x["version"]))
-    if bundle_schema is None:
-        raise ValueError(
-            f"Unknown version \"{x['version']}\" for import " + "bundle file."
+    bundle_schema = globals().get(f"_SCHEMA_V{bundle_version}")
+    if not bundle_schema:
+        msg = (
+            f"Failed to validate import bundle. "
+            f"Reason: unsupported version found ({bundle_version})."
         )
+        raise ValidationError(msg)
     # Validate
-    bundle_schema.validate(x)
+    try:
+        bundle_schema.validate(doc)
+    except schema.SchemaError as e:
+        msg = f"Failed to validate import bundle. Reason: {e}"
+        raise ValidationError(msg) from None
-def load_import_bundle_file(file: Path) -> list[BundleDict]:
-    """Loads a YAML import bundle file."""
-    # Load YAML
-    logger.info('Load YAML data import file "%s".', file)
-    docs: list[BundleDict] = []
+def load_yaml_file(file_path: Path) -> list[Any]:
+    """Load a YAML file.
+    :param file_path: Path to the file to load.
+    :raises YAMLFileReadError: If the file cannot be opened, decoded or
+        parsed as valid YAML.
+    :returns: A list of documents loaded from the YAML file.
+    """
     try:
-        with file.open(encoding="utf-8") as f:
-            docs = list(yaml.safe_load_all(f))
-    except (IsADirectoryError, FileNotFoundError) as e:
-        logger.error(e)
-        sys.exit(1)
-    except ScannerError as e:
-        logger.error("YAML file lexical analysis failed : %s", e)
-        sys.exit(1)
-    except ParserError as e:
-        logger.error("YAML file syntactic analysis failed : %s", e)
-        sys.exit(1)
+        with file_path.open(encoding="utf-8") as f:
+            documents = list(yaml.safe_load_all(f))
+    except (OSError, YAMLError, UnicodeDecodeError) as e:
+        msg = f"Failed to read YAML file '{file_path}'. Reason: {e}"
+        raise YAMLFileReadError(msg) from None
-    # Guess/set version
-    if docs is None:
-        docs = [{"version": BUNDLE_CURRENT_VERSION}]
-    else:
-        for i, x in enumerate(docs):
-            if x is None:
-                docs[i] = {"version": BUNDLE_CURRENT_VERSION}
-            else:
-                set_version(x)
-    # Find schema and validate document
-    for x in docs:
-        validate_doc(x)
-    return docs
+    return documents
 def upgrade_bundle_version(x: BundleDict, to_version: int) -> BundleDict:
@@ -268,31 +217,3 @@ def upgrade_bundle_version(x: BundleDict, to_version: int) -> BundleDict:
         y = upgrade_fct(y)  # type: ignore[misc]
     return y
-def _upgrade_from_v1_to_v2(x: BundleDict) -> BundleDict:
-    # Upgrade
-    y = {"version": 2, "vcf": {"files": []}}
-    if "vcf_files" in x and x["vcf_files"] is not None:
-        y["vcf"]["files"] = x["vcf_files"]  # type: ignore[index]
-    # Validate schema
-    _SCHEMA_V2.validate(y)
-    return y
-def _upgrade_from_v2_to_v3(x: BundleDict) -> BundleDict:
-    # Upgrade
-    y: BundleDict = {"version": 3, "analyses": []}
-    if "vcf" in x:
-        analysis_entry = {}
-        if "files" in x["vcf"]:
-            analysis_entry["files"] = x["vcf"]["files"]
-        if "filename_pattern" in x["vcf"]:
-            analysis_entry["file_prefix"] = x["vcf"]["filename_pattern"]
-        y["analyses"].append(analysis_entry)
-    _SCHEMA_V3.validate(y)
-    return y

genelastic/import_data/importers/__init__.py ADDED Viewed

File without changes

genelastic 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

genelastic 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl