genelastic 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/api/.env +4 -0
- genelastic/api/cli_start_api.py +18 -0
- genelastic/api/errors.py +52 -0
- genelastic/api/extends/example.py +0 -6
- genelastic/api/extends/example.yml +0 -0
- genelastic/api/routes.py +313 -181
- genelastic/api/server.py +34 -26
- genelastic/api/settings.py +5 -9
- genelastic/api/specification.yml +512 -0
- genelastic/common/__init__.py +0 -39
- genelastic/common/cli.py +100 -0
- genelastic/common/elastic.py +374 -46
- genelastic/common/exceptions.py +34 -2
- genelastic/common/server.py +59 -0
- genelastic/common/types.py +1 -14
- genelastic/import_data/__init__.py +0 -27
- genelastic/import_data/checker.py +99 -0
- genelastic/import_data/checker_observer.py +13 -0
- genelastic/import_data/cli/__init__.py +0 -0
- genelastic/import_data/cli/cli_check.py +136 -0
- genelastic/import_data/cli/gen_data.py +143 -0
- genelastic/import_data/cli/import_data.py +346 -0
- genelastic/import_data/cli/info.py +247 -0
- genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
- genelastic/import_data/cli/validate.py +146 -0
- genelastic/import_data/collect.py +185 -0
- genelastic/import_data/constants.py +136 -11
- genelastic/import_data/import_bundle.py +102 -59
- genelastic/import_data/import_bundle_factory.py +70 -149
- genelastic/import_data/importers/__init__.py +0 -0
- genelastic/import_data/importers/importer_base.py +131 -0
- genelastic/import_data/importers/importer_factory.py +85 -0
- genelastic/import_data/importers/importer_types.py +223 -0
- genelastic/import_data/logger.py +2 -1
- genelastic/import_data/models/__init__.py +0 -0
- genelastic/import_data/models/analyses.py +178 -0
- genelastic/import_data/models/analysis.py +144 -0
- genelastic/import_data/models/data_file.py +110 -0
- genelastic/import_data/models/process.py +45 -0
- genelastic/import_data/models/processes.py +84 -0
- genelastic/import_data/models/tags.py +170 -0
- genelastic/import_data/models/unique_list.py +109 -0
- genelastic/import_data/models/validate.py +26 -0
- genelastic/import_data/patterns.py +90 -0
- genelastic/import_data/random_bundle.py +79 -54
- genelastic/import_data/resolve.py +157 -0
- genelastic/ui/.env +1 -0
- genelastic/ui/cli_start_ui.py +20 -0
- genelastic/ui/routes.py +333 -0
- genelastic/ui/server.py +9 -82
- genelastic/ui/settings.py +2 -6
- genelastic/ui/static/cea-cnrgh.ico +0 -0
- genelastic/ui/static/cea.ico +0 -0
- genelastic/ui/static/layout.ico +0 -0
- genelastic/ui/static/novaseq6000.png +0 -0
- genelastic/ui/static/style.css +430 -0
- genelastic/ui/static/ui.js +458 -0
- genelastic/ui/templates/analyses.html +98 -0
- genelastic/ui/templates/analysis_detail.html +44 -0
- genelastic/ui/templates/bi_process_detail.html +129 -0
- genelastic/ui/templates/bi_processes.html +116 -0
- genelastic/ui/templates/explorer.html +356 -0
- genelastic/ui/templates/home.html +207 -0
- genelastic/ui/templates/layout.html +153 -0
- genelastic/ui/templates/version.html +21 -0
- genelastic/ui/templates/wet_process_detail.html +131 -0
- genelastic/ui/templates/wet_processes.html +116 -0
- genelastic-0.9.0.dist-info/METADATA +686 -0
- genelastic-0.9.0.dist-info/RECORD +76 -0
- genelastic-0.9.0.dist-info/WHEEL +4 -0
- genelastic-0.9.0.dist-info/entry_points.txt +10 -0
- genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
- genelastic/import_data/analyses.py +0 -69
- genelastic/import_data/analysis.py +0 -205
- genelastic/import_data/bi_process.py +0 -27
- genelastic/import_data/bi_processes.py +0 -49
- genelastic/import_data/cli_gen_data.py +0 -116
- genelastic/import_data/cli_import.py +0 -379
- genelastic/import_data/cli_info.py +0 -256
- genelastic/import_data/cli_validate.py +0 -54
- genelastic/import_data/data_file.py +0 -87
- genelastic/import_data/filename_pattern.py +0 -57
- genelastic/import_data/tags.py +0 -123
- genelastic/import_data/wet_process.py +0 -28
- genelastic/import_data/wet_processes.py +0 -53
- genelastic-0.7.0.dist-info/METADATA +0 -105
- genelastic-0.7.0.dist-info/RECORD +0 -40
- genelastic-0.7.0.dist-info/WHEEL +0 -5
- genelastic-0.7.0.dist-info/entry_points.txt +0 -6
- genelastic-0.7.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""This module defines the DataFile class, which handles the representation,
|
|
2
|
+
management, and extraction of metadata for a data file within a data bundle.
|
|
3
|
+
|
|
4
|
+
It includes functionality to construct DataFile instances from paths and
|
|
5
|
+
optional filename patterns, retrieve file paths and metadata, and support
|
|
6
|
+
for extracting metadata from filenames using specified patterns.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from types import NotImplementedType
|
|
12
|
+
|
|
13
|
+
from genelastic.common.types import Metadata
|
|
14
|
+
from genelastic.import_data.patterns import MetricsPattern
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger("genelastic")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DataFile:
    """Class for handling a data file and its metadata."""

    def __init__(
        self,
        analysis_id: str,
        path: Path,
        bundle_file: Path,
        metadata: Metadata,
    ) -> None:
        """Initialize a DataFile.

        :param analysis_id: Identifier of the analysis this file belongs to.
        :param path: Path of the data file.
        :param bundle_file: Path of the data bundle file that references this
            data file.
        :param metadata: Metadata mapping. Must contain the key 'ext', plus
            the key 'type' when the file is a metrics file.
        :raises RuntimeError: If a required metadata key is missing.
        """
        self._analysis_id = analysis_id
        self._path = path
        self._bundle_file = bundle_file
        self._metadata = metadata
        # None unless the filename carries a '.metrics' suffix.
        self._metrics = MetricsPattern.extract_metadata(path)
        self._validate_params()

        self._ext = str(self._metadata["ext"]).lower()

        # A metrics file gets its type from the 'type' metadata key;
        # any other file falls back to its extension.
        key = "type" if self._metrics is not None else "ext"
        self._type = str(self._metadata[key]).lower()

    def __eq__(self, other: object) -> bool | NotImplementedType:
        """Defines equality comparison for DataFile instances based on their
        file path.
        """
        if isinstance(other, DataFile):
            return self._path == other._path
        return NotImplemented

    def __hash__(self) -> int:
        """Defines hash behavior for DataFile to allow use in sets and as dict keys."""
        return hash(self._path)

    def _validate_params(self) -> None:
        """Validate values of some ``DataFile`` constructor parameters.

        :raises RuntimeError: One of the parameters value is invalid.
        """
        if "ext" not in self._metadata:
            msg = (
                f"Data file '{self._path}' "
                f"is missing the required metadata key 'ext'."
            )
            raise RuntimeError(msg)

        if self._metrics is not None and "type" not in self._metadata:
            msg = (
                f"Metrics data file '{self._path}' "
                f"is missing the required metadata key 'type'."
            )
            raise RuntimeError(msg)

    @property
    def analysis_id(self) -> str:
        """Get the analysis ID."""
        return self._analysis_id

    @property
    def path(self) -> Path:
        """Retrieve the data file path."""
        return self._path

    @property
    def ext(self) -> str:
        """Retrieve the data file extension."""
        return self._ext

    @property
    def type(self) -> str:
        """Retrieve the data file type.

        Normally, the type is the file's extension.
        If the file is a metrics file, its type is taken from the metadata key
        'type'.
        """
        return self._type

    @property
    def bundle_file(self) -> Path:
        """Retrieve the path to the associated data bundle file."""
        return self._bundle_file

    @property
    def metadata(self) -> Metadata:
        """Retrieve a copy of the metadata associated with the data file."""
        return self._metadata.copy()

    @property
    def metrics(self) -> list[dict[str, str]] | None:
        """Retrieve a copy of the metrics associated with the data file."""
        # Return an actual copy, as documented: the previous implementation
        # returned the internal list, letting callers mutate this object's
        # state through it.
        if self._metrics is None:
            return None
        return [entry.copy() for entry in self._metrics]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Process(ABC): # noqa: B024
|
|
7
|
+
"""Abstract base class for a Process.
|
|
8
|
+
|
|
9
|
+
It is not intended to be instantiated directly. Instead, use one of its
|
|
10
|
+
subclasses, ``WetProcess`` or ``BioInfoProcess``.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
def __init__(
|
|
14
|
+
self,
|
|
15
|
+
proc_id: str,
|
|
16
|
+
bundle_file: str | None = None,
|
|
17
|
+
**data: Any, # noqa: ANN401
|
|
18
|
+
) -> None:
|
|
19
|
+
self._proc_id = proc_id
|
|
20
|
+
self._bundle_file = bundle_file
|
|
21
|
+
self._data = data
|
|
22
|
+
self._type = self.__class__.__name__
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def id(self) -> str:
|
|
26
|
+
"""Unique identifier of the process."""
|
|
27
|
+
return self._proc_id
|
|
28
|
+
|
|
29
|
+
@property
|
|
30
|
+
def data(self) -> dict[str, Any]:
|
|
31
|
+
"""Return a copy of the associated data."""
|
|
32
|
+
return copy.deepcopy(self._data)
|
|
33
|
+
|
|
34
|
+
@property
|
|
35
|
+
def type(self) -> str:
|
|
36
|
+
"""Type of the process."""
|
|
37
|
+
return self._type
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class WetProcess(Process):
    """Concrete process describing wet-lab work."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BioInfoProcess(Process):
    """Concrete process describing bioinformatics work."""
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import typing
|
|
3
|
+
from collections import UserDict
|
|
4
|
+
from typing import Self
|
|
5
|
+
|
|
6
|
+
from genelastic.common.types import BundleDict
|
|
7
|
+
from genelastic.import_data.models.process import (
|
|
8
|
+
Process,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("genelastic")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Processes(UserDict[str, Process]):
    """Container for homogeneous Process objects.

    Unlike a standard dict:
    - Only subclasses of ``Process`` are allowed as values.
    - All items must be of the same concrete subclass of ``Process``.
    - Duplicate keys are not allowed and will raise an exception.

    :ivar _item_type: Internal attribute storing the concrete subclass
        of ``Process`` enforced in this container.
    """

    _item_type: type | None = None

    def __setitem__(self, key: str, value: Process) -> None:
        self._check_value(value)

        if key in self:
            msg = (
                f"Duplicate key. "
                f"Container already holds an item with key '{key}'."
            )
            raise ValueError(msg)

        super().__setitem__(key, value)

    def _check_value(self, value: Process) -> None:
        """Reject non-Process values and subclass mixing; lock the
        container's item type on the first insertion.
        """
        if not isinstance(value, Process):
            msg = (
                "Object type not supported. "
                "Container only supports 'Process' subclasses as items."
            )
            raise TypeError(msg)

        if self._item_type is None:
            self._item_type = type(value)
        elif not isinstance(value, self._item_type):
            msg = (
                f"Cannot mix types. Container already holds "
                f"{self._item_type.__name__} items."
            )
            raise TypeError(msg)

    def add(self, item: Process) -> None:
        """Add one process item to the container.

        :raises TypeError: If ``item`` is not a subclass of ``Process``,
            or if it does not match the subclass type of items already in the
            container.
        :raises ValueError: If an item with the same key (``item.id``) already
            exists in the container.
        """
        self[item.id] = item

    @classmethod
    def from_dicts(
        cls, arr: typing.Sequence[BundleDict], process_cls: type[Process]
    ) -> Self:
        """Build a Processes container instance from a sequence of dictionaries.

        :param arr: Sequence of dictionaries representing process data.
        :param process_cls: The subclass of ``Process`` to instantiate for each
            dict.
        :raises TypeError: If instantiating ``process_cls`` fails due to invalid
            dictionary arguments, or if the resulting object type does not
            match the container's enforced type.
        :raises ValueError: If two or more dictionaries yield items with the
            same key (``id``), leading to duplicate entries in the container.
        :return: A Processes container instance populated with process objects.
        """
        container = cls()
        for entry in arr:
            container.add(process_cls(**entry))
        return container
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
import typing
|
|
4
|
+
from collections import UserDict
|
|
5
|
+
|
|
6
|
+
from genelastic.common.exceptions import TagsDefinitionError
|
|
7
|
+
from genelastic.common.types import BundleDict
|
|
8
|
+
from genelastic.import_data.constants import (
|
|
9
|
+
DEFAULT_TAG2FIELD,
|
|
10
|
+
DEFAULT_TAG_DELIMITER_END,
|
|
11
|
+
DEFAULT_TAG_DELIMITER_START,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger("genelastic")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Tags(UserDict[str, dict[str, str]]):
    """Represents a set of tags used to extract metadata from filenames.

    Each tag maps a name to a metadata field and a regex pattern, supporting
    custom delimiters. Default tags (``DEFAULT_TAG2FIELD``) are merged with
    optional user-defined ones, and utilities are provided for searching,
    accessing, and resolving tags in filename patterns.
    """

    def __init__(
        self,
        delimiter_start: str | None = None,
        delimiter_end: str | None = None,
        match: dict[str, dict[str, str]] | None = None,
    ) -> None:
        """Initialize a Tags instance.

        :param delimiter_start: Optional character prepended to all tag names.
            Defaults to ``DEFAULT_TAG_DELIMITER_START``.
        :param delimiter_end: Optional character appended to all tag names.
            Defaults to ``DEFAULT_TAG_DELIMITER_END``.
        :param match: Optional dictionary of user-defined tags. Overrides
            ``DEFAULT_TAG2FIELD`` if keys overlap.
        :raises TagsDefinitionError: On an invalid delimiter or tag definition.
        """
        super().__init__()

        self._delimiter_start = self._resolve_delimiter(
            delimiter_start, DEFAULT_TAG_DELIMITER_START, "start"
        )
        self._delimiter_end = self._resolve_delimiter(
            delimiter_end, DEFAULT_TAG_DELIMITER_END, "end"
        )

        # Combine default tags with user-provided tags. User-defined ones
        # take precedence.
        effective_match = DEFAULT_TAG2FIELD | (match or {})

        # Store each tag in the dictionary using the full name
        # (delimiter start + tag name + delimiter end).
        for tag_name, tag_attrs in effective_match.items():
            self._validate_tag(tag_name, tag_attrs)
            full_name = f"{self._delimiter_start}{tag_name}{self._delimiter_end}"
            self[full_name] = tag_attrs

        logger.info(
            "The following tags will be used "
            "to extract metadata from filenames : %s",
            self,
        )

    @classmethod
    def _resolve_delimiter(
        cls, value: str | None, default: str, label: str
    ) -> str:
        """Return a validated delimiter, falling back to *default* when unset."""
        if value is None:
            return default
        if not cls.validate_tag_delimiter(value):
            msg = (
                f"A tag delimiter {label} should contain only one special "
                f"character, excluding the following: (, ), ?, <, >."
            )
            raise TagsDefinitionError(msg)
        return value

    @classmethod
    def _validate_tag(cls, tag_name: str, tag_attrs: dict[str, str]) -> None:
        """Check a single tag definition: valid name and mandatory keys."""
        if not cls.validate_tag_name(tag_name):
            msg = (
                f"Invalid tag '{tag_name}': its name should contain at "
                f"least one alphanumeric character: a-z, A-Z and 0-9."
            )
            raise TagsDefinitionError(msg)

        for mandatory_key in ("field", "regex"):
            if mandatory_key not in tag_attrs:
                msg = (
                    f"Invalid tag '{tag_name}': mandatory key "
                    f"'{mandatory_key}' missing."
                )
                raise TagsDefinitionError(msg)

    @property
    def delimiter_start(self) -> str:
        """Return the tag delimiter start. Defaults to
        ``DEFAULT_TAG_DELIMITER_START``.
        """
        return self._delimiter_start

    @property
    def delimiter_end(self) -> str:
        """Return the tag delimiter end. Defaults to
        ``DEFAULT_TAG_DELIMITER_END``.
        """
        return self._delimiter_end

    @property
    def search_regex(self) -> str:
        """Return a regex pattern to search for tags inside a string.

        This regex matches any tag using the current start and end delimiters.
        Used for filename prefixes validation or resolving tags into regex
        patterns.
        """
        return "".join(
            (
                re.escape(self._delimiter_start),
                r"[a-zA-Z0-9]+",
                re.escape(self._delimiter_end),
            )
        )

    @classmethod
    def from_dict(cls, bundle: BundleDict) -> typing.Self:
        """Create tags from a bundle dict."""
        if "tags" not in bundle:
            msg = (
                "Could not create a Tags object: bundle does not define tags "
                "(root key 'tags' missing)."
            )
            raise TagsDefinitionError(msg)

        tags = bundle["tags"]
        delimiters = tags.get("delimiter") or {}
        return cls(
            delimiter_start=delimiters.get("start"),
            delimiter_end=delimiters.get("end"),
            match=tags.get("match"),
        )

    @staticmethod
    def validate_tag_delimiter(s: str) -> bool:
        """A tag delimiter should only contain one special character,
        excluding the following: (, ), ?, <, >.
        """
        return len(s) == 1 and re.match(r"^[\w()<>?]$", s) is None

    @staticmethod
    def validate_tag_name(s: str) -> bool:
        """A tag name should contain at least one alphanumeric character:
        ``a-z``, ``A-Z`` and ``0-9``.

        :return: True if the tag name is valid, False otherwise.
        """
        return bool(s) and re.match(r"^[^_\W]+$", s) is not None
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
from collections import UserList
|
|
3
|
+
from typing import SupportsIndex
|
|
4
|
+
|
|
5
|
+
from genelastic.common.exceptions import UniqueListDuplicateError
|
|
6
|
+
|
|
7
|
+
T = typing.TypeVar("T")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class UniqueList(UserList[T]):
    """A list that only allows unique elements.

    :param init_list: Optional iterable to initialize the list.
    """

    def __init__(self, init_list: typing.Iterable[T] | None = None) -> None:
        super().__init__()

        if init_list:
            for item in init_list:
                self._ensure_unique(item)
                super().append(item)

    def __setitem__(
        self, i: SupportsIndex | slice, item: T | typing.Iterable[T]
    ) -> None:
        if isinstance(i, slice):
            if not isinstance(item, typing.Iterable):
                msg = "Expected iterable for slice assignment."
                raise TypeError(msg)

            # Materialize once: 'item' may be a one-shot iterator, and it
            # must be scanned for duplicates before being assigned.
            items = list(item)

            slice_dupes = self._find_dupes(items)
            if slice_dupes:
                formatted_dupes = [str(dupe) for dupe in slice_dupes]
                msg = (
                    f"Duplicate item(s) in slice assignment: "
                    f"{', '.join(formatted_dupes)}."
                )
                raise UniqueListDuplicateError(msg)

            # Elements already in the list are only allowed if they belong
            # to the slice being replaced.
            replaced = self[i]
            for x in items:
                if x in self and x not in replaced:
                    msg = f"Duplicate item: {x}."
                    raise UniqueListDuplicateError(msg)
            super().__setitem__(i, items)
        else:
            self._ensure_unique(typing.cast(T, item))
            super().__setitem__(i, typing.cast(T, item))

    def __add__(self, other: typing.Iterable[T]) -> "UniqueList[T]":
        return UniqueList(super().__add__(self._check_batch(other)))

    def __iadd__(self, other: typing.Iterable[T]) -> typing.Self:
        return super().__iadd__(self._check_batch(other))

    def __mul__(self, n: int) -> typing.Self:
        raise NotImplementedError

    def __imul__(self, n: int) -> typing.Self:
        raise NotImplementedError

    @staticmethod
    def _find_dupes(a: typing.Iterable[T]) -> list[T]:
        """Return the elements of *a* that occur more than once."""
        seen = set()
        dupes = []
        for x in a:
            if x in seen:
                dupes.append(x)
            else:
                seen.add(x)
        return dupes

    def _ensure_unique(self, item: T) -> None:
        """Raise if *item* is already present in the list."""
        if item in self:
            msg = f"Duplicate item: {item}."
            raise UniqueListDuplicateError(msg)

    def _check_batch(self, other: typing.Iterable[T]) -> list[T]:
        """Materialize *other* and verify it introduces no duplicates.

        Fixes two defects of the previous implementation: iterating *other*
        twice silently added nothing when it was a one-shot iterator, and
        duplicates *within* the batch itself went undetected.

        :param other: Iterable of candidate elements.
        :raises UniqueListDuplicateError: If an element already exists in the
            list or occurs more than once in *other*.
        :returns: The batch as a list, safe to pass to list operations.
        """
        items = list(other)
        for idx, item in enumerate(items):
            self._ensure_unique(item)
            # Equality-based scan of the preceding batch elements, so
            # unhashable items remain supported (as elsewhere in the class).
            if item in items[:idx]:
                msg = f"Duplicate item: {item}."
                raise UniqueListDuplicateError(msg)
        return items

    def append(self, item: T) -> None:
        """Appends a unique item to the end of the list.

        :param item: Element to append.
        :raises UniqueListDuplicateError: If the item already exists in the
            list.
        """
        self._ensure_unique(item)
        super().append(item)

    def insert(self, i: int, item: T) -> None:
        """Inserts a unique item at a specified position.

        :param i: Index where the item should be inserted.
        :param item: Element to insert.
        :raises UniqueListDuplicateError: If the item already exists in the
            list.
        """
        self._ensure_unique(item)
        super().insert(i, item)

    def extend(self, other: typing.Iterable[T]) -> None:
        """Extends the list with unique elements from another iterable.

        :param other: Iterable of elements to add.
        :raises UniqueListDuplicateError: If any element already exists in the
            list, or occurs more than once in *other*.
        """
        super().extend(self._check_batch(other))
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
|
|
6
|
+
class ValidationIssue:
|
|
7
|
+
"""Contains context about a bundle validation issue."""
|
|
8
|
+
|
|
9
|
+
exc_type: str
|
|
10
|
+
file_path: Path
|
|
11
|
+
file_index: int
|
|
12
|
+
file_count: int
|
|
13
|
+
doc_index: int | None = None
|
|
14
|
+
doc_count: int | None = None
|
|
15
|
+
|
|
16
|
+
def __str__(self) -> str:
|
|
17
|
+
if not self.doc_index:
|
|
18
|
+
return (
|
|
19
|
+
f"[{self.exc_type}] "
|
|
20
|
+
f"File {self.file_index}/{self.file_count}: {self.file_path}"
|
|
21
|
+
)
|
|
22
|
+
return (
|
|
23
|
+
f"[{self.exc_type}] "
|
|
24
|
+
f"File {self.file_index}/{self.file_count}: {self.file_path} "
|
|
25
|
+
f"(in doc #{self.doc_index}/{self.doc_count})"
|
|
26
|
+
)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from genelastic.common.types import Metadata
|
|
5
|
+
from genelastic.import_data.constants import TOOLS_SUFFIX_RE
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FilenamePattern:
    """Utility class to extract metadata from filenames based on a regex
    pattern.
    """

    def __init__(self, pattern: str) -> None:
        """Initializes a FilenamePattern instance.

        :param pattern: The regex pattern used to extract metadata from
            filenames.
        """
        self._re = re.compile(pattern)

    def extract_metadata(self, filename: str) -> Metadata:
        """Extracts metadata from the given filename using the defined pattern.

        :param filename: The filename from which metadata should be extracted.
        :raises RuntimeError: If the filename does not match the pattern.
        :returns: A dictionary containing the extracted metadata.
        """
        m = self._re.search(filename)
        if not m:
            # Report the actual filename: the previous message printed the
            # literal placeholder '(unknown)' instead.
            msg = (
                f"Failed parsing filename '{filename}' with pattern "
                f"'{self._re.pattern}'."
            )
            raise RuntimeError(msg)

        # Convert necessary values.
        metadata = m.groupdict()
        if "cov_depth" in metadata:
            metadata["cov_depth"] = int(metadata["cov_depth"])

        return metadata

    def matches_pattern(self, filename: str) -> bool:
        """Checks whether the given filename matches the defined pattern.

        :param filename: The filename to check.
        :returns: True if the filename matches the pattern, False otherwise.
        """
        return bool(self._re.fullmatch(filename))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class MetricsPattern:
|
|
53
|
+
"""Utility class to extract tool/version metadata from filenames with a
|
|
54
|
+
``.metrics`` suffix.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
@staticmethod
|
|
58
|
+
def extract_metadata(file: Path) -> list[dict[str, str]] | None:
|
|
59
|
+
"""Extracts metadata from a filename based on the ``.metrics`` suffix.
|
|
60
|
+
|
|
61
|
+
:param file: The path to the file to be analyzed.
|
|
62
|
+
:raises RuntimeError: If the suffix is malformed or cannot be parsed.
|
|
63
|
+
:returns:
|
|
64
|
+
- None if the file does not have a ``.metrics`` prefix,
|
|
65
|
+
- An empty list if the prefix is present but no metadata is found,
|
|
66
|
+
- A list of dictionaries with ``tool`` and ``version`` keys if
|
|
67
|
+
metadata is extracted.
|
|
68
|
+
"""
|
|
69
|
+
if not file.suffixes or not file.suffixes[0].startswith(".metrics"):
|
|
70
|
+
return None
|
|
71
|
+
|
|
72
|
+
tools_str = file.suffixes[0].replace(".metrics", "")
|
|
73
|
+
matches = list(re.finditer(TOOLS_SUFFIX_RE, tools_str))
|
|
74
|
+
matched_str = "".join(m.group(0) for m in matches)
|
|
75
|
+
|
|
76
|
+
if matched_str != tools_str:
|
|
77
|
+
msg = (
|
|
78
|
+
f"Failed extracting metrics from filename '{file}': "
|
|
79
|
+
f"'{tools_str}' does not fully match pattern "
|
|
80
|
+
f"'{TOOLS_SUFFIX_RE}'."
|
|
81
|
+
)
|
|
82
|
+
raise RuntimeError(msg)
|
|
83
|
+
|
|
84
|
+
return [
|
|
85
|
+
{
|
|
86
|
+
"tool": m.group("tool"),
|
|
87
|
+
"version": m.group("version").replace("-", "."),
|
|
88
|
+
}
|
|
89
|
+
for m in matches
|
|
90
|
+
]
|