genelastic 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. genelastic/api/.env +4 -0
  2. genelastic/api/cli_start_api.py +2 -2
  3. genelastic/api/errors.py +52 -0
  4. genelastic/api/extends/example.py +0 -6
  5. genelastic/api/extends/example.yml +0 -20
  6. genelastic/api/routes.py +313 -181
  7. genelastic/api/server.py +8 -3
  8. genelastic/api/specification.yml +343 -181
  9. genelastic/common/__init__.py +0 -44
  10. genelastic/common/cli.py +48 -0
  11. genelastic/common/elastic.py +374 -46
  12. genelastic/common/exceptions.py +34 -2
  13. genelastic/common/server.py +9 -1
  14. genelastic/common/types.py +1 -14
  15. genelastic/import_data/__init__.py +0 -27
  16. genelastic/import_data/checker.py +99 -0
  17. genelastic/import_data/checker_observer.py +13 -0
  18. genelastic/import_data/cli/__init__.py +0 -0
  19. genelastic/import_data/cli/cli_check.py +136 -0
  20. genelastic/import_data/{cli_gen_data.py → cli/gen_data.py} +4 -4
  21. genelastic/import_data/cli/import_data.py +346 -0
  22. genelastic/import_data/cli/info.py +247 -0
  23. genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
  24. genelastic/import_data/cli/validate.py +146 -0
  25. genelastic/import_data/collect.py +185 -0
  26. genelastic/import_data/constants.py +136 -11
  27. genelastic/import_data/import_bundle.py +102 -59
  28. genelastic/import_data/import_bundle_factory.py +70 -149
  29. genelastic/import_data/importers/__init__.py +0 -0
  30. genelastic/import_data/importers/importer_base.py +131 -0
  31. genelastic/import_data/importers/importer_factory.py +85 -0
  32. genelastic/import_data/importers/importer_types.py +223 -0
  33. genelastic/import_data/logger.py +2 -1
  34. genelastic/import_data/models/__init__.py +0 -0
  35. genelastic/import_data/models/analyses.py +178 -0
  36. genelastic/import_data/models/analysis.py +144 -0
  37. genelastic/import_data/models/data_file.py +110 -0
  38. genelastic/import_data/models/process.py +45 -0
  39. genelastic/import_data/models/processes.py +84 -0
  40. genelastic/import_data/models/tags.py +170 -0
  41. genelastic/import_data/models/unique_list.py +109 -0
  42. genelastic/import_data/models/validate.py +26 -0
  43. genelastic/import_data/patterns.py +90 -0
  44. genelastic/import_data/random_bundle.py +10 -8
  45. genelastic/import_data/resolve.py +157 -0
  46. genelastic/ui/.env +1 -0
  47. genelastic/ui/cli_start_ui.py +4 -2
  48. genelastic/ui/routes.py +289 -42
  49. genelastic/ui/static/cea-cnrgh.ico +0 -0
  50. genelastic/ui/static/cea.ico +0 -0
  51. genelastic/ui/static/layout.ico +0 -0
  52. genelastic/ui/static/novaseq6000.png +0 -0
  53. genelastic/ui/static/style.css +430 -0
  54. genelastic/ui/static/ui.js +458 -0
  55. genelastic/ui/templates/analyses.html +96 -9
  56. genelastic/ui/templates/analysis_detail.html +44 -0
  57. genelastic/ui/templates/bi_process_detail.html +129 -0
  58. genelastic/ui/templates/bi_processes.html +114 -9
  59. genelastic/ui/templates/explorer.html +356 -0
  60. genelastic/ui/templates/home.html +205 -2
  61. genelastic/ui/templates/layout.html +148 -29
  62. genelastic/ui/templates/version.html +19 -7
  63. genelastic/ui/templates/wet_process_detail.html +131 -0
  64. genelastic/ui/templates/wet_processes.html +114 -9
  65. genelastic-0.9.0.dist-info/METADATA +686 -0
  66. genelastic-0.9.0.dist-info/RECORD +76 -0
  67. genelastic-0.9.0.dist-info/WHEEL +4 -0
  68. genelastic-0.9.0.dist-info/entry_points.txt +10 -0
  69. genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
  70. genelastic/import_data/analyses.py +0 -69
  71. genelastic/import_data/analysis.py +0 -205
  72. genelastic/import_data/bi_process.py +0 -27
  73. genelastic/import_data/bi_processes.py +0 -49
  74. genelastic/import_data/cli_import.py +0 -379
  75. genelastic/import_data/cli_info.py +0 -256
  76. genelastic/import_data/cli_validate.py +0 -54
  77. genelastic/import_data/data_file.py +0 -87
  78. genelastic/import_data/filename_pattern.py +0 -57
  79. genelastic/import_data/tags.py +0 -123
  80. genelastic/import_data/wet_process.py +0 -28
  81. genelastic/import_data/wet_processes.py +0 -53
  82. genelastic-0.8.0.dist-info/METADATA +0 -109
  83. genelastic-0.8.0.dist-info/RECORD +0 -52
  84. genelastic-0.8.0.dist-info/WHEEL +0 -5
  85. genelastic-0.8.0.dist-info/entry_points.txt +0 -8
  86. genelastic-0.8.0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,144 @@
1
+ import contextlib
2
+ import copy
3
+ import logging
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from types import NotImplementedType
7
+
8
+ from genelastic.common.types import Metadata
9
+ from genelastic.import_data.collect import (
10
+ DataFileCollector,
11
+ )
12
+ from genelastic.import_data.constants import (
13
+ ALLOWED_EXTENSIONS,
14
+ )
15
+ from genelastic.import_data.models.data_file import DataFile
16
+ from genelastic.import_data.patterns import FilenamePattern
17
+
18
+ logger = logging.getLogger("genelastic")
19
+
20
+
21
class Analysis:
    """Class Analysis that represents an analysis.

    On construction, the data files belonging to the analysis are collected
    (via ``DataFileCollector``) and indexed by file extension.
    """

    # Metadata keys used internally to drive file collection; they are
    # stripped from the metadata exposed to callers.
    METADATA_INTERNAL_KEYS = frozenset(
        ["tags", "multi_match", "ext", "file_prefix"]
    )

    def __init__(
        self,
        analysis_id: str,
        bundle_file: Path,
        data_path: Path,
        filename_pattern: FilenamePattern,
        **metadata: str | int,
    ) -> None:
        """Collect the analysis data files and group them by extension.

        :param analysis_id: Unique identifier of the analysis.
        :param bundle_file: Path of the bundle file declaring this analysis.
        :param data_path: Directory holding the analysis data files.
        :param filename_pattern: Pattern used to match data file names.
        :param metadata: Arbitrary extra metadata; internal keys
            (``METADATA_INTERNAL_KEYS``) are removed before storage.
        """
        self._analysis_id = analysis_id
        self._bundle_file = bundle_file
        self._data_path = data_path
        self._metadata = self._remove_internal_keys(metadata)
        self._data_files_by_ext: dict[str, set[DataFile]] = defaultdict(set)

        logger.info("")
        logger.info("[ Analysis ID %s ]", self._analysis_id)

        self._collected_files = DataFileCollector(
            analysis_id,
            bundle_file,
            data_path,
            filename_pattern,
        ).run()

        for data_file in self._collected_files.data_files:
            self._data_files_by_ext[data_file.ext].add(data_file)

        logger.info(
            " -> Extracted %s file extension(s): %s.",
            len(self._data_files_by_ext),
            ", ".join(ext.upper() for ext in self._data_files_by_ext),
        )

    def __eq__(self, other: object) -> bool | NotImplementedType:
        """Defines equality comparison for Analysis instances based on their
        ID.
        """
        if isinstance(other, Analysis):
            return self._analysis_id == other._analysis_id
        return NotImplemented

    def __hash__(self) -> int:
        """Hash on the analysis ID, consistently with ``__eq__``.

        Defining ``__eq__`` without ``__hash__`` would make instances
        unhashable (preventing use in sets or as dict keys); ``DataFile``
        follows the same eq/hash pattern.
        """
        return hash(self._analysis_id)

    def __lt__(self, other: object) -> bool | NotImplementedType:
        """Defines sort order for Analysis instances based on their ID."""
        if isinstance(other, Analysis):
            return self._analysis_id < other._analysis_id
        return NotImplemented

    def __str__(self) -> str:
        """Return a human-readable description of the analysis."""
        return (
            f"Analysis(id='{self._analysis_id}', "
            f"bundle_file='{self._bundle_file}', "
            f"data_path='{self._data_path}', "
            f"metadata={self._metadata})"
        )

    @staticmethod
    def _remove_internal_keys(
        metadata: Metadata,
    ) -> Metadata:
        """Return a copy of ``metadata`` without the internal keys."""
        updated_metadata = metadata.copy()

        for key in Analysis.METADATA_INTERNAL_KEYS:
            with contextlib.suppress(KeyError):
                del updated_metadata[key]

        return updated_metadata

    @property
    def metadata(self) -> Metadata:
        """Get a deep copy of the metadata (safe for callers to mutate)."""
        return copy.deepcopy(self._metadata)

    @property
    def bundle_file(self) -> Path:
        """Get the bundle file."""
        return self._bundle_file

    @property
    def data_path(self) -> Path:
        """Get the data path specified in the bundle file."""
        return self._data_path

    @property
    def id(self) -> str:
        """Get the analysis ID."""
        return self._analysis_id

    @property
    def matched_files(self) -> set[Path]:
        """Returns the list of files that matched the filename pattern."""
        return self._collected_files.matched_files

    @property
    def unmatched_files(self) -> set[Path]:
        """Returns the list of files that did not match the filename pattern."""
        return self._collected_files.unmatched_files

    @property
    def extensions(self) -> set[str]:
        """Returns all the matched files extensions."""
        return set(self._data_files_by_ext.keys())

    def get_data_files(self, ext: str | None = None) -> set[DataFile]:
        """Returns the list of matched files as DataFile objects.

        :param ext: Filter the list of matched files by their extension
            (case-sensitive).
        :raises ValueError: If ``ext`` is not in ``ALLOWED_EXTENSIONS``.
        """
        if ext:
            if ext not in ALLOWED_EXTENSIONS:
                msg = f"Unsupported extension {ext}."
                raise ValueError(msg)
            return self._data_files_by_ext.get(ext, set())
        return {f for value in self._data_files_by_ext.values() for f in value}
@@ -0,0 +1,110 @@
1
+ """This module defines the DataFile class, which handles the representation,
2
+ management, and extraction of metadata for a data file within a data bundle.
3
+
4
+ It includes functionality to construct DataFile instances from paths and
5
+ optional filename patterns, retrieve file paths and metadata, and support
6
+ for extracting metadata from filenames using specified patterns.
7
+ """
8
+
9
+ import logging
10
+ from pathlib import Path
11
+ from types import NotImplementedType
12
+
13
+ from genelastic.common.types import Metadata
14
+ from genelastic.import_data.patterns import MetricsPattern
15
+
16
+ logger = logging.getLogger("genelastic")
17
+
18
+
19
class DataFile:
    """Class for handling a data file and its metadata."""

    def __init__(
        self,
        analysis_id: str,
        path: Path,
        bundle_file: Path,
        metadata: Metadata,
    ) -> None:
        """Build a DataFile and extract its metrics, if any.

        :param analysis_id: ID of the analysis this file belongs to.
        :param path: Path of the data file.
        :param bundle_file: Path of the bundle file declaring this file.
        :param metadata: Metadata for this file; must contain the key 'ext',
            plus the key 'type' when the file is a metrics file.
        :raises RuntimeError: If a required metadata key is missing.
        """
        self._analysis_id = analysis_id
        self._path = path
        self._bundle_file = bundle_file
        self._metadata = metadata
        self._metrics = MetricsPattern.extract_metadata(path)
        self._validate_params()

        self._ext = str(self._metadata["ext"]).lower()

        # Metrics files carry an explicit 'type' in their metadata; for all
        # other files the type defaults to the file extension.
        key = "type" if self._metrics is not None else "ext"
        self._type = str(self._metadata[key]).lower()

    def __eq__(self, other: object) -> bool | NotImplementedType:
        """Defines equality comparison for DataFile instances based on their
        file path.
        """
        if isinstance(other, DataFile):
            return self._path == other._path
        return NotImplemented

    def __hash__(self) -> int:
        """Defines hash behavior for DataFile to allow use in sets and as dict keys."""
        return hash(self._path)

    def _validate_params(self) -> None:
        """Validate values of some ``DataFile`` constructor parameters.

        :raises RuntimeError: One of the parameters value is invalid.
        """
        if "ext" not in self._metadata:
            msg = (
                f"Data file '{self._path}' "
                f"is missing the required metadata key 'ext'."
            )
            raise RuntimeError(msg)

        if self._metrics is not None and "type" not in self._metadata:
            msg = (
                f"Metrics data file '{self._path}' "
                f"is missing the required metadata key 'type'."
            )
            raise RuntimeError(msg)

    @property
    def analysis_id(self) -> str:
        """Get the analysis ID."""
        return self._analysis_id

    @property
    def path(self) -> Path:
        """Retrieve the data file path."""
        return self._path

    @property
    def ext(self) -> str:
        """Retrieve the data file extension."""
        return self._ext

    @property
    def type(self) -> str:
        """Retrieve the data file type.

        Normally, the type is the file's extension.
        If the file is a metrics file, its type is taken from the metadata key
        'type'.
        """
        return self._type

    @property
    def bundle_file(self) -> Path:
        """Retrieve the path to the associated data bundle file."""
        return self._bundle_file

    @property
    def metadata(self) -> Metadata:
        """Retrieve a copy of the metadata associated with the data file."""
        return self._metadata.copy()

    @property
    def metrics(self) -> list[dict[str, str]] | None:
        """Retrieve a copy of the metrics associated with the data file."""
        if self._metrics is None:
            return None
        # Return a copy of the list and of each entry: the docstring promises
        # a copy, but the original returned the internal list directly,
        # letting callers mutate the DataFile's state (unlike ``metadata``,
        # which does copy).
        return [dict(metric) for metric in self._metrics]
@@ -0,0 +1,45 @@
1
+ import copy
2
+ from abc import ABC
3
+ from typing import Any
4
+
5
+
6
+ class Process(ABC): # noqa: B024
7
+ """Abstract base class for a Process.
8
+
9
+ It is not intended to be instantiated directly. Instead, use one of its
10
+ subclasses, ``WetProcess`` or ``BioInfoProcess``.
11
+ """
12
+
13
+ def __init__(
14
+ self,
15
+ proc_id: str,
16
+ bundle_file: str | None = None,
17
+ **data: Any, # noqa: ANN401
18
+ ) -> None:
19
+ self._proc_id = proc_id
20
+ self._bundle_file = bundle_file
21
+ self._data = data
22
+ self._type = self.__class__.__name__
23
+
24
+ @property
25
+ def id(self) -> str:
26
+ """Unique identifier of the process."""
27
+ return self._proc_id
28
+
29
+ @property
30
+ def data(self) -> dict[str, Any]:
31
+ """Return a copy of the associated data."""
32
+ return copy.deepcopy(self._data)
33
+
34
+ @property
35
+ def type(self) -> str:
36
+ """Type of the process."""
37
+ return self._type
38
+
39
+
40
+ class WetProcess(Process):
41
+ """Concrete wet lab process."""
42
+
43
+
44
+ class BioInfoProcess(Process):
45
+ """Concrete bioinformatics process."""
@@ -0,0 +1,84 @@
1
+ import logging
2
+ import typing
3
+ from collections import UserDict
4
+ from typing import Self
5
+
6
+ from genelastic.common.types import BundleDict
7
+ from genelastic.import_data.models.process import (
8
+ Process,
9
+ )
10
+
11
+ logger = logging.getLogger("genelastic")
12
+
13
+
14
class Processes(UserDict[str, Process]):
    """A dict-like container holding ``Process`` objects of a single kind.

    Compared to a standard dict:
    - Values must be instances of ``Process`` subclasses.
    - The first inserted item fixes the concrete subclass; every later item
      must be of that same subclass.
    - Re-using an existing key raises instead of silently overwriting.

    :ivar _item_type: Internal attribute storing the concrete subclass
        of ``Process`` enforced in this container.
    """

    _item_type: type | None = None

    def __setitem__(self, key: str, value: Process) -> None:
        # Reject anything that is not a Process at all.
        if not isinstance(value, Process):
            msg = (
                "Object type not supported. "
                "Container only supports 'Process' subclasses as items."
            )
            raise TypeError(msg)

        # First insertion pins the concrete subclass for this container.
        enforced = self._item_type
        if enforced is None:
            self._item_type = type(value)
        elif not isinstance(value, enforced):
            msg = (
                f"Cannot mix types. Container already holds "
                f"{enforced.__name__} items."
            )
            raise TypeError(msg)

        # Keys are write-once: duplicates are an error, not an overwrite.
        if key in self:
            msg = (
                f"Duplicate key. "
                f"Container already holds an item with key '{key}'."
            )
            raise ValueError(msg)

        super().__setitem__(key, value)

    def add(self, item: Process) -> None:
        """Add one process item to the container, keyed by ``item.id``.

        :raises TypeError: If ``item`` is not a subclass of ``Process``,
            or if it does not match the subclass type of items already in the
            container.
        :raises ValueError: If an item with the same key (``item.id``) already
            exists in the container.
        """
        self[item.id] = item

    @classmethod
    def from_dicts(
        cls, arr: typing.Sequence[BundleDict], process_cls: type[Process]
    ) -> Self:
        """Build a Processes container instance from a sequence of dictionaries.

        :param arr: Sequence of dictionaries representing process data.
        :param process_cls: The subclass of ``Process`` to instantiate for each
            dict.
        :raises TypeError: If instantiating ``process_cls`` fails due to invalid
            dictionary arguments, or if the resulting object type does not
            match the container's enforced type.
        :raises ValueError: If two or more dictionaries yield items with the
            same key (``id``), leading to duplicate entries in the container.
        :return: A Processes container instance populated with process objects.
        """
        container = cls()
        for entry in arr:
            container.add(process_cls(**entry))
        return container
@@ -0,0 +1,170 @@
1
+ import logging
2
+ import re
3
+ import typing
4
+ from collections import UserDict
5
+
6
+ from genelastic.common.exceptions import TagsDefinitionError
7
+ from genelastic.common.types import BundleDict
8
+ from genelastic.import_data.constants import (
9
+ DEFAULT_TAG2FIELD,
10
+ DEFAULT_TAG_DELIMITER_END,
11
+ DEFAULT_TAG_DELIMITER_START,
12
+ )
13
+
14
+ logger = logging.getLogger("genelastic")
15
+
16
+
17
class Tags(UserDict[str, dict[str, str]]):
    """Represents a set of tags used to extract metadata from filenames.

    Each tag maps a name to a metadata field and a regex pattern, supporting
    custom delimiters. This class combines default tags (``DEFAULT_TAG2FIELD``)
    with optional user-defined tags, and provides utilities for searching,
    accessing, and resolving tags in filename patterns.
    """

    def __init__(
        self,
        delimiter_start: str | None = None,
        delimiter_end: str | None = None,
        match: dict[str, dict[str, str]] | None = None,
    ) -> None:
        """Initialize a Tags instance.

        :param delimiter_start: Optional character prepended to all tag names.
            Defaults to ``DEFAULT_TAG_DELIMITER_START``.
        :param delimiter_end: Optional character appended to all tag names.
            Defaults to ``DEFAULT_TAG_DELIMITER_END``.
        :param match: Optional dictionary of user-defined tags. Overrides
            ``DEFAULT_TAG2FIELD`` if keys overlap.
        :raises TagsDefinitionError: If a delimiter or a tag definition is
            invalid.
        """
        super().__init__()

        # Resolve the start delimiter: default when omitted, otherwise
        # validated before use.
        if delimiter_start is None:
            self._delimiter_start = DEFAULT_TAG_DELIMITER_START
        elif self.validate_tag_delimiter(delimiter_start):
            self._delimiter_start = delimiter_start
        else:
            msg = (
                "A tag delimiter start should contain only one special "
                "character, excluding the following: (, ), ?, <, >."
            )
            raise TagsDefinitionError(msg)

        # Same resolution logic for the end delimiter.
        if delimiter_end is None:
            self._delimiter_end = DEFAULT_TAG_DELIMITER_END
        elif self.validate_tag_delimiter(delimiter_end):
            self._delimiter_end = delimiter_end
        else:
            msg = (
                "A tag delimiter end should contain only one special "
                "character, excluding the following: (, ), ?, <, >."
            )
            raise TagsDefinitionError(msg)

        # Defaults first, then user-defined tags, so user entries win on
        # overlapping names.
        combined = DEFAULT_TAG2FIELD | (match or {})

        # Index every tag under its full delimited form:
        # delimiter start + tag name + delimiter end.
        for tag_name, tag_attrs in combined.items():
            if not self.validate_tag_name(tag_name):
                msg = (
                    f"Invalid tag '{tag_name}': its name should contain at "
                    f"least one alphanumeric character: a-z, A-Z and 0-9."
                )
                raise TagsDefinitionError(msg)

            for mandatory_key in ("field", "regex"):
                if mandatory_key not in tag_attrs:
                    msg = (
                        f"Invalid tag '{tag_name}': mandatory key "
                        f"'{mandatory_key}' missing."
                    )
                    raise TagsDefinitionError(msg)

            full_name = (
                f"{self._delimiter_start}{tag_name}{self._delimiter_end}"
            )
            self[full_name] = tag_attrs

        logger.info(
            "The following tags will be used "
            "to extract metadata from filenames : %s",
            self,
        )

    @property
    def delimiter_start(self) -> str:
        """Return the tag delimiter start. Defaults to
        ``DEFAULT_TAG_DELIMITER_START``.
        """
        return self._delimiter_start

    @property
    def delimiter_end(self) -> str:
        """Return the tag delimiter end. Defaults to
        ``DEFAULT_TAG_DELIMITER_END``.
        """
        return self._delimiter_end

    @property
    def search_regex(self) -> str:
        """Return a regex pattern to search for tags inside a string.

        This regex matches any tag using the current start and end delimiters.
        Used for filename prefixes validation or resolving tags into regex
        patterns.
        """
        start = re.escape(self._delimiter_start)
        end = re.escape(self._delimiter_end)
        return rf"{start}[a-zA-Z0-9]+{end}"

    @classmethod
    def from_dict(cls, bundle: BundleDict) -> typing.Self:
        """Create tags from a bundle dict."""
        if "tags" not in bundle:
            msg = (
                "Could not create a Tags object: bundle does not define tags "
                "(root key 'tags' missing)."
            )
            raise TagsDefinitionError(msg)

        tags_section = bundle["tags"]
        # An absent or falsy 'delimiter' entry means both bounds fall back
        # to their defaults.
        delimiter = tags_section.get("delimiter") or {}
        return cls(
            delimiter_start=delimiter.get("start"),
            delimiter_end=delimiter.get("end"),
            match=tags_section.get("match"),
        )

    @staticmethod
    def validate_tag_delimiter(s: str) -> bool:
        """A tag delimiter should only contain one special character,
        excluding the following: (, ), ?, <, >.
        """
        # Exactly one character, and not a word character nor one of the
        # forbidden regex-grouping characters.
        return len(s) == 1 and re.match(r"^[\w()<>?]$", s) is None

    @staticmethod
    def validate_tag_name(s: str) -> bool:
        """A tag name should contain at least one alphanumeric character:
        ``a-z``, ``A-Z`` and ``0-9``.

        :return: True if the tag name is valid, False otherwise.
        """
        return bool(s) and re.match(r"^[^_\W]+$", s) is not None
@@ -0,0 +1,109 @@
1
+ import typing
2
+ from collections import UserList
3
+ from typing import SupportsIndex
4
+
5
+ from genelastic.common.exceptions import UniqueListDuplicateError
6
+
7
+ T = typing.TypeVar("T")
8
+
9
+
10
+ class UniqueList(UserList[T]):
11
+ """A list that only allows unique elements.
12
+
13
+ :param init_list: Optional iterable to initialize the list.
14
+ """
15
+
16
+ def __init__(self, init_list: typing.Iterable[T] | None = None) -> None:
17
+ super().__init__()
18
+
19
+ if init_list:
20
+ for item in init_list:
21
+ self._ensure_unique(item)
22
+ super().append(item)
23
+
24
+ def __setitem__(
25
+ self, i: SupportsIndex | slice, item: T | typing.Iterable[T]
26
+ ) -> None:
27
+ if isinstance(i, slice):
28
+ if not isinstance(item, typing.Iterable):
29
+ msg = "Expected iterable for slice assignment."
30
+ raise TypeError(msg)
31
+
32
+ slice_dupes = self._find_dupes(item)
33
+ if slice_dupes:
34
+ formatted_dupes = [str(dupe) for dupe in slice_dupes]
35
+ msg = (
36
+ f"Duplicate item(s) in slice assignment: "
37
+ f"{', '.join(formatted_dupes)}."
38
+ )
39
+ raise UniqueListDuplicateError(msg)
40
+ for x in item:
41
+ if x in self and x not in self[i]:
42
+ msg = f"Duplicate item: {x}."
43
+ raise UniqueListDuplicateError(msg)
44
+ super().__setitem__(i, item)
45
+ else:
46
+ self._ensure_unique(typing.cast(T, item))
47
+ super().__setitem__(i, typing.cast(T, item))
48
+
49
+ def __add__(self, other: typing.Iterable[T]) -> "UniqueList[T]":
50
+ for item in other:
51
+ self._ensure_unique(item)
52
+ return UniqueList(super().__add__(other))
53
+
54
+ def __iadd__(self, other: typing.Iterable[T]) -> typing.Self:
55
+ for item in other:
56
+ self._ensure_unique(item)
57
+ return super().__iadd__(other)
58
+
59
+ def __mul__(self, n: int) -> typing.Self:
60
+ raise NotImplementedError
61
+
62
+ def __imul__(self, n: int) -> typing.Self:
63
+ raise NotImplementedError
64
+
65
+ @staticmethod
66
+ def _find_dupes(a: typing.Iterable[T]) -> list[T]:
67
+ seen = set()
68
+ dupes = []
69
+ for x in a:
70
+ if x in seen:
71
+ dupes.append(x)
72
+ else:
73
+ seen.add(x)
74
+ return dupes
75
+
76
+ def _ensure_unique(self, item: T) -> None:
77
+ if item in self:
78
+ msg = f"Duplicate item: {item}."
79
+ raise UniqueListDuplicateError(msg)
80
+
81
+ def append(self, item: T) -> None:
82
+ """Appends a unique item to the end of the list.
83
+
84
+ :param item: Element to append.
85
+ :raises UniqueListError: If the item already exists in the list.
86
+ """
87
+ self._ensure_unique(item)
88
+ super().append(item)
89
+
90
+ def insert(self, i: int, item: T) -> None:
91
+ """Inserts a unique item at a specified position.
92
+
93
+ :param i: Index where the item should be inserted.
94
+ :param item: Element to insert.
95
+ :raises UniqueListError: If the item already exists in the list.
96
+ """
97
+ self._ensure_unique(item)
98
+ super().insert(i, item)
99
+
100
+ def extend(self, other: typing.Iterable[T]) -> None:
101
+ """Extends the list with unique elements from another iterable.
102
+
103
+ :param other: Iterable of elements to add.
104
+ :raises UniqueListError: If any element in the iterable already exists in
105
+ the list.
106
+ """
107
+ for item in other:
108
+ self._ensure_unique(item)
109
+ super().extend(other)
@@ -0,0 +1,26 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+
5
+ @dataclass
6
+ class ValidationIssue:
7
+ """Contains context about a bundle validation issue."""
8
+
9
+ exc_type: str
10
+ file_path: Path
11
+ file_index: int
12
+ file_count: int
13
+ doc_index: int | None = None
14
+ doc_count: int | None = None
15
+
16
+ def __str__(self) -> str:
17
+ if not self.doc_index:
18
+ return (
19
+ f"[{self.exc_type}] "
20
+ f"File {self.file_index}/{self.file_count}: {self.file_path}"
21
+ )
22
+ return (
23
+ f"[{self.exc_type}] "
24
+ f"File {self.file_index}/{self.file_count}: {self.file_path} "
25
+ f"(in doc #{self.doc_index}/{self.doc_count})"
26
+ )