genelastic 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. genelastic/api/.env +4 -0
  2. genelastic/api/cli_start_api.py +2 -2
  3. genelastic/api/errors.py +52 -0
  4. genelastic/api/extends/example.py +0 -6
  5. genelastic/api/extends/example.yml +0 -20
  6. genelastic/api/routes.py +313 -181
  7. genelastic/api/server.py +8 -3
  8. genelastic/api/specification.yml +343 -181
  9. genelastic/common/__init__.py +0 -44
  10. genelastic/common/cli.py +48 -0
  11. genelastic/common/elastic.py +374 -46
  12. genelastic/common/exceptions.py +34 -2
  13. genelastic/common/server.py +9 -1
  14. genelastic/common/types.py +1 -14
  15. genelastic/import_data/__init__.py +0 -27
  16. genelastic/import_data/checker.py +99 -0
  17. genelastic/import_data/checker_observer.py +13 -0
  18. genelastic/import_data/cli/__init__.py +0 -0
  19. genelastic/import_data/cli/cli_check.py +136 -0
  20. genelastic/import_data/{cli_gen_data.py → cli/gen_data.py} +4 -4
  21. genelastic/import_data/cli/import_data.py +346 -0
  22. genelastic/import_data/cli/info.py +247 -0
  23. genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
  24. genelastic/import_data/cli/validate.py +146 -0
  25. genelastic/import_data/collect.py +185 -0
  26. genelastic/import_data/constants.py +136 -11
  27. genelastic/import_data/import_bundle.py +102 -59
  28. genelastic/import_data/import_bundle_factory.py +70 -149
  29. genelastic/import_data/importers/__init__.py +0 -0
  30. genelastic/import_data/importers/importer_base.py +131 -0
  31. genelastic/import_data/importers/importer_factory.py +85 -0
  32. genelastic/import_data/importers/importer_types.py +223 -0
  33. genelastic/import_data/logger.py +2 -1
  34. genelastic/import_data/models/__init__.py +0 -0
  35. genelastic/import_data/models/analyses.py +178 -0
  36. genelastic/import_data/models/analysis.py +144 -0
  37. genelastic/import_data/models/data_file.py +110 -0
  38. genelastic/import_data/models/process.py +45 -0
  39. genelastic/import_data/models/processes.py +84 -0
  40. genelastic/import_data/models/tags.py +170 -0
  41. genelastic/import_data/models/unique_list.py +109 -0
  42. genelastic/import_data/models/validate.py +26 -0
  43. genelastic/import_data/patterns.py +90 -0
  44. genelastic/import_data/random_bundle.py +10 -8
  45. genelastic/import_data/resolve.py +157 -0
  46. genelastic/ui/.env +1 -0
  47. genelastic/ui/cli_start_ui.py +4 -2
  48. genelastic/ui/routes.py +289 -42
  49. genelastic/ui/static/cea-cnrgh.ico +0 -0
  50. genelastic/ui/static/cea.ico +0 -0
  51. genelastic/ui/static/layout.ico +0 -0
  52. genelastic/ui/static/novaseq6000.png +0 -0
  53. genelastic/ui/static/style.css +430 -0
  54. genelastic/ui/static/ui.js +458 -0
  55. genelastic/ui/templates/analyses.html +96 -9
  56. genelastic/ui/templates/analysis_detail.html +44 -0
  57. genelastic/ui/templates/bi_process_detail.html +129 -0
  58. genelastic/ui/templates/bi_processes.html +114 -9
  59. genelastic/ui/templates/explorer.html +356 -0
  60. genelastic/ui/templates/home.html +205 -2
  61. genelastic/ui/templates/layout.html +148 -29
  62. genelastic/ui/templates/version.html +19 -7
  63. genelastic/ui/templates/wet_process_detail.html +131 -0
  64. genelastic/ui/templates/wet_processes.html +114 -9
  65. genelastic-0.9.0.dist-info/METADATA +686 -0
  66. genelastic-0.9.0.dist-info/RECORD +76 -0
  67. genelastic-0.9.0.dist-info/WHEEL +4 -0
  68. genelastic-0.9.0.dist-info/entry_points.txt +10 -0
  69. genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
  70. genelastic/import_data/analyses.py +0 -69
  71. genelastic/import_data/analysis.py +0 -205
  72. genelastic/import_data/bi_process.py +0 -27
  73. genelastic/import_data/bi_processes.py +0 -49
  74. genelastic/import_data/cli_import.py +0 -379
  75. genelastic/import_data/cli_info.py +0 -256
  76. genelastic/import_data/cli_validate.py +0 -54
  77. genelastic/import_data/data_file.py +0 -87
  78. genelastic/import_data/filename_pattern.py +0 -57
  79. genelastic/import_data/tags.py +0 -123
  80. genelastic/import_data/wet_process.py +0 -28
  81. genelastic/import_data/wet_processes.py +0 -53
  82. genelastic-0.8.0.dist-info/METADATA +0 -109
  83. genelastic-0.8.0.dist-info/RECORD +0 -52
  84. genelastic-0.8.0.dist-info/WHEEL +0 -5
  85. genelastic-0.8.0.dist-info/entry_points.txt +0 -8
  86. genelastic-0.8.0.dist-info/top_level.txt +0 -1
genelastic/import_data/importers/importer_base.py
@@ -0,0 +1,131 @@
+ import gzip
+ import json
+ import logging
+ from abc import ABC, abstractmethod
+ from collections.abc import Iterable
+ from json import JSONDecodeError
+ from typing import Any, Generic, TypeVar
+
+ import yaml
+
+ from genelastic.common.elastic import ElasticImportConn
+ from genelastic.import_data.models.data_file import DataFile
+
+ logger = logging.getLogger("genelastic")
+
+ T = TypeVar("T")
+
+
+ class ImporterError(Exception):
+     """An error occurred while loading, validating or transforming a data file
+     into JSON documents.
+     """
+
+
+ class BaseImporter(ABC, Generic[T]):
+     """Abstract base class for all importers."""
+
+     def __init__(
+         self,
+         data_file: DataFile,
+         es_import_conn: ElasticImportConn,
+         thread_count: int = 4,
+     ) -> None:
+         self._data_file = data_file
+         self._es_import_conn = es_import_conn
+         self._thread_count = thread_count
+
+         self._cls_name = self.__class__.__name__
+         self._process_file()
+
+     def _process_file(self) -> None:
+         """Process the file before import: load, validate and transform the
+         data into JSON documents.
+         :raises ImporterError: If an error occurs while processing the file.
+         """
+         logger.debug("%s: Loading data...", self._cls_name)
+         data = self._load()
+         logger.debug("%s: Validating data...", self._cls_name)
+         self._validate(data)
+         logger.debug("%s: Transforming data...", self._cls_name)
+         self._documents = self._transform(data)
+
+     def import_docs(self) -> None:
+         """Import the JSON documents into Elasticsearch."""
+         logger.debug("%s: Indexing documents...", self._cls_name)
+         self._es_import_conn.parallel_bulk_import(
+             self._documents, self._thread_count
+         )
+
+     @property
+     @abstractmethod
+     def target_index(self) -> str:
+         """Returns the import target index name."""
+
+     @property
+     def documents(self) -> Iterable[dict[str, Any]]:
+         """Return the documents about to be indexed."""
+         return self._documents
+
+     @abstractmethod
+     def _load(self) -> T:
+         """Load and parse raw data from the file."""
+
+     def _validate(self, data: T) -> None:
+         """Validate the data structure (optional)."""
+
+     @abstractmethod
+     def _transform(self, data: T) -> Iterable[dict[str, Any]]:
+         """Transform raw data into Elasticsearch-ready documents."""
+
+
+ class JSONBaseImporter(BaseImporter[Any], ABC):
+     """Base importer to load JSON and gzipped JSON data files."""
+
+     def _load(self) -> Any:  # noqa: ANN401
+         try:
+             if self._data_file.path.suffix == ".gz":
+                 logger.debug(
+                     "Opening gzip-compressed file in text mode and "
+                     "reading content...",
+                 )
+                 with gzip.open(
+                     self._data_file.path, "rt", encoding="utf-8"
+                 ) as f:
+                     content = f.read()
+             else:
+                 logger.debug(
+                     "Opening uncompressed file in text mode and "
+                     "reading content...",
+                 )
+                 content = self._data_file.path.read_text(encoding="utf-8")
+         except (OSError, UnicodeDecodeError) as e:
+             raise ImporterError(e) from None
+
+         if not content.strip():
+             msg = f"JSON file '{self._data_file.path}' is empty."
+             raise ImporterError(msg) from None
+
+         try:
+             data = json.loads(content)
+         except JSONDecodeError as e:
+             raise ImporterError(e) from None
+
+         return data
+
+
+ class YAMLBaseImporter(BaseImporter[Any], ABC):
+     """Base importer to load YAML data files."""
+
+     def _load(self) -> dict[str, Any]:
+         try:
+             with self._data_file.path.open(encoding="utf-8") as f:
+                 doc: dict[str, Any] = yaml.safe_load(f)
+         except (yaml.YAMLError, OSError, UnicodeDecodeError) as e:
+             raise ImporterError(e) from None
+
+         if doc is None:
+             msg = f"YAML file '{self._data_file.path}' is empty."
+             raise ImporterError(msg) from None
+
+         return doc
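
For orientation, the sketch below shows how a concrete importer might hook into this base class: implement target_index, _load and _transform, and inherit the optional _validate. The LineCountImporter name and its index name are hypothetical and not part of the package; only the BaseImporter API shown in the hunk above is assumed.

# Hypothetical sketch (not part of genelastic): a toy subclass of BaseImporter.
from collections.abc import Iterable
from typing import Any

from genelastic.import_data.importers.importer_base import BaseImporter


class LineCountImporter(BaseImporter[list[str]]):
    """Toy importer that indexes one document per non-empty line."""

    @property
    def target_index(self) -> str:
        # Invented index name; the real importers read theirs from the connector.
        return "example-lines"

    def _load(self) -> list[str]:
        # DataFile.path is a pathlib.Path according to the models shown in this diff.
        return self._data_file.path.read_text(encoding="utf-8").splitlines()

    def _transform(self, data: list[str]) -> Iterable[dict[str, Any]]:
        # One Elasticsearch bulk action per non-empty line.
        for i, line in enumerate(data):
            if line.strip():
                yield {"_index": self.target_index, "_source": {"line_no": i, "text": line}}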
genelastic/import_data/importers/importer_factory.py
@@ -0,0 +1,85 @@
+ import logging
+ import typing
+ from typing import ClassVar, TypedDict
+
+ from genelastic.common.elastic import ElasticImportConn
+ from genelastic.import_data.importers.importer_base import (
+     BaseImporter,
+     ImporterError,
+ )
+ from genelastic.import_data.importers.importer_types import (
+     CoverageImporter,
+     QCImporter,
+     SmallvarImporter,
+     SVImporter,
+     VCFImporter,
+ )
+ from genelastic.import_data.models.data_file import DataFile
+
+ logger = logging.getLogger("genelastic")
+
+
+ class _ImporterConfig(TypedDict):
+     """Internal configuration mapping an importer class to its supported file
+     extensions.
+     """
+
+     cls: type[BaseImporter[typing.Any]]
+     extensions: set[str]
+
+
+ class ImporterFactory:
+     """Factory to create a BaseImporter instance based on the file's
+     extension and type.
+     """
+
+     _importers: ClassVar[dict[str, _ImporterConfig]] = {
+         "vcf": _ImporterConfig(cls=VCFImporter, extensions={"vcf"}),
+         "cov": _ImporterConfig(cls=CoverageImporter, extensions={"cov"}),
+         "qc": _ImporterConfig(cls=QCImporter, extensions={"yaml", "yml"}),
+         "smallvar": _ImporterConfig(cls=SmallvarImporter, extensions={"json"}),
+         "sv": _ImporterConfig(cls=SVImporter, extensions={"json"}),
+     }
+
+     @staticmethod
+     def get_importer(
+         data_file: DataFile,
+         es_import_conn: ElasticImportConn,
+         thread_count: int = 4,
+     ) -> BaseImporter[typing.Any]:
+         """Create an appropriate BaseImporter instance based on the data
+         file's extension and type.
+
+         :param data_file: Data file to process and import.
+         :param es_import_conn: Elasticsearch import connector instance.
+         :param thread_count: Number of threads to use for parallel data file
+             import.
+         :return: An instance of the appropriate BaseImporter subclass.
+         :raises ImporterError: If the data file extension or type is invalid.
+         """
+         try:
+             importer = ImporterFactory._importers[data_file.type]
+         except KeyError:
+             supported_types = sorted(
+                 [f"'{i_type}'" for i_type in ImporterFactory._importers]
+             )
+             msg = (
+                 f"Data file '{data_file.path.name}': no importer for type "
+                 f"'{data_file.type}'. Supported types are: "
+                 f"{', '.join(supported_types)}."
+             )
+             raise ImporterError(msg) from None
+
+         if data_file.ext not in importer["extensions"]:
+             supported_exts = sorted(
+                 [f"'{ext}'" for ext in importer["extensions"]]
+             )
+             msg = (
+                 f"Data file '{data_file.path.name}': extension "
+                 f"'{data_file.ext}' not supported by importer "
+                 f"{importer['cls'].__name__}. Supported extensions are: "
+                 f"{', '.join(supported_exts)}."
+             )
+             raise ImporterError(msg)
+
+         return importer["cls"](data_file, es_import_conn, thread_count)
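
A hedged usage sketch of the factory follows: it dispatches a DataFile to the matching importer and indexes its documents. Only get_importer() and import_docs() come from the diff; the import_one helper is invented here, and the DataFile and ElasticImportConn instances are assumed to be built elsewhere by the import pipeline.

# Hypothetical sketch: route one data file through the factory and index it.
from genelastic.common.elastic import ElasticImportConn
from genelastic.import_data.importers.importer_base import ImporterError
from genelastic.import_data.importers.importer_factory import ImporterFactory
from genelastic.import_data.models.data_file import DataFile


def import_one(data_file: DataFile, es_conn: ElasticImportConn) -> None:
    try:
        # Raises ImporterError for an unknown type or an unsupported extension.
        importer = ImporterFactory.get_importer(data_file, es_conn, thread_count=4)
    except ImporterError as e:
        print(f"Skipping {data_file.path.name}: {e}")
        return
    # Bulk-index the documents produced during _process_file().
    importer.import_docs()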
genelastic/import_data/importers/importer_types.py
@@ -0,0 +1,223 @@
+ import csv
+ import logging
+ from collections.abc import Iterable
+ from datetime import UTC, datetime
+ from typing import Any
+
+ import schema
+ import vcf
+ from vcf.model import _Record
+
+ from genelastic.import_data.constants import (
+     QC_METRICS_SCHEMA,
+     SV_METRICS_SCHEMA,
+ )
+ from genelastic.import_data.importers.importer_base import (
+     BaseImporter,
+     ImporterError,
+     JSONBaseImporter,
+     YAMLBaseImporter,
+ )
+
+ logger = logging.getLogger("genelastic")
+
+
+ class CoverageImporter(BaseImporter[Iterable[list[str]]]):
+     """Importer for coverage files."""
+
+     @property
+     def target_index(self) -> str:
+         """Returns the import target index name."""
+         return self._es_import_conn.coverage_index
+
+     def _load(self) -> Iterable[list[str]]:
+         """Load a TSV formatted coverage file.
+         :raises ImporterError: If the file could not be opened or decoded.
+         """
+         try:
+             with self._data_file.path.open(newline="", encoding="utf-8") as f:
+                 reader = csv.reader(f, delimiter="\t")
+                 try:
+                     first_row = next(reader)
+                 except StopIteration:
+                     msg = f"Coverage file '{self._data_file.path}' is empty."
+                     raise ImporterError(msg) from None
+                 yield first_row
+                 yield from reader
+         except (OSError, UnicodeDecodeError) as e:
+             raise ImporterError(e) from None
+
+     def _transform(self, data: Iterable[list[str]]) -> Iterable[dict[str, Any]]:
+         """Transform each coverage file row into a JSON document."""
+         for row in data:
+             yield {
+                 "_index": self.target_index,
+                 "_source": {
+                     "analysis_id": self._data_file.analysis_id,
+                     "created_at": datetime.now(UTC).isoformat(),
+                     "row": {
+                         "chr": row[0],
+                         "pos": int(row[1]) + 1,
+                         "depth": int(row[2]),
+                     },
+                 },
+             }
+
+
+ class VCFImporter(BaseImporter[Iterable[_Record]]):
+     """Importer for VCF files."""
+
+     @property
+     def target_index(self) -> str:
+         """Returns the import target index name."""
+         return self._es_import_conn.vcf_variants_index
+
+     def _load(self) -> Iterable[_Record]:
+         """Load a VCF file. GZ compressed VCF files are supported.
+         :raises ImporterError: If the file could not be opened, decoded or is empty.
+         """
+         try:
+             yield from vcf.Reader(
+                 filename=str(self._data_file.path), encoding="utf-8"
+             )
+         except StopIteration:
+             msg = f"VCF file '{self._data_file.path}' is empty."
+             raise ImporterError(msg) from None
+         except (OSError, UnicodeDecodeError) as e:
+             raise ImporterError(e) from None
+
+     def _transform(self, data: Iterable[_Record]) -> Iterable[dict[str, Any]]:
+         """Transform each VCF file record into a JSON document."""
+         for record in data:
+             # Fix values
+             if not record.CHROM.startswith("chr"):
+                 if record.CHROM.lower().startswith("chr"):
+                     record.CHROM = "chr" + record.CHROM[3:]
+                 else:
+                     record.CHROM = "chr" + record.CHROM
+
+             # Build document
+             alt = [x if x is None else x.type for x in record.ALT]
+
+             yield {
+                 "_index": self.target_index,
+                 "_source": {
+                     "created_at": datetime.now(UTC).isoformat(),
+                     "analysis_id": self._data_file.analysis_id,
+                     "record": {
+                         "type": "vcf",
+                         "chr": record.CHROM,
+                         "pos": record.POS,
+                         "alt": alt,
+                         "info": record.INFO,
+                     },
+                 },
+             }
+
+
+ class QCImporter(YAMLBaseImporter):
+     """Importer for QC YAML metrics files."""
+
+     @property
+     def target_index(self) -> str:
+         """Returns the import target index name."""
+         return self._es_import_conn.qc_metrics_index
+
+     def _validate(self, data: dict[str, Any]) -> None:
+         """Validate the YAML document against the expected schema.
+
+         :raises ImporterError: If the file format is invalid.
+         """
+         try:
+             QC_METRICS_SCHEMA.validate(data)
+         except schema.SchemaError as e:
+             raise ImporterError(e) from None
+
+     def _transform(self, data: dict[str, Any]) -> Iterable[dict[str, Any]]:
+         """Transform a QC YAML metrics file into a JSON document."""
+         yield {
+             "_index": self.target_index,
+             "_source": {
+                 "created_at": datetime.now(UTC).isoformat(),
+                 "analysis_id": self._data_file.analysis_id,
+                 "metrics": data,
+             },
+         }
+
+
+ class SmallvarImporter(JSONBaseImporter):
+     """Importer for SmallVar JSON metrics files."""
+
+     @property
+     def target_index(self) -> str:
+         """Returns the import target index name."""
+         return self._es_import_conn.smallvar_metrics_index
+
+     def _transform(self, data: dict[str, Any]) -> Iterable[dict[str, Any]]:
+         """Transform a SmallVar metrics file into JSON documents."""
+         try:
+             for metric in data["metrics"]:
+                 values_count = len(metric["data"][0]["values"])
+
+                 metric_id = metric["id"].replace(".", "_").lower()
+
+                 for i in range(values_count):
+                     doc = {}
+                     for item in metric["data"]:
+                         # Attribute name should not use '.' as it refers
+                         # to nested objects.
+                         label = item["label"].replace(".", "_")
+                         doc[label] = item["values"][i]
+
+                     yield {
+                         "_index": self.target_index,
+                         "_source": {
+                             "created_at": datetime.now(UTC).isoformat(),
+                             "analysis_id": self._data_file.analysis_id,
+                             "metric_id": metric_id,
+                             "metrics": doc,
+                         },
+                     }
+         except KeyError as e:
+             msg = (
+                 f"Smallvar metrics file '{self._data_file.path}' "
+                 f"is invalid: missing key {e}."
+             )
+             raise ImporterError(msg) from None
+
+
+ class SVImporter(JSONBaseImporter):
+     """Importer for SV JSON metrics files."""
+
+     @property
+     def target_index(self) -> str:
+         """Returns the import target index name."""
+         return self._es_import_conn.sv_metrics_index
+
+     def _validate(self, data: dict[str, Any]) -> None:
+         """Validate the JSON document against the expected schema.
+
+         :raises ImporterError: If the file format is invalid.
+         """
+         try:
+             SV_METRICS_SCHEMA.validate(data)
+         except schema.SchemaError as e:
+             raise ImporterError(e) from None
+
+     def _transform(self, data: dict[str, Any]) -> Iterable[dict[str, Any]]:
+         """Transform an SV metrics file into a JSON document."""
+         for region in data["regions"]:
+             for result in region["results"]:
+                 # Convert all values to float to avoid mapping issues.
+                 result["precision"] = float(result["precision"])
+                 result["recall"] = float(result["recall"])
+                 result["f1"] = float(result["f1"])
+
+         yield {
+             "_index": self.target_index,
+             "_source": {
+                 "created_at": datetime.now(UTC).isoformat(),
+                 "analysis_id": self._data_file.analysis_id,
+                 "metrics": data,
+             },
+         }
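
As a reading aid, the snippet below reconstructs the input shape that SmallvarImporter._transform appears to expect, inferred only from the key lookups above; the metric id, labels and values are invented. Each index i across the "values" arrays is pivoted into one document.

# Illustrative only: inferred from the key accesses in SmallvarImporter._transform.
example_smallvar_metrics = {
    "metrics": [
        {
            "id": "snp.transitions",  # would become metric_id "snp_transitions"
            "data": [
                {"label": "chr", "values": ["chr1", "chr2"]},
                {"label": "count", "values": [1200, 980]},
            ],
        },
    ],
}
# Document 0 would carry {"chr": "chr1", "count": 1200};
# document 1 would carry {"chr": "chr2", "count": 980}.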
genelastic/import_data/logger.py
@@ -56,5 +56,6 @@ def configure_logging(verbose: int, log_file: str | None = None) -> None:
          1: logging.INFO, # default
          2: logging.DEBUG, # verbose mode
      }
+     level = level_map.get(verbose)
      # If verbose is greater than 2, set level to TRACE.
-     root.setLevel(level_map.get(verbose, logging.TRACE)) # type: ignore[attr-defined]
+     root.setLevel(level if level else logging.TRACE) # type: ignore[attr-defined]
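
For the levels shown in the hunk, the refactor is behavior-preserving: any verbose value missing from level_map still falls back to the package's custom TRACE level. A minimal sketch of that equivalence, using a stand-in TRACE value since the real one is defined elsewhere in genelastic:

# Stand-in check; TRACE and the partial level_map below are assumptions.
import logging

TRACE = 5  # invented value; genelastic registers its own TRACE level
level_map = {1: logging.INFO, 2: logging.DEBUG}  # entries visible in the hunk

for verbose in (1, 2, 3):
    old = level_map.get(verbose, TRACE)        # 0.8.0 behavior
    level = level_map.get(verbose)
    new = level if level else TRACE            # 0.9.0 behavior
    assert old == new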
@@ -0,0 +1,178 @@
1
+ import logging
2
+ import typing
3
+ from pathlib import Path
4
+
5
+ from genelastic.common.types import BundleDict
6
+ from genelastic.import_data.collect import (
7
+ extract_analysis_metadata,
8
+ )
9
+ from genelastic.import_data.models.analysis import Analysis
10
+ from genelastic.import_data.models.data_file import DataFile
11
+ from genelastic.import_data.models.unique_list import UniqueList
12
+ from genelastic.import_data.resolve import (
13
+ resolve_filename_pattern,
14
+ validate_file_prefix,
15
+ )
16
+
17
+ logger = logging.getLogger("genelastic")
18
+
19
+
20
+ class Analyses(UniqueList[Analysis]):
21
+ """Container of Analysis objects."""
22
+
23
+ def get_data_files(self, ext: str | None = None) -> list[DataFile]:
24
+ """Returns matched files as DataFile objects across all analyses.
25
+
26
+ :param ext: Filter the list of matched files by their extension
27
+ (case-sensitive).
28
+ """
29
+ return [df for a in self for df in a.get_data_files(ext=ext)]
30
+
31
+ @property
32
+ def extensions(self) -> set[str]:
33
+ """Returns all matched files extensions across all analyses."""
34
+ return {ext for a in self for ext in a.extensions}
35
+
36
+ @property
37
+ def matched_files(self) -> set[Path]:
38
+ """Returns the number of files that matched the pattern across all
39
+ analyses.
40
+ """
41
+ return {f for a in self for f in a.matched_files}
42
+
43
+ @property
44
+ def unmatched_files(self) -> set[Path]:
45
+ """Return the set of files that were not matched by any analysis.
46
+
47
+ The behavior differs depending on whether analyses share the same
48
+ ``data_path``:
49
+
50
+ - Within the same directory: a file is considered unmatched only if
51
+ **all** analyses in that directory failed to match it. This is
52
+ computed as the intersection of their respective ``unmatched_files``
53
+ sets.
54
+
55
+ - Across different directories: unmatched files are simply aggregated
56
+ (union of sets), since each directory is independent.
57
+
58
+ :return: A set of paths corresponding to unmatched files across all
59
+ analyses.
60
+ """
61
+ unmatched_per_dir: dict[Path, set[Path]] = {}
62
+
63
+ for a in self:
64
+ try:
65
+ unmatched_per_dir[a.data_path] = set.intersection(
66
+ unmatched_per_dir[a.data_path], a.unmatched_files
67
+ )
68
+ except KeyError:
69
+ unmatched_per_dir[a.data_path] = a.unmatched_files
70
+
71
+ if not unmatched_per_dir.values():
72
+ return set()
73
+ return set.union(*unmatched_per_dir.values())
74
+
75
+ @classmethod
76
+ def from_dict(cls, bundle: BundleDict) -> typing.Self:
77
+ """Initialize an ``Analyses`` container from a single bundle dictionary.
78
+
79
+ Expected bundle keys:
80
+
81
+ - Mandatory: ``file_prefix``, ``tags``, ``bundle_file``, ``data_path``.
82
+ - Optional: ``multi_match`` (default: ``False``), ``suffix`` (default: ``None``).
83
+
84
+ :param bundle: A dictionary describing one analysis configuration.
85
+ :raises InvalidFilePrefixError: If the ``file_prefix`` is invalid.
86
+ :raises FilenamePatternResolveError: If ``multi_match=False`` and some
87
+ tag fields are missing from the bundle metadata.
88
+ :raises UniqueListDuplicateError: If two ``Analysis`` objects happens
89
+ to share the same ID inside the ``Analyses`` instance.
90
+ :raises DataFileCollectorError: If the ``data_path`` is not an existing
91
+ directory or if metadata extraction or instantiation of a data file
92
+ objet fails for a given file.
93
+ :return: An ``Analyses`` instance containing one or several
94
+ ``Analysis`` objects.
95
+ """
96
+ analyses = cls()
97
+
98
+ # Validate file prefix structure.
99
+ logger.info("- Validating file prefix '%s'...", bundle["file_prefix"])
100
+ validate_file_prefix(
101
+ file_prefix=bundle["file_prefix"], tags=bundle["tags"]
102
+ )
103
+
104
+ # Resolve the filename pattern. In multi-match mode, tags without
105
+ # metadata values are accepted. They will be resolved later from
106
+ # filename-extracted metadata. In single-match mode, a
107
+ # FilenamePatternResolveError exception will be raised.
108
+ strict_mode = not bool(bundle.get("multi_match"))
109
+ logger.info(
110
+ "- Resolving filename pattern in %s mode...",
111
+ "strict" if strict_mode else "non-strict",
112
+ )
113
+ filename_pattern = resolve_filename_pattern(
114
+ file_prefix=bundle["file_prefix"],
115
+ tags=bundle["tags"],
116
+ metadata=bundle,
117
+ suffix=bundle.get("suffix"),
118
+ strict=strict_mode,
119
+ )
120
+
121
+ # Scan the data path to extract metadata from filenames.
122
+ logger.info(
123
+ "- Collecting files to extract metadata from using the resolved "
124
+ "filename pattern."
125
+ )
126
+ extracted_metadata = extract_analysis_metadata(
127
+ data_path=bundle["data_path"],
128
+ file_prefix=bundle["file_prefix"],
129
+ tags=bundle["tags"],
130
+ filename_pattern=filename_pattern,
131
+ )
132
+
133
+ logger.info(
134
+ "- Extracted metadata from %d analysis(es): %s",
135
+ len(extracted_metadata.keys()),
136
+ ", ".join(extracted_metadata.keys()),
137
+ )
138
+
139
+ for analysis_id, metadata in extracted_metadata.items():
140
+ # For each file match, merge filename-extracted metadata with the
141
+ # original bundle to describe one analysis.
142
+ full_metadata = {**bundle, **metadata}
143
+ full_metadata["analysis_id"] = analysis_id
144
+
145
+ # Re-resolve filename pattern in strict mode to let the analysis
146
+ # collect its own files (all tags should now be defined).
147
+ full_metadata["filename_pattern"] = resolve_filename_pattern(
148
+ file_prefix=full_metadata["file_prefix"],
149
+ tags=full_metadata["tags"],
150
+ metadata=full_metadata,
151
+ suffix=full_metadata.get("suffix"),
152
+ strict=True,
153
+ )
154
+
155
+ # Instantiate the Analysis and add it to the container.
156
+ analyses.append(Analysis(**full_metadata))
157
+ logger.info("")
158
+
159
+ return analyses
160
+
161
+ @classmethod
162
+ def from_dicts(cls, arr: typing.Sequence[BundleDict]) -> typing.Self:
163
+ """Initialize an ``Analyses`` container from multiple bundle
164
+ dictionaries.
165
+
166
+ This is a convenience wrapper that calls ``from_dict`` for each
167
+ bundle in the sequence and concatenates the results.
168
+
169
+ :param arr: A sequence of bundle dictionaries.
170
+ :return: An ``Analyses`` instance containing all analyses from the
171
+ input bundles.
172
+ """
173
+ analyses = cls()
174
+
175
+ for bundle in arr:
176
+ analyses.extend(analyses.from_dict(bundle))
177
+
178
+ return analyses
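
To close, a hypothetical sketch of driving Analyses.from_dict() with a single bundle. Only the key names come from the docstring above; every value, and the exact shape expected for "tags" and "file_prefix", is an assumption made for illustration.

# Hypothetical bundle; key names follow the from_dict docstring, values are invented.
from genelastic.import_data.models.analyses import Analyses

bundle = {
    "file_prefix": "run01_sampleA",      # invented
    "tags": {},                          # real structure comes from the tags model
    "bundle_file": "bundles/demo.yml",   # invented
    "data_path": "data/run01",           # invented; must be an existing directory
    "multi_match": False,                # optional, defaults to False
    "suffix": None,                      # optional
}

# May raise the exceptions listed in the docstring if the paths or tags are invalid.
analyses = Analyses.from_dict(bundle)
print(len(analyses), sorted(analyses.extensions))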