genelastic 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. genelastic/__init__.py +0 -13
  2. genelastic/api/__init__.py +0 -0
  3. genelastic/api/extends/__init__.py +0 -0
  4. genelastic/api/extends/example.py +6 -0
  5. genelastic/api/routes.py +221 -0
  6. genelastic/api/server.py +80 -0
  7. genelastic/api/settings.py +14 -0
  8. genelastic/common/__init__.py +39 -0
  9. genelastic/common/cli.py +63 -0
  10. genelastic/common/elastic.py +214 -0
  11. genelastic/common/exceptions.py +4 -0
  12. genelastic/common/types.py +25 -0
  13. genelastic/import_data/__init__.py +27 -0
  14. genelastic/{analyses.py → import_data/analyses.py} +19 -20
  15. genelastic/{analysis.py → import_data/analysis.py} +71 -66
  16. genelastic/{bi_process.py → import_data/bi_process.py} +8 -6
  17. genelastic/{bi_processes.py → import_data/bi_processes.py} +10 -9
  18. genelastic/import_data/cli_gen_data.py +116 -0
  19. genelastic/import_data/cli_import.py +379 -0
  20. genelastic/import_data/cli_info.py +256 -0
  21. genelastic/import_data/cli_integrity.py +384 -0
  22. genelastic/import_data/cli_validate.py +54 -0
  23. genelastic/import_data/constants.py +24 -0
  24. genelastic/{data_file.py → import_data/data_file.py} +26 -21
  25. genelastic/import_data/filename_pattern.py +57 -0
  26. genelastic/{import_bundle.py → import_data/import_bundle.py} +58 -48
  27. genelastic/import_data/import_bundle_factory.py +298 -0
  28. genelastic/{logger.py → import_data/logger.py} +22 -18
  29. genelastic/import_data/random_bundle.py +402 -0
  30. genelastic/{tags.py → import_data/tags.py} +48 -27
  31. genelastic/{wet_process.py → import_data/wet_process.py} +8 -4
  32. genelastic/{wet_processes.py → import_data/wet_processes.py} +15 -9
  33. genelastic/ui/__init__.py +0 -0
  34. genelastic/ui/server.py +87 -0
  35. genelastic/ui/settings.py +11 -0
  36. genelastic-0.7.0.dist-info/METADATA +105 -0
  37. genelastic-0.7.0.dist-info/RECORD +40 -0
  38. {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
  39. genelastic-0.7.0.dist-info/entry_points.txt +6 -0
  40. genelastic/common.py +0 -151
  41. genelastic/constants.py +0 -45
  42. genelastic/filename_pattern.py +0 -62
  43. genelastic/gen_data.py +0 -193
  44. genelastic/import_bundle_factory.py +0 -288
  45. genelastic/import_data.py +0 -294
  46. genelastic/info.py +0 -248
  47. genelastic/integrity.py +0 -324
  48. genelastic/validate_data.py +0 -41
  49. genelastic-0.6.0.dist-info/METADATA +0 -36
  50. genelastic-0.6.0.dist-info/RECORD +0 -25
  51. genelastic-0.6.0.dist-info/entry_points.txt +0 -6
  52. {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/import_data/cli_integrity.py
@@ -0,0 +1,384 @@
+ import argparse
+ import logging
+
+ from elasticsearch import NotFoundError
+
+ from genelastic.common import (
+     Bucket,
+     DBIntegrityError,
+     ElasticQueryConn,
+     add_es_connection_args,
+     add_verbose_control_args,
+ )
+
+ from .logger import configure_logging
+
+ logger = logging.getLogger("genelastic")
+ logging.getLogger("elastic_transport").setLevel(
+     logging.WARNING
+ )  # Disable excessive logging
+
+
+ def read_args() -> argparse.Namespace:
+     """Read arguments from command line."""
+     parser = argparse.ArgumentParser(
+         description="Utility to check the integrity "
+         "of the genelastic ElasticSearch database.",
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         allow_abbrev=False,
+     )
+     add_verbose_control_args(parser)
+     add_es_connection_args(parser)
+     return parser.parse_args()
+
+
+ def check_for_undefined_file_indices(
+     es_query_conn: ElasticQueryConn, analyses_index: str
+ ) -> None:
+     """Check for potentially undefined file indices in the analyses index.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :raises genelastic.common.DBIntegrityError:
+         Some file indices are used in the analyses index but are undefined.
+     """
+     logger.info(
+         "Checking for references to undefined file indices in the index '%s'...",
+         analyses_index,
+     )
+
+     undefined_indices = set()
+
+     query = {
+         "size": 0,
+         "aggs": {
+             "get_file_indices": {
+                 "composite": {
+                     "sources": {
+                         "file_index": {"terms": {"field": "file_index.keyword"}}
+                     },
+                     "size": 1000,
+                 }
+             }
+         },
+     }
+
+     buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
+         analyses_index, query
+     )
+
+     for bucket in buckets:
+         file_index = bucket["key"]["file_index"]
+
+         try:
+             es_query_conn.client.indices.get(index=file_index)
+             logger.debug(
+                 "File index %s used in index '%s' is defined.",
+                 file_index,
+                 analyses_index,
+             )
+         except NotFoundError:
+             logger.debug(
+                 "File index %s used in '%s' is undefined.",
+                 file_index,
+                 analyses_index,
+             )
+             undefined_indices.add(file_index)
+
+     if len(undefined_indices) > 0:
+         msg = (
+             f"Found the following undefined file indices referenced in the index '{analyses_index}': "
+             f"{', '.join(undefined_indices)}"
+         )
+         raise DBIntegrityError(msg)
+
+     logger.info("All referenced file indices are defined.")
+
+
+ def get_undefined_processes(
+     es_query_conn: ElasticQueryConn,
+     analyses_index: str,
+     process_index: str,
+     field: str,
+ ) -> set[str]:
+     """Return a set of undefined process IDs in an index.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :param process_index: Name of the index to check for undefined processes.
+     :param field: Field name used to retrieve the process ID.
+     :returns: A set of undefined process IDs.
+     """
+     query = {
+         "size": 0,
+         "aggs": {
+             "get_analyses_processes": {
+                 "composite": {
+                     "sources": {
+                         "process": {"terms": {"field": f"{field}.keyword"}}
+                     },
+                     "size": 1000,
+                 }
+             }
+         },
+     }
+
+     buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
+         analyses_index, query
+     )
+
+     used_processes = {bucket["key"]["process"] for bucket in buckets}
+     logger.debug(
+         "Used values for field '%s' in index '%s': %s",
+         field,
+         analyses_index,
+         used_processes,
+     )
+
+     defined_processes = es_query_conn.get_field_values(process_index, "proc_id")
+     logger.debug(
+         "Defined values in index '%s': %s", process_index, defined_processes
+     )
+
+     return used_processes.difference(defined_processes)
+
+
+ def check_for_undefined_wet_processes(
+     es_query_conn: ElasticQueryConn, analyses_index: str, wet_process_index: str
+ ) -> None:
+     """Check that each wet process used in the analyses index is defined.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :param wet_process_index: Name of the index where wet processes are stored.
+     :raises genelastic.common.DBIntegrityError:
+         Some wet processes used in the analyses index are undefined.
+     """
+     logger.info(
+         "Checking for undefined wet processes used in index '%s'...",
+         analyses_index,
+     )
+     undefined_wet_processes = get_undefined_processes(
+         es_query_conn, analyses_index, wet_process_index, "metadata.wet_process"
+     )
+
+     if len(undefined_wet_processes) > 0:
+         msg = (
+             f"Index '{analyses_index}' uses the following undefined wet processes: "
+             f"{', '.join(undefined_wet_processes)}."
+         )
+         raise DBIntegrityError(msg)
+
+     logger.info(
+         "All wet processes used in index '%s' are defined.", analyses_index
+     )
+
+
+ def check_for_undefined_bi_processes(
+     es_query_conn: ElasticQueryConn, analyses_index: str, bi_process_index: str
+ ) -> None:
+     """Check that each bio info process used in the analyses index is defined.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :param bi_process_index: Name of the index where bio info processes are stored.
+     :raises genelastic.common.DBIntegrityError:
+         Some bio info processes used in the analyses index are undefined.
+     """
+     logger.info(
+         "Checking for undefined bio info processes used in index '%s'...",
+         analyses_index,
+     )
+     undefined_bi_processes = get_undefined_processes(
+         es_query_conn, analyses_index, bi_process_index, "metadata.bi_process"
+     )
+
+     if len(undefined_bi_processes) > 0:
+         msg = (
+             f"Index '{analyses_index}' uses the following undefined bio info processes: "
+             f"{', '.join(undefined_bi_processes)}."
+         )
+         raise DBIntegrityError(msg)
+
+     logger.info(
+         "All bio info processes used in index '%s' are defined.",
+         analyses_index,
+     )
+
+
+ def check_for_unused_file_indices(
+     es_query_conn: ElasticQueryConn, analyses_index: str, index_prefix: str
+ ) -> int:
+     """Check that each file index is used in at least one analysis.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :param index_prefix: Prefix given to all the indices of the ElasticSearch database.
+     :returns: 1 if some file indices exist but are unused in the analyses index,
+         and 0 otherwise.
+     """
+     json_indices = es_query_conn.client.cat.indices(
+         index=f"{index_prefix}-file-*", format="json"
+     ).body
+
+     found_file_indices = set()
+     for x in json_indices:
+         if isinstance(x, dict):
+             found_file_indices.add(x["index"])
+
+     query = {
+         "size": 0,
+         "aggs": {
+             "get_file_indices": {
+                 "composite": {
+                     "sources": {
+                         "file_index": {"terms": {"field": "file_index.keyword"}}
+                     },
+                     "size": 1000,
+                 }
+             }
+         },
+     }
+
+     buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
+         analyses_index, query
+     )
+
+     used_file_indices = {bucket["key"]["file_index"] for bucket in buckets}
+     unused_file_indices = found_file_indices.difference(used_file_indices)
+
+     if len(unused_file_indices) > 0:
+         logger.warning(
+             "Found the following unused file indices: %s",
+             ", ".join(unused_file_indices),
+         )
+         return 1
+
+     logger.info("All file indices are used.")
+     return 0
+
+
+ def check_for_unused_wet_processes(
+     es_query_conn: ElasticQueryConn, analyses_index: str, wet_proc_index: str
+ ) -> int:
+     """Check for defined wet processes that are not used in the analyses index.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :param wet_proc_index: Name of the index where wet processes are stored.
+     :returns: 1 if some wet processes are defined but unused in the analyses index,
+         and 0 otherwise.
+     """
+     logger.info(
+         "Checking for unused wet processes in the index '%s'...", wet_proc_index
+     )
+
+     defined_wet_procs = es_query_conn.get_field_values(
+         wet_proc_index, "proc_id"
+     )
+     logger.debug(
+         "Found the following defined wet processes: %s", defined_wet_procs
+     )
+
+     used_wet_procs = es_query_conn.get_field_values(
+         analyses_index, "metadata.wet_process"
+     )
+     logger.debug(
+         "The following processes are used in the index '%s': %s",
+         analyses_index,
+         used_wet_procs,
+     )
+
+     unused_wet_procs = defined_wet_procs - used_wet_procs
+     if len(unused_wet_procs) > 0:
+         logger.warning("Found unused wet processes: %s", unused_wet_procs)
+         return 1
+
+     logger.info("No unused wet processes found.")
+     return 0
+
+
+ def check_for_unused_bi_processes(
+     es_query_conn: ElasticQueryConn, analyses_index: str, bi_proc_index: str
+ ) -> int:
+     """Check for defined bio info processes that are not used in the analyses index.
+
+     :param es_query_conn: Elasticsearch database instance.
+     :param analyses_index: Name of the index where analyses are stored.
+     :param bi_proc_index: Name of the index where bio info processes are stored.
+     :returns: 1 if some bio info processes are defined but unused in the analyses index,
+         and 0 otherwise.
+     """
+     logger.info(
+         "Checking for unused bio info processes in the index '%s'...",
+         bi_proc_index,
+     )
+
+     defined_bi_procs = es_query_conn.get_field_values(bi_proc_index, "proc_id")
+     logger.debug(
+         "Found the following defined bio info processes: %s", defined_bi_procs
+     )
+
+     used_bi_procs = es_query_conn.get_field_values(
+         analyses_index, "metadata.bi_process"
+     )
+     logger.debug(
+         "The following processes are used in the index '%s': %s",
+         analyses_index,
+         used_bi_procs,
+     )
+
+     unused_bi_procs = defined_bi_procs - used_bi_procs
+     if len(unused_bi_procs) > 0:
+         logger.warning("Found unused bio info processes: %s", unused_bi_procs)
+         return 1
+
+     logger.info("No unused bio info processes found.")
+     return 0
+
+
+ def main() -> None:
+     """Entry point of the integrity script."""
+     args = read_args()
+
+     configure_logging(args.verbose)
+     logger.debug("Arguments: %s", args)
+
+     analyses_index = f"{args.es_index_prefix}-analyses"
+     wet_processes_index = f"{args.es_index_prefix}-wet_processes"
+     bi_processes_index = f"{args.es_index_prefix}-bi_processes"
+
+     addr = f"https://{args.es_host}:{args.es_port}"
+     logger.info("Trying to connect to Elasticsearch at %s...", addr)
+     es_query_conn = ElasticQueryConn(
+         addr, args.es_cert_fp, basic_auth=(args.es_usr, args.es_pwd)
+     )
+
+     # Fatal errors
+     try:
+         es_query_conn.ensure_unique(wet_processes_index, "proc_id")
+         es_query_conn.ensure_unique(bi_processes_index, "proc_id")
+         check_for_undefined_file_indices(es_query_conn, analyses_index)
+         check_for_undefined_wet_processes(
+             es_query_conn, analyses_index, wet_processes_index
+         )
+         check_for_undefined_bi_processes(
+             es_query_conn, analyses_index, bi_processes_index
+         )
+     except DBIntegrityError as e:
+         raise SystemExit(e) from e
+
+     # Warnings
+     check_for_unused_wet_processes(
+         es_query_conn, analyses_index, wet_processes_index
+     )
+     check_for_unused_bi_processes(
+         es_query_conn, analyses_index, bi_processes_index
+     )
+     check_for_unused_file_indices(
+         es_query_conn, analyses_index, args.es_index_prefix
+     )
+
+
+ if __name__ == "__main__":
+     main()
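
Note: `run_composite_aggregation` is provided by ElasticQueryConn in genelastic/common/elastic.py (added in this release but not shown in this section). As a rough sketch of the paginated composite aggregation the queries above rely on, assuming the official elasticsearch-py client and a single composite aggregation per query; the helper below mirrors the call sites above, but its body is an assumption, not the package's implementation:

    # Hedged sketch: composite aggregations return at most `size` buckets per
    # request, so the caller pages through them by feeding `after_key` back
    # into the query until no further page is reported.
    from typing import Any

    from elasticsearch import Elasticsearch


    def run_composite_aggregation(
        client: Elasticsearch, index: str, query: dict[str, Any]
    ) -> list[dict[str, Any]]:
        """Collect every bucket of the single composite aggregation in `query`."""
        agg_name = next(iter(query["aggs"]))  # e.g. "get_file_indices"
        buckets: list[dict[str, Any]] = []
        while True:
            resp = client.search(index=index, body=query)
            agg = resp["aggregations"][agg_name]
            buckets.extend(agg["buckets"])
            if "after_key" not in agg or not agg["buckets"]:
                return buckets
            # Resume on the next page, right after the last returned key.
            query["aggs"][agg_name]["composite"]["after"] = agg["after_key"]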
genelastic/import_data/cli_validate.py
@@ -0,0 +1,54 @@
+ import argparse
+ import logging
+ from pathlib import Path
+
+ from schema import SchemaError
+
+ from genelastic.common import add_verbose_control_args
+
+ from .import_bundle_factory import make_import_bundle_from_files
+ from .logger import configure_logging
+
+ logger = logging.getLogger("genelastic")
+
+
+ def read_args() -> argparse.Namespace:
+     """Read arguments from command line."""
+     parser = argparse.ArgumentParser(
+         description="Ensure that YAML files "
+         "follow the genelastic YAML bundle schema.",
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         allow_abbrev=False,
+     )
+     add_verbose_control_args(parser)
+     parser.add_argument(
+         "files",
+         type=Path,
+         nargs="+",
+         default=None,
+         help="YAML files to validate.",
+     )
+     parser.add_argument(
+         "-c",
+         "--check",
+         action="store_true",
+         help="In addition to validating the schema, "
+         "check for undefined referenced processes.",
+     )
+     return parser.parse_args()
+
+
+ def main() -> int:
+     """Entry point of the validate script."""
+     args = read_args()
+     configure_logging(args.verbose)
+
+     try:
+         make_import_bundle_from_files(args.files, check=args.check)
+     except (ValueError, RuntimeError, TypeError, SchemaError) as e:
+         # Catch any exception that can be raised by 'make_import_bundle_from_files'.
+         logger.error(e)
+         return 1
+
+     logger.info("All YAML files respect the genelastic YAML bundle format.")
+     return 0
genelastic/import_data/constants.py
@@ -0,0 +1,24 @@
+ """Module: constants
+
+ This module contains genelastic constants.
+ """
+
+ import typing
+
+ ALLOWED_CATEGORIES: typing.Final[list[str]] = ["vcf", "cov"]
+
+ BUNDLE_CURRENT_VERSION = 3
+
+ DEFAULT_TAG_REGEX = "[^_-]+"
+ DEFAULT_TAG_PREFIX = "%"
+ DEFAULT_TAG_SUFFIX = ""
+
+ DEFAULT_TAG2FIELD: typing.Final[dict[str, dict[str, str]]] = {
+     "%S": {"field": "sample_name", "regex": DEFAULT_TAG_REGEX},
+     "%F": {"field": "source", "regex": DEFAULT_TAG_REGEX},
+     "%W": {"field": "wet_process", "regex": DEFAULT_TAG_REGEX},
+     "%B": {"field": "bi_process", "regex": DEFAULT_TAG_REGEX},
+     "%D": {"field": "cov_depth", "regex": DEFAULT_TAG_REGEX},
+     "%A": {"field": "barcode", "regex": DEFAULT_TAG_REGEX},
+     "%R": {"field": "reference_genome", "regex": DEFAULT_TAG_REGEX},
+ }
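
Each %-tag in DEFAULT_TAG2FIELD maps a filename placeholder to a metadata field and a capture regex. The actual expansion lives in genelastic/import_data/tags.py (not shown in this section); a minimal sketch of the idea, where the tags_to_regex helper is hypothetical:

    # Hypothetical helper illustrating how a tag pattern such as "%S_%W" can be
    # expanded into the named-group regex that FilenamePattern consumes.
    import re

    from genelastic.import_data.constants import DEFAULT_TAG2FIELD


    def tags_to_regex(tag_pattern: str) -> str:
        regex = re.escape(tag_pattern)
        for tag, spec in DEFAULT_TAG2FIELD.items():
            named_group = f"(?P<{spec['field']}>{spec['regex']})"
            regex = regex.replace(re.escape(tag), named_group)
        return regex


    print(tags_to_regex("%S_%W"))
    # (?P<sample_name>[^_-]+)_(?P<wet_process>[^_-]+)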
genelastic/{data_file.py → import_data/data_file.py}
@@ -1,5 +1,4 @@
- """
- This module defines the DataFile class, which handles the representation,
+ """This module defines the DataFile class, which handles the representation,
  management, and extraction of metadata for a data file within a data bundle.

  It includes functionality to construct DataFile instances from paths and
@@ -8,43 +7,47 @@ for extracting metadata from filenames using specified patterns.
  """

  import logging
- import os
  import pathlib
- import typing
+ from pathlib import Path
+
+ from genelastic.common import AnalysisMetaData
+
  from .filename_pattern import FilenamePattern
- from .common import AnalysisMetaData

- logger = logging.getLogger('genelastic')
+ logger = logging.getLogger("genelastic")


  class DataFile:
      """Class for handling a data file and its metadata."""

      # Initializer
-     def __init__(self, path: str, bundle_path: str | None = None,
-                  metadata: typing.Optional[AnalysisMetaData] = None) -> None:
+     def __init__(
+         self,
+         path: Path,
+         bundle_path: Path | None = None,
+         metadata: AnalysisMetaData | None = None,
+     ) -> None:
          self._path = path
          self._bundle_path = bundle_path  # The bundle YAML file in which this
          # file was listed.
          self._metadata = {} if metadata is None else metadata

      def __repr__(self) -> str:
-         return (f"File {self._path}, from bundle {self._bundle_path}"
-                 + f", with metadata {self._metadata}")
+         return f"File {self._path}, from bundle {self._bundle_path}, with metadata {self._metadata}"

      # Get path
      @property
-     def path(self) -> str:
+     def path(self) -> Path:
          """Retrieve the data file path."""
          return self._path

      def exists(self) -> bool:
          """Tests if the associated file exists on disk."""
-         return os.path.isfile(self._path)
+         return self._path.is_file()

      # Get bundle path
      @property
-     def bundle_path(self) -> str | None:
+     def bundle_path(self) -> Path | None:
          """Retrieve the path to the associated data bundle file."""
          return self._bundle_path

@@ -57,20 +60,22 @@ class DataFile:
      # Factory
      @classmethod
      def make_from_bundle(
-         cls,
-         path: str,
-         bundle_path: str | None,
-         pattern: typing.Optional[FilenamePattern] = None) -> 'DataFile':
+         cls,
+         path: Path,
+         bundle_path: Path | None,
+         pattern: FilenamePattern | None = None,
+     ) -> "DataFile":
          """Construct a DataFile instance from a bundle path, file path,
-         and optional filename pattern."""
+         and optional filename pattern.
+         """
          # Make absolute path
-         if not os.path.isabs(path) and not bundle_path is None:
-             path = os.path.join(os.path.dirname(bundle_path), path)
+         if not path.is_absolute() and bundle_path is not None:
+             path = bundle_path.parent / path

          # Extract filename metadata
          metadata = None
          if pattern is not None:
-             metadata = pattern.extract_metadata(os.path.basename(path))
+             metadata = pattern.extract_metadata(path.name)

          if metadata:
              if "ext" not in metadata:
genelastic/import_data/filename_pattern.py
@@ -0,0 +1,57 @@
+ """This module defines the FilenamePattern class, used to define a filename pattern
+ and extract metadata from file names using this pattern.
+ """
+
+ import re
+
+ from genelastic.common import AnalysisMetaData
+
+
+ class FilenamePattern:
+     """Class for defining a filename pattern.
+     The pattern is used to extract metadata from filenames
+     and verify filename conformity.
+     """
+
+     # Initializer
+     def __init__(self, pattern: str) -> None:
+         """Initializes a FilenamePattern instance.
+
+         Args:
+             pattern (str): The pattern string used for defining
+                 the filename pattern.
+         """
+         self._re = re.compile(pattern)
+
+     def extract_metadata(self, filename: str) -> AnalysisMetaData:
+         """Extracts metadata from the given filename based
+         on the defined pattern.
+
+         Args:
+             filename (str): The filename from which metadata
+                 needs to be extracted.
+
+         Returns:
+             dict: A dictionary containing the extracted metadata.
+
+         Raises:
+             RuntimeError: If parsing of filename fails
+                 with the defined pattern.
+         """
+         m = self._re.search(filename)
+         if not m:
+             msg = f'Failed parsing filename "{filename}" with pattern "{self._re.pattern}".'
+             raise RuntimeError(msg)
+         return m.groupdict()
+
+     def matches_pattern(self, filename: str) -> bool:
+         """Checks if the given filename matches the defined pattern.
+
+         Args:
+             filename (str): The filename to be checked.
+
+         Returns:
+             bool: True if the filename matches the pattern,
+                 False otherwise.
+         """
+         return bool(self._re.match(filename))
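
A usage sketch for FilenamePattern as defined above; the regex and file names are illustrative:

    from genelastic.import_data.filename_pattern import FilenamePattern

    # Named groups become keys of the extracted metadata dictionary.
    pattern = FilenamePattern(
        r"(?P<sample_name>[^_-]+)_(?P<barcode>[^_-]+)\.(?P<ext>vcf|cov)"
    )

    print(pattern.matches_pattern("S01_ACGT.vcf"))   # True
    print(pattern.extract_metadata("S01_ACGT.vcf"))
    # {'sample_name': 'S01', 'barcode': 'ACGT', 'ext': 'vcf'}

    pattern.extract_metadata("unrelated.txt")        # raises RuntimeError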