genelastic-0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
genelastic/__init__.py ADDED
@@ -0,0 +1,13 @@
+ """Genelastic package for importing genomic data into Elasticsearch.
+ """
+
+ from .import_bundle import ImportBundle
+ from .common import BundleDict
+ from .constants import BUNDLE_CURRENT_VERSION
+ from .import_bundle_factory import make_import_bundle_from_files, \
+     load_import_bundle_file
+ from .analysis import Analysis
+ from .analyses import Analyses
+
+ __all__ = ['make_import_bundle_from_files', 'BUNDLE_CURRENT_VERSION',
+            'load_import_bundle_file', 'Analysis', 'ImportBundle']
genelastic/analyses.py ADDED
@@ -0,0 +1,70 @@
+ # pylint: disable=missing-module-docstring
+ import typing
+ from .analysis import Analysis
+ from .common import BundleDict
+ from .data_file import DataFile
+
+ class Analyses:
+     """Class Analyses is a container of Analysis objects.
+     """
+
+     def __init__(self) -> None:
+         self._arr: typing.List[Analysis] = []
+         self._iter_index: int = 0
+
+     def __len__(self) -> int:
+         return len(self._arr)
+
+     def __iter__(self) -> typing.Generator[Analysis, typing.Any, None]:
+         yield from self._arr
+
+     @typing.overload
+     def __getitem__(self, k: int) -> Analysis:
+         pass
+
+     @typing.overload
+     def __getitem__(self, k: slice) -> typing.List[Analysis]:
+         pass
+
+     def __getitem__(self, k):  # type: ignore
+         if isinstance(k, int):
+             return self._arr[k]
+         return self._arr[k.start:k.stop]
+
+     def add(self, a: Analysis) -> None:
+         """Add one Analysis object."""
+         self._arr.append(a)
+
+     def get_nb_files(self, cat: str | None = None) -> int:
+         """Return the total number of data files."""
+         return len(self.get_data_files(cat=cat))
+
+     def get_data_files(self, cat: str | None = None) -> typing.List[DataFile]:
+         """Return the data files of all analyses as DataFile objects.
+         """
+
+         data_files: typing.List[DataFile] = []
+
+         for a in self._arr:
+             data_files.extend(a.get_data_files(cat=cat))
+
+         return data_files
+
+     def get_all_categories(self) -> typing.Set[str]:
+         """Return all the categories of the analyses."""
+         categories = set()
+         for a in self._arr:
+             categories.update(a.get_all_categories())
+         return categories
+
+     @classmethod
+     def from_array_of_dicts(cls, arr: typing.Sequence[BundleDict]
+                             ) -> typing.Self:
+         """Build an Analyses instance."""
+
+         analyses = cls()
+
+         for d in arr:
+             analyses.add(Analysis(**d))
+
+         return analyses
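A brief usage sketch of the container protocol defined above. The raw dicts are assumed to come from a parsed bundle YAML file, and each one must carry the keyword arguments that Analysis.__init__ expects, including a Tags instance:

    analyses = Analyses.from_array_of_dicts(raw_dicts)  # raw_dicts: parsed bundle entries (assumption)
    print(len(analyses))        # number of Analysis objects
    for a in analyses:          # iteration yields Analysis objects in insertion order
        print(a.bundle_file)
    first_two = analyses[0:2]   # the slice overload returns a plain list

Note that the slice overload forwards only start and stop to the underlying list, so any step component of a slice is silently ignored.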
genelastic/analysis.py ADDED
@@ -0,0 +1,200 @@
+ # pylint: disable=missing-module-docstring
+ import copy
+ import glob
+ import logging
+ import os
+ import re
+ import typing
+ from pathlib import Path
+
+ from .common import AnalysisMetaData
+ from .constants import ALLOWED_CATEGORIES
+ from .data_file import DataFile
+ from .filename_pattern import FilenamePattern
+ from .tags import Tags
+
+ logger = logging.getLogger('genelastic')
+
+
+ class Analysis:
+     """Class Analysis that represents an analysis."""
+
+     # pylint: disable-next=too-many-arguments
+     def __init__(self,
+                  tags: Tags,
+                  root_dir: str = '.',
+                  bundle_file: str | None = None,
+                  file_prefix: str | None = None,
+                  files: typing.Sequence[str] | None = None,
+                  data_path: str | None = None,
+                  **metadata: str | int) -> None:
+         self._bundle_file = bundle_file
+         self._file_prefix = file_prefix
+         self._files = files
+         self._data_path = Analysis._resolve_data_path(root_dir, data_path)
+         self._tags = tags
+         self._metadata: AnalysisMetaData = metadata
+         self._categories: typing.Set[str] = set()
+
+     @property
+     def metadata(self) -> AnalysisMetaData:
+         """Get metadata."""
+         return copy.deepcopy(self._metadata)
+
+     @property
+     def bundle_file(self) -> str | None:
+         """Get the bundle file."""
+         return self._bundle_file
+
+     @property
+     def filename_regex(self) -> str:
+         """
+         Resolve placeholders in the file prefix using metadata;
+         unresolved placeholders are converted to generic regex groups.
+         """
+
+         x: str = r"^.+\.(?P<ext>vcf|cov)(\.gz)?$"
+
+         # Use existing generic prefix
+         if self._file_prefix:
+             x = self._file_prefix
+             # Replace %* tags
+             for tag_name, tag_attrs in self._tags.items:
+                 field = tag_attrs["field"]
+                 regex = tag_attrs["regex"]
+
+                 # Build field regex
+                 field_regex = (f"(?P<{field}>{self._metadata.get(field)})"
+                                if field in self._metadata else
+                                f"(?P<{field}>{regex})")
+                 # Replace tag with field regex
+                 x = x.replace(tag_name, field_regex)
+
+             # Check for tags that were not replaced.
+             groups = re.findall(self._tags.search_regex, x)
+             for match in groups:
+                 logger.warning("String '%s' in key 'file_prefix' looks like an undefined tag. "
+                                "If this string is not a tag, you can ignore this warning.",
+                                match)
+
+             # Add missing start and end markers
+             if not x.startswith("^"):
+                 x = "^" + x
+             if not x.endswith("$"):
+                 x += (r"\.(?P<ext>" + '|'.join(ALLOWED_CATEGORIES)
+                       + r")(\.gz)?$")
+         logger.debug("File regex for %s: %s", self._bundle_file, x)
+
+         return x
+
+     def get_nb_files(self, cat: str | None = None) -> int:
+         """Return the total number of matched files.
+         """
+         return len(self.get_file_paths(cat=cat))
+
+     def get_data_files(self, cat: str | None = None) -> typing.List[DataFile]:
+         """Return the list of matched files as DataFile objects.
+         """
+
+         files = self.get_file_paths(cat=cat)
+         filename_pattern = FilenamePattern(self.filename_regex)
+
+         data_files: typing.List[DataFile] = []
+
+         for f in files:
+             try:
+                 data_files.append(DataFile.make_from_bundle(
+                     path=f, bundle_path=self._bundle_file,
+                     pattern=filename_pattern))
+             except (IOError, ValueError) as e:
+                 logger.error("Error processing file %s: %s", f, str(e))
+
+         return data_files
+
+     def get_file_paths(self, cat: str | None = None) -> typing.Sequence[str]:
+         """Return the list of matched file paths.
+         """
+         files, _, _ = self._do_get_file_paths(cat=cat)
+         return files
+
+     def get_unmatched_file_paths(self, cat: str | None = None
+                                  ) -> typing.Sequence[str]:
+         """Return the list of unmatched file paths.
+         """
+         _, files, _ = self._do_get_file_paths(cat=cat)
+         return files
+
+     def get_all_categories(self) -> typing.Set[str]:
+         """Return all categories of the analysis."""
+         _, _, categories = self._do_get_file_paths()
+         return categories
+
+     @staticmethod
+     def _resolve_data_path(root_dir: str, data_path: str | None) -> str:
+         resolved_data_path = '' if data_path is None else data_path
+
+         if not os.path.isabs(resolved_data_path):
+             resolved_data_path = os.path.abspath(os.path.join(root_dir, resolved_data_path))
+
+         return resolved_data_path
+
+     def _get_files_with_allowed_categories(self) -> typing.Dict[str, str]:
+         # Create a dict to store allowed files. Keys are the filepaths,
+         # and values are their corresponding category.
+         allowed_files: typing.Dict[str, str] = {}
+         # If files are listed explicitly in the YAML 'files' attribute, process them.
+         if self._files is not None:
+             abs_filepaths = [Path(self._data_path) / f for f in self._files]
+             # Try to retrieve files matching allowed categories by checking their first suffix.
+             for file in abs_filepaths:
+                 cat = file.suffixes[0][1:]
+                 # Add each matching file and its category to the dict.
+                 if cat in ALLOWED_CATEGORIES:
+                     allowed_files[str(file)] = cat
+         # Else, look for files on disk using the YAML 'data_path' attribute.
+         else:
+             # Try to retrieve files matching allowed categories using glob.
+             for cat in ALLOWED_CATEGORIES:
+                 glob_res = []
+                 glob_res.extend(glob.glob(os.path.join(self._data_path, f"*.{cat}")))
+                 glob_res.extend(glob.glob(os.path.join(self._data_path, f"*.{cat}.gz")))
+
+                 # Add each globbed file and its category to the dict.
+                 for g_file in glob_res:
+                     allowed_files[g_file] = cat
+
+         return allowed_files
+
+     def _do_get_file_paths(self, cat: str | None = None) \
+             -> tuple[typing.Sequence[str], typing.Sequence[str], typing.Set[str]]:
+
+         # Raise an error if the given category is not among the allowed categories.
+         if cat is not None and cat not in ALLOWED_CATEGORIES:
+             raise ValueError(f"Unknown category {cat}.")
+
+         # Obtain a dict of all files matching the allowed categories.
+         allowed_files = self._get_files_with_allowed_categories()
+
+         if cat is None:
+             # No category was given as a parameter, so we match all categories.
+             files_to_match = allowed_files
+         else:
+             # A category was given as a parameter, so we match only this specific category.
+             files_to_match = {k: v for (k, v) in allowed_files.items() if v == cat}
+
+         filename_pattern = FilenamePattern(self.filename_regex)
+         matching_files: typing.List[str] = []
+         non_matching_files: typing.List[str] = []
+         categories = set()
+
+         # Filter files by ensuring that they match the filename pattern defined in the analysis.
+         for file, category in sorted(files_to_match.items()):
+             if filename_pattern.matches_pattern(os.path.basename(file)):
+                 matching_files.append(file)
+                 logger.info("MATCHED file %s.", file)
+                 # Add the file category to the categories set.
+                 categories.add(category)
+             else:
+                 logger.warning("UNMATCHED file %s.", file)
+                 non_matching_files.append(file)
+         return matching_files, non_matching_files, categories
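To make the placeholder resolution in filename_regex concrete, here is the transformation it performs, shown with plain strings; the Tags machinery is elided, the %S and %D tags and their default regex come from DEFAULT_TAG2FIELD in constants.py, and the sample values are made up:

    import re

    # A file_prefix of "%S_run1_%D" with metadata {'sample_name': 'S01'} and no
    # 'cov_depth' entry resolves %S against the metadata and leaves %D generic:
    resolved = (r"^(?P<sample_name>S01)_run1_(?P<cov_depth>[^_-]+)"
                r"\.(?P<ext>vcf|cov)(\.gz)?$")
    m = re.match(resolved, "S01_run1_30.vcf.gz")
    assert m and m.group("cov_depth") == "30" and m.group("ext") == "vcf"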
genelastic/bi_process.py ADDED
@@ -0,0 +1,25 @@
+ # pylint: disable=missing-module-docstring
+ import copy
+ import typing
+
+ from .common import BioInfoProcessData
+
+
+ class BioInfoProcess:
+     """Class representing a bioinformatics process."""
+     def __init__(self, proc_id: str,
+                  bundle_file: str | None = None,
+                  **data: str | typing.List[str]) -> None:
+         self._proc_id = proc_id
+         self._bundle_file = bundle_file
+         self._data: BioInfoProcessData = data
+
+     @property
+     def id(self) -> str:
+         """Get the bioinformatics process ID."""
+         return self._proc_id
+
+     @property
+     def data(self) -> BioInfoProcessData:
+         """Get the data associated with the bioinformatics process."""
+         return copy.deepcopy(self._data)
@@ -0,0 +1,48 @@
+ # pylint: disable=missing-module-docstring
+ import logging
+ import typing
+
+ from .bi_process import BioInfoProcess
+ from .common import BundleDict
+
+ logger = logging.getLogger('genelastic')
+
+
+ class BioInfoProcesses:
+     """Class BioInfoProcesses is a container of BioInfoProcess objects."""
+
+     def __init__(self) -> None:
+         self._dict: typing.Dict[str, BioInfoProcess] = {}
+
+     def __len__(self) -> int:
+         return len(self._dict)
+
+     def __getitem__(self, key: str) -> BioInfoProcess:
+         return self._dict[key]
+
+     def add(self, process: BioInfoProcess) -> None:
+         """Add one BioInfoProcess object.
+         If a BioInfoProcess object with the same ID already exists in the
+         container, a ValueError is raised.
+         """
+         if process.id in self._dict:
+             raise ValueError(f"A bi process with the id '{process.id}' is already present.")
+
+         # Register the BioInfoProcess object under its ID.
+         self._dict[process.id] = process
+
+     def get_process_ids(self) -> typing.Set[str]:
+         """Get the set of bioinformatics process IDs."""
+         return set(self._dict.keys())
+
+     @classmethod
+     def from_array_of_dicts(cls, arr: typing.Sequence[BundleDict]
+                             ) -> typing.Self:
+         """Build a BioInfoProcesses instance."""
+
+         bi_processes = cls()
+
+         for d in arr:
+             bi_processes.add(BioInfoProcess(**d))
+
+         return bi_processes
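A short sketch of the duplicate-ID guard above; the process ID and the extra keyword argument are made up, and extra keywords are collected into the process's data dict:

    procs = BioInfoProcesses()
    procs.add(BioInfoProcess(proc_id="bwa-1", pipeline="bwa-mem"))  # 'pipeline' lands in **data
    try:
        procs.add(BioInfoProcess(proc_id="bwa-1"))
    except ValueError as e:
        print(e)  # A bi process with the id 'bwa-1' is already present.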
genelastic/common.py ADDED
@@ -0,0 +1,151 @@
+ """
+ Module: common
+
+ This module contains custom types and functions shared by multiple genelastic scripts.
+ """
+ import argparse
+ import sys
+ import typing
+ import logging
+
+ import elastic_transport
+ import elasticsearch
+
+ logger = logging.getLogger('genelastic')
+
+ AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
+ WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
+ BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
+ BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]
+
+ AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
+ MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
+ ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
+                                      WetProcessesData |
+                                      BioInfoProcessData)
+ BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
+                                                       MetadataDocument |
+                                                       AnalysisDocument |
+                                                       ProcessDocument]]
+ Bucket: typing.TypeAlias = typing.Dict[str, typing.Dict[typing.Any, typing.Any]]
+
+
+ def connect_to_es(host: str, port: int, usr: str, pwd: str) -> elasticsearch.Elasticsearch:
+     """Connect to a remote Elasticsearch database."""
+     addr = f"https://{host}:{port}"
+     logger.info("Trying to connect to Elasticsearch at %s.", addr)
+
+     try:
+         es = elasticsearch.Elasticsearch(
+             addr,
+             # ssl_assert_fingerprint=args.es_cert_fp,
+             # ca_certs=args.es_cert,
+             verify_certs=False,
+             basic_auth=(usr, pwd)
+         )
+         logger.info(es.info())
+     except elastic_transport.TransportError as e:
+         logger.error(e.message)
+         sys.exit(1)
+     return es
+
+
+ def run_composite_aggregation(es: elasticsearch.Elasticsearch,
+                               index: str, query: typing.Dict[str, typing.Any]) \
+         -> typing.List[Bucket]:
+     """
+     Execute a composite aggregation on an Elasticsearch index and return all paginated results.
+
+     :param es: Elasticsearch client instance.
+     :param index: Name of the index to query.
+     :param query: Aggregation query to run.
+     :return: List of aggregation results.
+     """
+     # Extract the aggregation name from the query dict.
+     agg_name = next(iter(query["aggs"]))
+     all_buckets: typing.List[Bucket] = []
+
+     try:
+         logger.debug("Running composite aggregation query %s on index '%s'.", query, index)
+         response = es.search(index=index, body=query)
+     except elasticsearch.NotFoundError as e:
+         raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
+
+     while True:
+         # Extract buckets from the response.
+         buckets: typing.List[Bucket] = response['aggregations'][agg_name]['buckets']
+         all_buckets.extend(buckets)
+
+         # Check if there are more results to fetch.
+         if 'after_key' in response['aggregations'][agg_name]:
+             after_key = response['aggregations'][agg_name]['after_key']
+             query['aggs'][agg_name]['composite']['after'] = after_key
+             try:
+                 logger.debug("Running query %s on index '%s'.", query, index)
+                 response = es.search(index=index, body=query)  # Fetch the next page of results.
+             except elasticsearch.NotFoundError as e:
+                 raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
+         else:
+             break
+
+     return all_buckets
+
+
+ def get_process_ids(es: elasticsearch.Elasticsearch, index: str, proc_field_name: str) \
+         -> typing.Set[str]:
+     """Return a set of process IDs."""
+     process_ids = set()
+
+     query = {
+         "size": 0,
+         "aggs": {
+             "get_proc_ids": {
+                 "composite": {
+                     "sources": {"proc_id": {"terms": {"field": f"{proc_field_name}.keyword"}}},
+                     "size": 1000,
+                 }
+             }
+         }
+     }
+
+     buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
+
+     for bucket in buckets:
+         process_ids.add(bucket['key']['proc_id'])
+
+     return process_ids
+
+
+ def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
+     """
+     Add verbosity control arguments to the parser.
+     Arguments are added to the parser by using its reference.
+     """
+     parser.add_argument('-q', '--quiet', dest='verbose', action='store_const',
+                         const=0, default=1,
+                         help='Set verbosity to 0 (quiet mode).')
+     parser.add_argument('-v', '--verbose', dest='verbose', action='count',
+                         default=1,
+                         help=('Verbose level. -v for information, -vv for debug,' +
+                               ' -vvv for trace.'))
+
+
+ def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
+     """
+     Add arguments to the parser needed to gather Elasticsearch server connection parameters.
+     Arguments are added to the parser by using its reference.
+     """
+     parser.add_argument('--es-host', dest='es_host', default='localhost',
+                         help='Address of the Elasticsearch host.')
+     parser.add_argument('--es-port', type=int, default=9200, dest='es_port',
+                         help='Elasticsearch port.')
+     parser.add_argument('--es-usr', dest='es_usr', default='elastic',
+                         help='Elasticsearch user.')
+     parser.add_argument('--es-pwd', dest='es_pwd', required=True,
+                         help='Elasticsearch password.')
+     parser.add_argument('--es-cert', dest='es_cert',
+                         help='Elasticsearch certificate file.')
+     parser.add_argument('--es-cert-fp', dest='es_cert_fp',
+                         help='Elasticsearch certificate fingerprint.')
+     parser.add_argument('--es-index-prefix', dest='es_index_prefix',
+                         help='Add the given prefix to each index created during import.')
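How the helpers above are meant to compose, as a minimal sketch; the program name, index name, and field name are hypothetical:

    import argparse

    parser = argparse.ArgumentParser(prog="genelastic-import")  # hypothetical program name
    add_verbose_control_args(parser)
    add_es_connection_args(parser)
    args = parser.parse_args()

    es = connect_to_es(args.es_host, args.es_port, args.es_usr, args.es_pwd)
    # The index and field names below are assumptions for illustration only.
    known_ids = get_process_ids(es, index="bi_processes", proc_field_name="proc_id")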
genelastic/constants.py ADDED
@@ -0,0 +1,45 @@
+ """
+ Module: constants
+
+ This module contains genelastic constants.
+ """
+ import typing
+
+ ALLOWED_CATEGORIES: typing.Final[typing.List[str]] = ['vcf', 'cov']
+
+ BUNDLE_CURRENT_VERSION = 3
+
+ DEFAULT_TAG_REGEX = "[^_-]+"
+ DEFAULT_TAG_PREFIX = "%"
+ DEFAULT_TAG_SUFFIX = ""
+
+ DEFAULT_TAG2FIELD: typing.Final[typing.Dict[str, typing.Dict[str, str]]] = {
+     '%S': {
+         "field": 'sample_name',
+         "regex": DEFAULT_TAG_REGEX
+     },
+     '%F': {
+         "field": 'source',
+         "regex": DEFAULT_TAG_REGEX
+     },
+     '%W': {
+         "field": 'wet_process',
+         "regex": DEFAULT_TAG_REGEX
+     },
+     '%B': {
+         "field": 'bi_process',
+         "regex": DEFAULT_TAG_REGEX
+     },
+     '%D': {
+         "field": 'cov_depth',
+         "regex": DEFAULT_TAG_REGEX
+     },
+     '%A': {
+         "field": 'barcode',
+         "regex": DEFAULT_TAG_REGEX
+     },
+     '%R': {
+         "field": 'reference_genome',
+         "regex": DEFAULT_TAG_REGEX
+     }
+ }
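This mapping is what Analysis.filename_regex consults when expanding tags; for example, when no metadata overrides the field:

    field = DEFAULT_TAG2FIELD['%S']["field"]  # 'sample_name'
    regex = DEFAULT_TAG2FIELD['%S']["regex"]  # '[^_-]+'
    group = f"(?P<{field}>{regex})"           # '(?P<sample_name>[^_-]+)'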
genelastic/data_file.py ADDED
@@ -0,0 +1,82 @@
+ """
+ This module defines the DataFile class, which handles the representation,
+ management, and extraction of metadata for a data file within a data bundle.
+
+ It includes functionality to construct DataFile instances from paths and
+ optional filename patterns, retrieve file paths and metadata, and support
+ for extracting metadata from filenames using specified patterns.
+ """
+
+ import logging
+ import os
+ import pathlib
+ import typing
+ from .filename_pattern import FilenamePattern
+ from .common import AnalysisMetaData
+
+ logger = logging.getLogger('genelastic')
+
+
+ class DataFile:
+     """Class for handling a data file and its metadata."""
+
+     # Initializer
+     def __init__(self, path: str, bundle_path: str | None = None,
+                  metadata: typing.Optional[AnalysisMetaData] = None) -> None:
+         self._path = path
+         self._bundle_path = bundle_path  # The bundle YAML file in which this
+         # file was listed.
+         self._metadata = {} if metadata is None else metadata
+
+     def __repr__(self) -> str:
+         return (f"File {self._path}, from bundle {self._bundle_path}"
+                 + f", with metadata {self._metadata}")
+
+     # Get path
+     @property
+     def path(self) -> str:
+         """Retrieve the data file path."""
+         return self._path
+
+     def exists(self) -> bool:
+         """Test whether the associated file exists on disk."""
+         return os.path.isfile(self._path)
+
+     # Get bundle path
+     @property
+     def bundle_path(self) -> str | None:
+         """Retrieve the path to the associated data bundle file."""
+         return self._bundle_path
+
+     # Get metadata
+     @property
+     def metadata(self) -> AnalysisMetaData:
+         """Retrieve a copy of the metadata associated with the data file."""
+         return self._metadata.copy()
+
+     # Factory
+     @classmethod
+     def make_from_bundle(
+             cls,
+             path: str,
+             bundle_path: str | None,
+             pattern: typing.Optional[FilenamePattern] = None) -> 'DataFile':
+         """Construct a DataFile instance from a bundle path, file path,
+         and optional filename pattern."""
+         # Make the path absolute, relative to the bundle file's directory.
+         if not os.path.isabs(path) and bundle_path is not None:
+             path = os.path.join(os.path.dirname(bundle_path), path)
+
+         # Extract filename metadata
+         metadata = None
+         if pattern is not None:
+             metadata = pattern.extract_metadata(os.path.basename(path))
+
+         if metadata:
+             if "ext" not in metadata:
+                 metadata["ext"] = pathlib.Path(path).suffixes[0][1:]
+
+             if "cov_depth" in metadata:
+                 metadata["cov_depth"] = int(metadata["cov_depth"])
+
+         return cls(path, bundle_path, metadata)
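A sketch of the factory above; the regex, the paths, and the exact return shape of FilenamePattern.extract_metadata are assumptions based on how analysis.py uses the class:

    pattern = FilenamePattern(r"^(?P<sample_name>[^_-]+)\.(?P<ext>vcf|cov)(\.gz)?$")
    df = DataFile.make_from_bundle(path="S01.vcf.gz",
                                   bundle_path="/data/bundle.yml",  # hypothetical bundle file
                                   pattern=pattern)
    print(df.path)      # /data/S01.vcf.gz, made absolute relative to the bundle's directory
    print(df.metadata)  # e.g. {'sample_name': 'S01', 'ext': 'vcf'}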