genelastic 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information it contains is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- genelastic/api/.env +4 -0
- genelastic/api/cli_start_api.py +2 -2
- genelastic/api/errors.py +52 -0
- genelastic/api/extends/example.py +0 -6
- genelastic/api/extends/example.yml +0 -20
- genelastic/api/routes.py +313 -181
- genelastic/api/server.py +8 -3
- genelastic/api/specification.yml +343 -181
- genelastic/common/__init__.py +0 -44
- genelastic/common/cli.py +48 -0
- genelastic/common/elastic.py +374 -46
- genelastic/common/exceptions.py +34 -2
- genelastic/common/server.py +9 -1
- genelastic/common/types.py +1 -14
- genelastic/import_data/__init__.py +0 -27
- genelastic/import_data/checker.py +99 -0
- genelastic/import_data/checker_observer.py +13 -0
- genelastic/import_data/cli/__init__.py +0 -0
- genelastic/import_data/cli/cli_check.py +136 -0
- genelastic/import_data/{cli_gen_data.py → cli/gen_data.py} +4 -4
- genelastic/import_data/cli/import_data.py +346 -0
- genelastic/import_data/cli/info.py +247 -0
- genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
- genelastic/import_data/cli/validate.py +146 -0
- genelastic/import_data/collect.py +185 -0
- genelastic/import_data/constants.py +136 -11
- genelastic/import_data/import_bundle.py +102 -59
- genelastic/import_data/import_bundle_factory.py +70 -149
- genelastic/import_data/importers/__init__.py +0 -0
- genelastic/import_data/importers/importer_base.py +131 -0
- genelastic/import_data/importers/importer_factory.py +85 -0
- genelastic/import_data/importers/importer_types.py +223 -0
- genelastic/import_data/logger.py +2 -1
- genelastic/import_data/models/__init__.py +0 -0
- genelastic/import_data/models/analyses.py +178 -0
- genelastic/import_data/models/analysis.py +144 -0
- genelastic/import_data/models/data_file.py +110 -0
- genelastic/import_data/models/process.py +45 -0
- genelastic/import_data/models/processes.py +84 -0
- genelastic/import_data/models/tags.py +170 -0
- genelastic/import_data/models/unique_list.py +109 -0
- genelastic/import_data/models/validate.py +26 -0
- genelastic/import_data/patterns.py +90 -0
- genelastic/import_data/random_bundle.py +10 -8
- genelastic/import_data/resolve.py +157 -0
- genelastic/ui/.env +1 -0
- genelastic/ui/cli_start_ui.py +4 -2
- genelastic/ui/routes.py +289 -42
- genelastic/ui/static/cea-cnrgh.ico +0 -0
- genelastic/ui/static/cea.ico +0 -0
- genelastic/ui/static/layout.ico +0 -0
- genelastic/ui/static/novaseq6000.png +0 -0
- genelastic/ui/static/style.css +430 -0
- genelastic/ui/static/ui.js +458 -0
- genelastic/ui/templates/analyses.html +96 -9
- genelastic/ui/templates/analysis_detail.html +44 -0
- genelastic/ui/templates/bi_process_detail.html +129 -0
- genelastic/ui/templates/bi_processes.html +114 -9
- genelastic/ui/templates/explorer.html +356 -0
- genelastic/ui/templates/home.html +205 -2
- genelastic/ui/templates/layout.html +148 -29
- genelastic/ui/templates/version.html +19 -7
- genelastic/ui/templates/wet_process_detail.html +131 -0
- genelastic/ui/templates/wet_processes.html +114 -9
- genelastic-0.9.0.dist-info/METADATA +686 -0
- genelastic-0.9.0.dist-info/RECORD +76 -0
- genelastic-0.9.0.dist-info/WHEEL +4 -0
- genelastic-0.9.0.dist-info/entry_points.txt +10 -0
- genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
- genelastic/import_data/analyses.py +0 -69
- genelastic/import_data/analysis.py +0 -205
- genelastic/import_data/bi_process.py +0 -27
- genelastic/import_data/bi_processes.py +0 -49
- genelastic/import_data/cli_import.py +0 -379
- genelastic/import_data/cli_info.py +0 -256
- genelastic/import_data/cli_validate.py +0 -54
- genelastic/import_data/data_file.py +0 -87
- genelastic/import_data/filename_pattern.py +0 -57
- genelastic/import_data/tags.py +0 -123
- genelastic/import_data/wet_process.py +0 -28
- genelastic/import_data/wet_processes.py +0 -53
- genelastic-0.8.0.dist-info/METADATA +0 -109
- genelastic-0.8.0.dist-info/RECORD +0 -52
- genelastic-0.8.0.dist-info/WHEEL +0 -5
- genelastic-0.8.0.dist-info/entry_points.txt +0 -8
- genelastic-0.8.0.dist-info/top_level.txt +0 -1
genelastic/import_data/cli_info.py
DELETED
@@ -1,256 +0,0 @@
-import argparse
-import logging
-
-from genelastic.common import (
-    Bucket,
-    ElasticQueryConn,
-    add_es_connection_args,
-    add_verbose_control_args,
-)
-
-from .logger import configure_logging
-
-logger = logging.getLogger("genelastic")
-logging.getLogger("elastic_transport").setLevel(
-    logging.WARNING
-)  # Disable excessive logging
-
-
-def read_args() -> argparse.Namespace:
-    """Read arguments from command line."""
-    parser = argparse.ArgumentParser(
-        description="ElasticSearch database info.",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        allow_abbrev=False,
-    )
-    add_verbose_control_args(parser)
-    add_es_connection_args(parser)
-    parser.add_argument(
-        "-y",
-        "--list-bundles",
-        action="store_true",
-        help="List all imported YAML bundles.",
-    )
-    parser.add_argument(
-        "-f",
-        "--list-data-files",
-        action="store_true",
-        help="List all imported data files.",
-    )
-    parser.add_argument(
-        "-w",
-        "--list-wet-processes",
-        action="store_true",
-        help="List all imported wet processes.",
-    )
-    parser.add_argument(
-        "-b",
-        "--list-bi-processes",
-        action="store_true",
-        help="List all imported bio info processes.",
-    )
-    parser.add_argument(
-        "-Y",
-        "--list-data-files-per-bundle",
-        action="store_true",
-        help="For each imported YAML bundle, "
-        "display some info and list its data files.",
-    )
-    return parser.parse_args()
-
-
-def list_bundles(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all imported YAML bundles."""
-    query = {
-        "size": 0,
-        "aggs": {
-            "get_bundle_paths": {
-                "composite": {
-                    "sources": {
-                        "bundle_path": {
-                            "terms": {"field": "bundle_path.keyword"}
-                        }
-                    },
-                    "size": 1000,
-                }
-            }
-        },
-    }
-
-    buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
-        index, query
-    )
-
-    logger.info("Imported YAML files")
-    logger.info("===================")
-
-    if len(buckets) == 0:
-        logger.info("Empty response.")
-        return
-
-    for bucket in buckets:
-        bundle_path = bucket["key"]["bundle_path"]
-        logger.info("- %s", bundle_path)
-
-
-def list_data_files(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all imported data files."""
-    query = {
-        "size": 0,
-        "aggs": {
-            "get_paths": {
-                "composite": {
-                    "sources": {"path": {"terms": {"field": "path.keyword"}}},
-                    "size": 1000,
-                }
-            }
-        },
-    }
-
-    buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
-        index, query
-    )
-
-    logger.info("Imported data files")
-    logger.info("===================")
-
-    if len(buckets) == 0:
-        logger.info("Empty response.")
-        return
-
-    for bucket in buckets:
-        bundle_path = bucket["key"]["path"]
-        logger.info("- %s", bundle_path)
-
-
-def list_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all processes."""
-    process_ids = es_query_conn.get_field_values(index, "proc_id")
-
-    if len(process_ids) == 0:
-        logger.info("Empty response.")
-        return
-
-    for process_id in process_ids:
-        logger.info("- %s", process_id)
-
-
-def list_wet_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all wet processes."""
-    logger.info("Imported wet processes")
-    logger.info("======================")
-    list_processes(es_query_conn, index)
-
-
-def list_bi_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all bio info processes."""
-    logger.info("Imported bi processes")
-    logger.info("=====================")
-    list_processes(es_query_conn, index)
-
-
-def list_data_files_per_bundle(
-    es_query_conn: ElasticQueryConn, index: str
-) -> None:
-    """For each imported YAML bundle, display some info and list its data files."""
-    query = {
-        "size": 0,
-        "aggs": {
-            "data_files": {
-                "composite": {
-                    "sources": [
-                        {
-                            "bundle_path": {
-                                "terms": {"field": "bundle_path.keyword"}
-                            }
-                        }
-                    ],
-                    "size": 100,
-                },
-                "aggs": {"docs": {"top_hits": {"size": 100}}},
-            }
-        },
-    }
-
-    buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
-        index, query
-    )
-
-    logger.info("Data files per YAML bundle")
-    logger.info("==========================")
-
-    if len(buckets) == 0:
-        logger.info("Empty response.")
-        return
-
-    for bucket in buckets:
-        documents = bucket["docs"]["hits"]["hits"]
-        if len(documents) == 0:
-            continue
-
-        logger.info("- Bundle Path: %s", bucket["key"]["bundle_path"])
-        logger.info(
-            " -> Wet process: %s",
-            documents[0]["_source"]["metadata"]["wet_process"],
-        )
-        logger.info(
-            " -> Bio info process: %s",
-            documents[0]["_source"]["metadata"]["bi_process"],
-        )
-        logger.info(" -> Data files:")
-
-        for doc in documents:
-            logger.info(" - Index: %s", doc["_source"]["file_index"])
-            logger.info("   Path: %s", doc["_source"]["path"])
-
-
-def main() -> None:
-    """Entry point of the info script."""
-    args = read_args()
-
-    configure_logging(args.verbose)
-    logger.debug("Arguments: %s", args)
-
-    addr = f"https://{args.es_host}:{args.es_port}"
-    logger.info("Trying to connect to Elasticsearch at %s...", addr)
-    es_query_conn = ElasticQueryConn(
-        addr, args.es_cert_fp, basic_auth=(args.es_usr, args.es_pwd)
-    )
-
-    analysis_index = f"{args.es_index_prefix}-analyses"
-    wet_processes_index = f"{args.es_index_prefix}-wet_processes"
-    bi_processes_index = f"{args.es_index_prefix}-bi_processes"
-
-    list_call_count = 0
-
-    if args.list_bundles:
-        list_bundles(es_query_conn, analysis_index)
-        list_call_count += 1
-
-    if args.list_data_files:
-        list_data_files(es_query_conn, analysis_index)
-        list_call_count += 1
-
-    if args.list_wet_processes:
-        list_wet_processes(es_query_conn, wet_processes_index)
-        list_call_count += 1
-
-    if args.list_bi_processes:
-        list_bi_processes(es_query_conn, bi_processes_index)
-        list_call_count += 1
-
-    if args.list_data_files_per_bundle:
-        list_data_files_per_bundle(es_query_conn, analysis_index)
-        list_call_count += 1
-
-    if list_call_count == 0:
-        logger.debug("No list option specified, listing everything.")
-        list_bundles(es_query_conn, analysis_index)
-        list_data_files(es_query_conn, analysis_index)
-        list_wet_processes(es_query_conn, wet_processes_index)
-        list_bi_processes(es_query_conn, bi_processes_index)
-        list_data_files_per_bundle(es_query_conn, analysis_index)
-
-
-if __name__ == "__main__":
-    main()
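
Note: every listing function above delegates to ElasticQueryConn.run_composite_aggregation, whose implementation is not part of this diff. A minimal sketch of how such a helper is commonly written against the official elasticsearch Python client, paging through a composite aggregation via after_key, might look as follows (the method body, the Bucket alias, and the client usage are assumptions; only the name and call shape come from the code above):

from typing import Any

from elasticsearch import Elasticsearch

Bucket = dict[str, Any]  # assumed alias; the real one lives in genelastic.common


def run_composite_aggregation(
    es: Elasticsearch, index: str, query: dict[str, Any]
) -> list[Bucket]:
    """Collect all buckets of one composite aggregation, following
    'after_key' until no further page is returned (sketch only)."""
    agg_name = next(iter(query["aggs"]))  # e.g. "get_bundle_paths"
    buckets: list[Bucket] = []
    while True:
        resp = es.search(index=index, body=query)
        agg = resp["aggregations"][agg_name]
        buckets.extend(agg["buckets"])
        after_key = agg.get("after_key")
        if not agg["buckets"] or after_key is None:
            break
        # Resume the next page from the last composite key returned.
        query["aggs"][agg_name]["composite"]["after"] = after_key
    return buckets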
genelastic/import_data/cli_validate.py
DELETED
@@ -1,54 +0,0 @@
-import argparse
-import logging
-from pathlib import Path
-
-from schema import SchemaError
-
-from genelastic.common import add_verbose_control_args
-
-from .import_bundle_factory import make_import_bundle_from_files
-from .logger import configure_logging
-
-logger = logging.getLogger("genelastic")
-
-
-def read_args() -> argparse.Namespace:
-    """Read arguments from command line."""
-    parser = argparse.ArgumentParser(
-        description="Ensure that YAML files "
-        "follow the genelastic YAML bundle schema.",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        allow_abbrev=False,
-    )
-    add_verbose_control_args(parser)
-    parser.add_argument(
-        "files",
-        type=Path,
-        nargs="+",
-        default=None,
-        help="YAML files to validate.",
-    )
-    parser.add_argument(
-        "-c",
-        "--check",
-        action="store_true",
-        help="In addition to validating the schema, "
-        "check for undefined referenced processes.",
-    )
-    return parser.parse_args()
-
-
-def main() -> int:
-    """Entry point of the validate script."""
-    args = read_args()
-    configure_logging(args.verbose)
-
-    try:
-        make_import_bundle_from_files(args.files, check=args.check)
-    except (ValueError, RuntimeError, TypeError, SchemaError) as e:
-        # Catch any exception that can be raised by 'make_import_bundle_from_files'.
-        logger.error(e)
-        return 1
-
-    logger.info("All YAML files respect the genelastic YAML bundle format.")
-    return 0
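
Note: the SchemaError caught in main() comes from the third-party schema library, raised inside make_import_bundle_from_files (defined elsewhere in the package). A rough illustration of how that error type arises; the schema below is invented and far simpler than the real genelastic bundle schema:

from schema import Schema, SchemaError

# Hypothetical, simplified stand-in for a bundle schema.
bundle_schema = Schema({"analyses": list, "wet_processes": list})

try:
    bundle_schema.validate({"analyses": []})  # missing 'wet_processes'
except SchemaError as err:
    print(f"Validation failed: {err}")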
genelastic/import_data/data_file.py
DELETED
@@ -1,87 +0,0 @@
-"""This module defines the DataFile class, which handles the representation,
-management, and extraction of metadata for a data file within a data bundle.
-
-It includes functionality to construct DataFile instances from paths and
-optional filename patterns, retrieve file paths and metadata, and support
-for extracting metadata from filenames using specified patterns.
-"""
-
-import logging
-import pathlib
-from pathlib import Path
-
-from genelastic.common import AnalysisMetaData
-
-from .filename_pattern import FilenamePattern
-
-logger = logging.getLogger("genelastic")
-
-
-class DataFile:
-    """Class for handling a data file and its metadata."""
-
-    # Initializer
-    def __init__(
-        self,
-        path: Path,
-        bundle_path: Path | None = None,
-        metadata: AnalysisMetaData | None = None,
-    ) -> None:
-        self._path = path
-        self._bundle_path = bundle_path  # The bundle YAML file in which this
-        # file was listed.
-        self._metadata = {} if metadata is None else metadata
-
-    def __repr__(self) -> str:
-        return f"File {self._path}, from bundle {self._bundle_path}, with metadata {self._metadata}"
-
-    # Get path
-    @property
-    def path(self) -> Path:
-        """Retrieve the data file path."""
-        return self._path
-
-    def exists(self) -> bool:
-        """Tests if the associated file exists on disk."""
-        return self._path.is_file()
-
-    # Get bundle path
-    @property
-    def bundle_path(self) -> Path | None:
-        """Retrieve the path to the associated data bundle file."""
-        return self._bundle_path
-
-    # Get metadata
-    @property
-    def metadata(self) -> AnalysisMetaData:
-        """Retrieve a copy of the metadata associated with the data file."""
-        return self._metadata.copy()
-
-    # Factory
-    @classmethod
-    def make_from_bundle(
-        cls,
-        path: Path,
-        bundle_path: Path | None,
-        pattern: FilenamePattern | None = None,
-    ) -> "DataFile":
-        """Construct a DataFile instance from a bundle path, file path,
-        and optional filename pattern.
-        """
-        # Make absolute path
-        if not path.is_absolute() and bundle_path is not None:
-            path = bundle_path.parent / path
-
-        # Extract filename metadata
-        metadata = None
-        if pattern is not None:
-            metadata = pattern.extract_metadata(path.name)
-
-        if metadata:
-            if "ext" not in metadata:
-                metadata["ext"] = pathlib.Path(path).suffixes[0][1:]
-
-            if "cov_depth" in metadata:
-                metadata["cov_depth"] = int(metadata["cov_depth"])
-
-        return cls(path, bundle_path, metadata)
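
Note: a short usage sketch for the removed class, using a hypothetical bundle path and filename pattern (both invented for illustration):

from pathlib import Path

# Relative data file paths are resolved against the bundle's directory,
# and the pattern's named groups become the file's metadata.
pattern = FilenamePattern(r"(?P<sample>[^.]+)\.(?P<cov_depth>\d+)x\.(?P<ext>\w+)")
data_file = DataFile.make_from_bundle(
    path=Path("sampleA.30x.vcf"),
    bundle_path=Path("/data/bundles/run1.yml"),
    pattern=pattern,
)
print(data_file.path)      # /data/bundles/sampleA.30x.vcf
print(data_file.metadata)  # {'sample': 'sampleA', 'cov_depth': 30, 'ext': 'vcf'}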
genelastic/import_data/filename_pattern.py
DELETED
@@ -1,57 +0,0 @@
-"""This module defines the FilenamePattern class, used to define a filename pattern
-and extract metadata from file names using this pattern.
-"""
-
-import re
-
-from genelastic.common import AnalysisMetaData
-
-
-class FilenamePattern:
-    """Class for defining a filename pattern.
-    The pattern is used to extract metadata from filenames
-    and verify filename conformity.
-    """
-
-    # Initializer
-    def __init__(self, pattern: str) -> None:
-        """Initializes a FilenamePattern instance.
-
-        Args:
-            pattern (str): The pattern string used for defining
-                the filename pattern.
-        """
-        self._re = re.compile(pattern)
-
-    def extract_metadata(self, filename: str) -> AnalysisMetaData:
-        """Extracts metadata from the given filename based
-        on the defined pattern.
-
-        Args:
-            filename (str): The filename from which metadata
-                needs to be extracted.
-
-        Returns:
-            dict: A dictionary containing the extracted metadata.
-
-        Raises:
-            RuntimeError: If parsing of filename fails
-                with the defined pattern.
-        """
-        m = self._re.search(filename)
-        if not m:
-            msg = f'Failed parsing filename "{filename}" with pattern "{self._re.pattern}".'
-            raise RuntimeError(msg)
-        return m.groupdict()
-
-    def matches_pattern(self, filename: str) -> bool:
-        """Checks if the given filename matches the defined pattern.
-
-        Args:
-            filename (str): The filename to be checked.
-
-        Returns:
-            bool: True if the filename matches the pattern,
-                False otherwise.
-        """
-        return bool(self._re.match(filename))
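
Note: extract_metadata uses re.search while matches_pattern uses re.match, so the former finds the pattern anywhere in the filename whereas the latter requires it at the start. A quick sketch with an invented pattern:

fp = FilenamePattern(r"(?P<sample>S\d+)_(?P<ext>vcf|bam)")

print(fp.extract_metadata("run42_S7_vcf"))  # {'sample': 'S7', 'ext': 'vcf'}
print(fp.matches_pattern("run42_S7_vcf"))   # False (anchored at the start)
print(fp.matches_pattern("S7_vcf"))         # True
fp.extract_metadata("no-match-here")        # raises RuntimeError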
genelastic/import_data/tags.py
DELETED
@@ -1,123 +0,0 @@
-import logging
-import re
-import typing
-
-from genelastic.common import BundleDict
-
-from .constants import DEFAULT_TAG2FIELD, DEFAULT_TAG_PREFIX, DEFAULT_TAG_SUFFIX
-
-logger = logging.getLogger("genelastic")
-
-TagsDefinition: typing.TypeAlias = dict[str, dict[str, str | dict[str, str]]]
-
-
-class Tags:
-    """This class handles the definition of default and custom tags.
-    Tags are used to extract custom metadata from files belonging to an analysis.
-    """
-
-    def __init__(self, documents: typing.Sequence[BundleDict] | None) -> None:
-        """Create a Tag instance."""
-        self._tags: dict[str, dict[str, str]] = DEFAULT_TAG2FIELD
-        self._tag_prefix: str = DEFAULT_TAG_PREFIX
-        self._tag_suffix: str = DEFAULT_TAG_SUFFIX
-
-        redefined_tags = None
-
-        if documents:
-            # Search for tags definition across loaded YAML documents.
-            redefined_tags = self._search_redefined_tags(documents)
-
-        if redefined_tags:
-            self._build_tags(redefined_tags)
-            logger.info(
-                "The following tags will be used to extract metadata from filenames : %s",
-                self._tags,
-            )
-        else:
-            logger.info(
-                "Using the default tags to extract metadata from filenames : %s",
-                self._tags,
-            )
-
-    def _build_tags(self, redefined_tags: TagsDefinition) -> None:
-        # Erase the tags defined by defaults.
-        self._tags = {}
-
-        if "format" in redefined_tags:
-            tag_format = redefined_tags["format"]
-
-            # extra type check for mypy
-            if "prefix" in tag_format and isinstance(tag_format["prefix"], str):
-                self._tag_prefix = tag_format["prefix"]
-
-            # extra type check for mypy
-            if "suffix" in tag_format and isinstance(tag_format["suffix"], str):
-                self._tag_suffix = tag_format["suffix"]
-
-        for tag_name, tag_attrs in redefined_tags["match"].items():
-            if isinstance(tag_attrs, dict):  # extra type check for mypy
-                self._tags[
-                    f"{self._tag_prefix}{tag_name}{self._tag_suffix}"
-                ] = tag_attrs
-
-    @staticmethod
-    def _search_redefined_tags(
-        documents: typing.Sequence[BundleDict],
-    ) -> TagsDefinition | None:
-        documents_with_redefined_tags: list[BundleDict] = [
-            d for d in documents if "tags" in d
-        ]
-        bundle_paths = [d["bundle_file"] for d in documents_with_redefined_tags]
-
-        # If there are more than one 'tags' redefinition across the documents, raise an error.
-        if len(documents_with_redefined_tags) > 1:
-            msg = (
-                f"Only one 'tags' key should be defined across all documents, "
-                f"but multiple were found : {', '.join(bundle_paths)}"
-            )
-            raise RuntimeError(msg)
-
-        if len(documents_with_redefined_tags) == 1:
-            redefined_tags: TagsDefinition = documents_with_redefined_tags[0][
-                "tags"
-            ]
-            return redefined_tags
-
-        return None
-
-    @property
-    def tag_prefix(self) -> str:
-        """Return the tag prefix. Default prefix is '%'."""
-        return self._tag_prefix
-
-    @property
-    def tag_suffix(self) -> str:
-        """Return the tag suffix. There is no suffix by default."""
-        return self._tag_suffix
-
-    @property
-    def items(self) -> typing.ItemsView[str, dict[str, str]]:
-        """Returns the tag items : the key is the tag name,
-        and the value is the tag attributes (a dict containing the 'field' and 'regex' keys).
-        """
-        return self._tags.items()
-
-    @property
-    def search_regex(self) -> str:
-        """Returns a regex to search for a tag inside a string."""
-        return (
-            r"("
-            + re.escape(self._tag_prefix)
-            + r"\w+"
-            + re.escape(self._tag_suffix)
-            + r")"
-        )
-
-    def __len__(self) -> int:
-        """Return the number of registered tags."""
-        return len(self._tags)
-
-    def __getitem__(self, key: str) -> dict[str, str]:
-        """Return a tag by its key."""
-        return self._tags[key]
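
Note: the shape that _build_tags expects can be inferred from the code above; the values below are invented for illustration (per the tag_prefix docstring, the real default prefix is '%'):

# Hypothetical 'tags' redefinition, shaped as _build_tags expects.
redefined: TagsDefinition = {
    "format": {"prefix": "<", "suffix": ">"},
    "match": {
        "sample": {"field": "sample_id", "regex": r"S\d+"},
        "ext": {"field": "extension", "regex": r"\w+"},
    },
}

tags = Tags([{"bundle_file": "bundle.yml", "tags": redefined}])
print(tags.tag_prefix)    # '<'
print(tags["<sample>"])   # {'field': 'sample_id', 'regex': 'S\\d+'}
print(tags.search_regex)  # (<\w+>)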
genelastic/import_data/wet_process.py
DELETED
@@ -1,28 +0,0 @@
-import copy
-
-from genelastic.common import WetProcessesData
-
-
-class WetProcess:
-    """Class WetProcess that represents a wet process."""
-
-    def __init__(
-        self,
-        proc_id: str,
-        bundle_file: str | None = None,
-        **data: str | float,
-    ) -> None:
-        """Create a WetProcess instance."""
-        self._proc_id = proc_id
-        self._bundle_file = bundle_file
-        self._data: WetProcessesData = data
-
-    @property
-    def id(self) -> str:
-        """Get the wet process ID."""
-        return self._proc_id
-
-    @property
-    def data(self) -> WetProcessesData:
-        """Get data associated to the wet process."""
-        return copy.deepcopy(self._data)
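
Note: a brief usage sketch; the extra field names are invented, since any additional keyword arguments are captured by **data:

proc = WetProcess(
    proc_id="wetA",
    bundle_file="bundle.yml",
    kit="TruSeq",       # hypothetical extra fields, swallowed by **data
    read_length=150,
)
print(proc.id)    # 'wetA'
print(proc.data)  # {'kit': 'TruSeq', 'read_length': 150}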
genelastic/import_data/wet_processes.py
DELETED
@@ -1,53 +0,0 @@
-import logging
-import typing
-
-from genelastic.common import BundleDict
-
-from .wet_process import WetProcess
-
-logger = logging.getLogger("genelastic")
-
-
-class WetProcesses:
-    """Class WetProcesses is a container of WetProces objects."""
-
-    def __init__(self) -> None:
-        """Create an empty container."""
-        self._dict: dict[str, WetProcess] = {}
-
-    def __len__(self) -> int:
-        """Return the number of WetProcess objects inside the container."""
-        return len(self._dict)
-
-    def __getitem__(self, key: str) -> WetProcess:
-        """Return a WetProcess present in the container by its key."""
-        return self._dict[key]
-
-    def add(self, process: WetProcess) -> None:
-        """Add one WetProces object.
-        If a WetProces object with the same ID already exists in the container, the program exits.
-        """
-        if process.id in self._dict:
-            msg = (
-                f"A wet process with the id '{process.id}' is already present."
-            )
-            raise ValueError(msg)
-
-        # Add one WetProcess object.
-        self._dict[process.id] = process
-
-    def get_process_ids(self) -> set[str]:
-        """Get a list of the wet processes IDs."""
-        return set(self._dict.keys())
-
-    @classmethod
-    def from_array_of_dicts(
-        cls, arr: typing.Sequence[BundleDict]
-    ) -> typing.Self:
-        """Build a WetProcesses instance."""
-        wet_processes = cls()
-
-        for d in arr:
-            wet_processes.add(WetProcess(**d))
-
-        return wet_processes
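
Note: a short sketch of the container's factory and duplicate handling, with invented bundle dicts (each dict must at least carry 'proc_id'):

bundle_dicts = [
    {"proc_id": "wetA", "bundle_file": "a.yml", "kit": "TruSeq"},
    {"proc_id": "wetB", "bundle_file": "b.yml", "kit": "Nextera"},
]
procs = WetProcesses.from_array_of_dicts(bundle_dicts)
print(len(procs))                 # 2
print(procs["wetA"].data["kit"])  # 'TruSeq'

procs.add(WetProcess(proc_id="wetA"))  # raises ValueError: duplicate ID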