genelastic-0.6.0-py3-none-any.whl → genelastic-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/__init__.py +0 -13
- genelastic/api/__init__.py +0 -0
- genelastic/api/extends/__init__.py +0 -0
- genelastic/api/extends/example.py +6 -0
- genelastic/api/routes.py +221 -0
- genelastic/api/server.py +80 -0
- genelastic/api/settings.py +14 -0
- genelastic/common/__init__.py +39 -0
- genelastic/common/cli.py +63 -0
- genelastic/common/elastic.py +214 -0
- genelastic/common/exceptions.py +4 -0
- genelastic/common/types.py +25 -0
- genelastic/import_data/__init__.py +27 -0
- genelastic/{analyses.py → import_data/analyses.py} +19 -20
- genelastic/{analysis.py → import_data/analysis.py} +71 -66
- genelastic/{bi_process.py → import_data/bi_process.py} +8 -6
- genelastic/{bi_processes.py → import_data/bi_processes.py} +10 -9
- genelastic/import_data/cli_gen_data.py +116 -0
- genelastic/import_data/cli_import.py +379 -0
- genelastic/import_data/cli_info.py +256 -0
- genelastic/import_data/cli_integrity.py +384 -0
- genelastic/import_data/cli_validate.py +54 -0
- genelastic/import_data/constants.py +24 -0
- genelastic/{data_file.py → import_data/data_file.py} +26 -21
- genelastic/import_data/filename_pattern.py +57 -0
- genelastic/{import_bundle.py → import_data/import_bundle.py} +58 -48
- genelastic/import_data/import_bundle_factory.py +298 -0
- genelastic/{logger.py → import_data/logger.py} +22 -18
- genelastic/import_data/random_bundle.py +402 -0
- genelastic/{tags.py → import_data/tags.py} +48 -27
- genelastic/{wet_process.py → import_data/wet_process.py} +8 -4
- genelastic/{wet_processes.py → import_data/wet_processes.py} +15 -9
- genelastic/ui/__init__.py +0 -0
- genelastic/ui/server.py +87 -0
- genelastic/ui/settings.py +11 -0
- genelastic-0.7.0.dist-info/METADATA +105 -0
- genelastic-0.7.0.dist-info/RECORD +40 -0
- {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
- genelastic-0.7.0.dist-info/entry_points.txt +6 -0
- genelastic/common.py +0 -151
- genelastic/constants.py +0 -45
- genelastic/filename_pattern.py +0 -62
- genelastic/gen_data.py +0 -193
- genelastic/import_bundle_factory.py +0 -288
- genelastic/import_data.py +0 -294
- genelastic/info.py +0 -248
- genelastic/integrity.py +0 -324
- genelastic/validate_data.py +0 -41
- genelastic-0.6.0.dist-info/METADATA +0 -36
- genelastic-0.6.0.dist-info/RECORD +0 -25
- genelastic-0.6.0.dist-info/entry_points.txt +0 -6
- {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/{import_bundle.py → import_data/import_bundle.py}

@@ -1,5 +1,4 @@
-"""
-Module: import_bundle
+"""Module: import_bundle
 
 This module provides functionality for importing data bundles.
 """
@@ -8,64 +7,70 @@ import logging
 import sys
 import typing
 
+from genelastic.common import BundleDict
+
+from .analyses import Analyses
 from .bi_processes import BioInfoProcesses
-from .data_file import DataFile
-from .common import BundleDict
 from .constants import BUNDLE_CURRENT_VERSION
-from .
+from .data_file import DataFile
 from .tags import Tags
 from .wet_processes import WetProcesses
 
-logger = logging.getLogger(
+logger = logging.getLogger("genelastic")
 
 
 class ImportBundle:
     """Class for handling an import bundle description."""
 
-    def __init__(
-
-
-        analyses:
-        wet_processes:
-        bi_processes:
+    def __init__(  # noqa: C901
+        self, x: typing.Sequence[BundleDict], *, check: bool = False
+    ) -> None:
+        analyses: list[BundleDict] = []
+        wet_processes: list[BundleDict] = []
+        bi_processes: list[BundleDict] = []
         tags = Tags(x)
 
         # Loop on dicts
         for d in x:
             # Check version
-            if
-
-
-
+            if "version" not in d:
+                msg = "No version inside YAML document."
+                raise RuntimeError(msg)
+            if int(d["version"]) != BUNDLE_CURRENT_VERSION:
+                raise RuntimeError
 
             # Gather all analyses
-            if
+            if "analyses" in d and d["analyses"] is not None:
                 # Copy some bundle properties into each analysis
-                for analysis in d[
-                    for key in [
+                for analysis in d["analyses"]:
+                    for key in ["bundle_file", "root_dir"]:
                         if key in d:
                             analysis[key] = d[key]
 
                     # Add the tags to use.
-                    analysis[
+                    analysis["tags"] = tags
 
-                analyses.extend(d[
+                analyses.extend(d["analyses"])
 
             # If some wet processes are defined, copy the bundle file path into each of them.
-            if
-                for wet_process in d[
-                    wet_process[
-                wet_processes.extend(d[
+            if "wet_processes" in d and d["wet_processes"] is not None:
+                for wet_process in d["wet_processes"]:
+                    wet_process["bundle_file"] = d["bundle_file"]
+                wet_processes.extend(d["wet_processes"])
 
             # If some bio processes are defined, copy the bundle file path into each of them.
-            if
-                for bi_process in d[
-                    bi_process[
-                bi_processes.extend(d[
+            if "bi_processes" in d and d["bi_processes"] is not None:
+                for bi_process in d["bi_processes"]:
+                    bi_process["bundle_file"] = d["bundle_file"]
+                bi_processes.extend(d["bi_processes"])
 
         # Instantiate all objects
-        self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(
-
+        self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(
+            wet_processes
+        )
+        self._bi_processes: BioInfoProcesses = (
+            BioInfoProcesses.from_array_of_dicts(bi_processes)
+        )
         self._analyses: Analyses = Analyses.from_array_of_dicts(analyses)
 
         if check:
@@ -78,17 +83,27 @@ class ImportBundle:
         for index, analysis in enumerate(self._analyses):
             analysis_wet_process = analysis.metadata.get("wet_process")
 
-            if (
-
-
-
+            if (
+                analysis_wet_process
+                and analysis_wet_process
+                not in self._wet_processes.get_process_ids()
+            ):
+                sys.exit(
+                    f"Analysis at index {index} in file {analysis.bundle_file} "
+                    f"is referencing an undefined wet process: {analysis_wet_process}"
+                )
 
             analysis_bi_process = analysis.metadata.get("bi_process")
 
-            if (
-
-
-
+            if (
+                analysis_bi_process
+                and analysis_bi_process
+                not in self._bi_processes.get_process_ids()
+            ):
+                sys.exit(
+                    f"Analysis at index {index} in file {analysis.bundle_file} "
+                    f"is referencing an undefined bi process: {analysis_bi_process}"
+                )
 
     @property
     def analyses(self) -> Analyses:
@@ -110,10 +125,9 @@ class ImportBundle:
         files = self.get_files(cat)
         return len(files)
 
-    def get_files(self, cat: str | None = None) ->
+    def get_files(self, cat: str | None = None) -> list[DataFile]:
         """Returns all files of a category."""
-
-        files: typing.List[DataFile] = []
+        files: list[DataFile] = []
 
         # Loop on all analyses
         for analysis in self.analyses:
@@ -123,12 +137,8 @@ class ImportBundle:
 
     def get_nb_matched_files(self) -> int:
         """Get the number of files that match the pattern."""
-
-        return sum(a.get_nb_files()
-                   for a in self.analyses)
+        return sum(a.get_nb_files() for a in self.analyses)
 
     def get_nb_unmatched_files(self) -> int:
         """Get the number of files that do not match."""
-
-        return sum(len(a.get_unmatched_file_paths())
-                   for a in self.analyses)
+        return sum(len(a.get_unmatched_file_paths()) for a in self.analyses)
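To make the reshaped constructor concrete, here is a hypothetical sketch of the input it consumes: a sequence of bundle dicts whose keys follow the diff above. The values are invented for illustration, and the two process entries are trimmed (the full set of required keys is spelled out by `_SCHEMA_V3` in the new factory module below).

```python
# Hypothetical ImportBundle input (invented values; keys taken from the diff above).
# Each dict is one YAML document, already upgraded to version 3 and annotated
# with "bundle_file" / "root_dir" by the factory module.
docs = [
    {
        "version": 3,
        "bundle_file": "run42/bundle.yml",  # copied into analyses and processes
        "root_dir": "run42",                # copied into each analysis
        "analyses": [
            {"sample_name": "sampleA", "wet_process": "wp1", "bi_process": "bp1"},
        ],
        "wet_processes": [{"proc_id": "wp1"}],  # trimmed: the schema requires more keys
        "bi_processes": [{"proc_id": "bp1"}],   # trimmed: the schema requires more keys
    },
]

# With check=True, the constructor exits if an analysis references a
# wet_process/bi_process id that no defined process declares:
# bundle = ImportBundle(docs, check=True)
```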
genelastic/import_data/import_bundle_factory.py (new file)

@@ -0,0 +1,298 @@
+"""ImportBundle factory module."""
+
+import logging
+import re
+import sys
+from pathlib import Path
+
+import schema
+import yaml
+from yaml.parser import ParserError
+from yaml.scanner import ScannerError
+
+from genelastic.common import BundleDict
+
+from .constants import BUNDLE_CURRENT_VERSION
+from .import_bundle import ImportBundle
+
+logger = logging.getLogger("genelastic")
+
+
+def validate_tag_char(s: str) -> bool:
+    """A tag should only contain one special character, excluding the following : (, ), ?, <, >."""
+    if len(s) > 1:
+        return False
+
+    return re.match(r"^[^\w()<>?]$", s) is not None
+
+
+def validate_field_chars(s: str) -> bool:
+    """Fields should only contain word characters.
+    A word character is a character a-z, A-Z, 0-9, including _ (underscore).
+    """
+    return re.match(r"^\w+$", s) is not None
+
+
+_SCHEMA_V1 = schema.Schema(
+    {"version": 1, schema.Optional("vcf_files"): schema.Or(None, [str])}
+)
+
+_SCHEMA_V2 = schema.Schema(
+    {
+        "version": 2,
+        schema.Optional("vcf"): {
+            schema.Optional("filename_pattern"): str,
+            "files": [str],
+        },
+    }
+)
+
+_SCHEMA_V3 = schema.Schema(
+    {
+        "version": 3,
+        schema.Optional("analyses"): schema.Or(
+            None,
+            [
+                {
+                    schema.Optional("file_prefix"): str,
+                    schema.Optional("files"): [str],
+                    schema.Optional("sample_name"): str,
+                    schema.Optional("source"): str,
+                    schema.Optional("barcode"): str,
+                    schema.Optional("wet_process"): str,
+                    schema.Optional("bi_process"): str,
+                    schema.Optional("reference_genome"): str,
+                    schema.Optional("flowcell"): str,
+                    schema.Optional("lanes"): [int],
+                    schema.Optional("seq_indices"): [str],
+                    schema.Optional("cov_depth"): int,
+                    schema.Optional("qc_comment"): str,
+                    schema.Optional("data_path"): str,
+                }
+            ],
+        ),
+        schema.Optional("wet_processes"): schema.Or(
+            None,
+            [
+                {
+                    "proc_id": str,
+                    "manufacturer": str,
+                    "sequencer": str,
+                    "generic_kit": str,
+                    "fragmentation": int,
+                    "reads_size": int,
+                    "input_type": str,
+                    "amplification": str,
+                    "flowcell_type": str,
+                    "sequencing_type": str,
+                    schema.Optional("desc"): str,
+                    schema.Optional("library_kit"): str,
+                    schema.Optional("sequencing_kit"): str,
+                    schema.Optional("error_rate_expected"): float,
+                }
+            ],
+        ),
+        schema.Optional("bi_processes"): schema.Or(
+            None,
+            [
+                {
+                    "proc_id": str,
+                    "name": str,
+                    "pipeline_version": str,
+                    schema.Optional("steps"): [
+                        {
+                            "name": str,
+                            "cmd": str,
+                            schema.Optional("version"): str,
+                            schema.Optional("output"): str,
+                        }
+                    ],
+                    "sequencing_type": str,
+                    schema.Optional("desc"): str,
+                }
+            ],
+        ),
+        schema.Optional("tags"): {
+            schema.Optional("format"): {
+                schema.Optional("prefix"): schema.And(
+                    str,
+                    validate_tag_char,
+                    error="Key 'prefix' should only contain one special character, "
+                    "excluding the following : (, ), ?, <, >.",
+                ),
+                schema.Optional("suffix"): schema.And(
+                    str,
+                    validate_tag_char,
+                    error="Key 'suffix' should only contain one special character, "
+                    "excluding the following : (, ), ?, <, >.",
+                ),
+            },
+            "match": {
+                schema.And(
+                    str,
+                    validate_field_chars,
+                    error="Tags listed under the 'match' key should only contain "
+                    "word characters. A word character is a character "
+                    "a-z, A-Z, 0-9, including _ (underscore).",
+                ): {"field": str, "regex": str}
+            },
+        },
+    }
+)
+
+
+def make_import_bundle_from_files(
+    files: list[Path], *, check: bool = False
+) -> ImportBundle:
+    """Create an ImportBundle instance from a list of YAML files."""
+    all_documents = []
+    for file in files:
+        # Load documents stored in each file.
+        new_documents = load_import_bundle_file(file)
+
+        for i, new_document in enumerate(new_documents):
+            # Upgrade each new document to the latest/current version.
+            if new_document["version"] != BUNDLE_CURRENT_VERSION:
+                new_documents[i] = upgrade_bundle_version(
+                    new_document, BUNDLE_CURRENT_VERSION
+                )
+            # Set the root directory path in each new document.
+            new_documents[i]["root_dir"] = str(file.parent)
+            # Set the original bundle YAML file path in each new document.
+            new_documents[i]["bundle_file"] = str(file)
+
+        all_documents.extend(new_documents)
+
+    # Create bundle instance.
+    return ImportBundle(all_documents, check=check)
+
+
+def set_version(x: BundleDict) -> None:
+    """Set version number.
+
+    Deduce the version number from the keys present inside the dictionary.
+    """
+    # Empty doc
+    if len(x) == 0:
+        x["version"] = BUNDLE_CURRENT_VERSION
+
+    # Wrong content in version field
+    elif "version" in x:
+        if not isinstance(x["version"], int):
+            msg = "Version must be an integer."
+            raise ValueError(msg)
+
+    # Version 1
+    elif "vcf_files" in x or "cov_files" in x:
+        x["version"] = 1
+
+    # Version 2
+    elif "vcf" in x and "filename_pattern" in x["vcf"]:
+        x["version"] = 2
+
+    # Latest version
+    else:
+        x["version"] = BUNDLE_CURRENT_VERSION
+
+
+def validate_doc(x: BundleDict) -> None:
+    """Validate the dictionary using its corresponding schema."""
+    # Get schema
+    bundle_schema = globals().get("_SCHEMA_V" + str(x["version"]))
+    if bundle_schema is None:
+        raise ValueError(
+            f"Unknown version \"{x['version']}\" for import " + "bundle file."
+        )
+
+    # Validate
+    bundle_schema.validate(x)
+
+
+def load_import_bundle_file(file: Path) -> list[BundleDict]:
+    """Loads a YAML import bundle file."""
+    # Load YAML
+    logger.info('Load YAML data import file "%s".', file)
+    docs: list[BundleDict] = []
+
+    try:
+        with file.open(encoding="utf-8") as f:
+            docs = list(yaml.safe_load_all(f))
+    except (IsADirectoryError, FileNotFoundError) as e:
+        logger.error(e)
+        sys.exit(1)
+    except ScannerError as e:
+        logger.error("YAML file lexical analysis failed : %s", e)
+        sys.exit(1)
+    except ParserError as e:
+        logger.error("YAML file syntactic analysis failed : %s", e)
+        sys.exit(1)
+
+    # Guess/set version
+    if docs is None:
+        docs = [{"version": BUNDLE_CURRENT_VERSION}]
+    else:
+        for i, x in enumerate(docs):
+            if x is None:
+                docs[i] = {"version": BUNDLE_CURRENT_VERSION}
+            else:
+                set_version(x)
+
+    # Find schema and validate document
+    for x in docs:
+        validate_doc(x)
+
+    return docs
+
+
+def upgrade_bundle_version(x: BundleDict, to_version: int) -> BundleDict:
+    """Upgrade a loaded import bundle dictionary.
+
+    :raises ValueError: Raised if the input bundle lacks a version key or if the target version is invalid.
+    :raises TypeError: Raised if the version value in the input bundle is not an integer.
+    """
+    # Check version
+    if "version" not in x:
+        msg = "No version in input bundle dictionary."
+        raise ValueError(msg)
+    if not isinstance(x["version"], int):
+        msg = "Version of input bundle is not an integer."
+        raise TypeError(msg)
+    if x["version"] >= to_version:
+        msg = f"Original version ({x['version']}) is greater or equal to target version ({to_version})."
+        raise ValueError(msg)
+
+    # Loop on upgrades to run
+    y = x.copy()
+    for v in range(x["version"], to_version):
+        upgrade_fct = globals().get(f"_upgrade_from_v{v}_to_v{v + 1}")
+        y = upgrade_fct(y)  # type: ignore[misc]
+
+    return y
+
+
+def _upgrade_from_v1_to_v2(x: BundleDict) -> BundleDict:
+    # Upgrade
+    y = {"version": 2, "vcf": {"files": []}}
+    if "vcf_files" in x and x["vcf_files"] is not None:
+        y["vcf"]["files"] = x["vcf_files"]  # type: ignore[index]
+
+    # Validate schema
+    _SCHEMA_V2.validate(y)
+
+    return y
+
+
+def _upgrade_from_v2_to_v3(x: BundleDict) -> BundleDict:
+    # Upgrade
+    y: BundleDict = {"version": 3, "analyses": []}
+    if "vcf" in x:
+        analysis_entry = {}
+        if "files" in x["vcf"]:
+            analysis_entry["files"] = x["vcf"]["files"]
+        if "filename_pattern" in x["vcf"]:
+            analysis_entry["file_prefix"] = x["vcf"]["filename_pattern"]
+        y["analyses"].append(analysis_entry)
+
+    _SCHEMA_V3.validate(y)
+
+    return y
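The version-upgrade chain is easiest to follow on a concrete document. The sketch below traces, by hand, what `set_version`, `_upgrade_from_v1_to_v2`, and `_upgrade_from_v2_to_v3` produce for a version-1 bundle; the file names are invented, while the dict shapes follow the code above.

```python
# A v1 document: set_version() deduces version 1 from the "vcf_files" key.
doc_v1 = {"vcf_files": ["a.vcf", "b.vcf"]}  # file names invented

# After set_version(doc_v1):
#   {"version": 1, "vcf_files": ["a.vcf", "b.vcf"]}

# After _upgrade_from_v1_to_v2 (the file list moves under a "vcf" mapping):
#   {"version": 2, "vcf": {"files": ["a.vcf", "b.vcf"]}}

# After _upgrade_from_v2_to_v3 (the "vcf" mapping becomes one analysis entry):
#   {"version": 3, "analyses": [{"files": ["a.vcf", "b.vcf"]}]}
```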
genelastic/{logger.py → import_data/logger.py}

@@ -1,4 +1,3 @@
-# pylint: disable=missing-module-docstring
 import logging
 import typing
 
@@ -8,32 +7,37 @@ import colorlog
 def configure_logging(verbose: int, log_file: str | None = None) -> None:
     """Configure logging for both import and gen-data scripts."""
     # Define TRACE level
-    logging.TRACE = 5  # type: ignore
-    logging.addLevelName(logging.TRACE, "TRACE")  # type: ignore
+    logging.TRACE = 5  # type: ignore[attr-defined]
+    logging.addLevelName(logging.TRACE, "TRACE")  # type: ignore[attr-defined]
 
-    def trace(
-
-
+    def trace(
+        self: logging.Logger,
+        message: object,
+        *args: typing.Any,  # noqa: ANN401
+        **kws: typing.Any,  # noqa: ANN401
+    ) -> None:
+        if self.isEnabledFor(logging.TRACE):  # type: ignore[attr-defined]
+            self._log(logging.TRACE, message, args, **kws)  # type: ignore[attr-defined]
 
-    logging.Logger.trace = trace  # type: ignore
+    logging.Logger.trace = trace  # type: ignore[attr-defined]
 
     # Get root logger
     root = logging.getLogger()
 
     # Define formatter for file logging.
-    fmt = logging.Formatter(
+    fmt = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
 
     # Define formatter for colored console logging.
     color_fmt = colorlog.ColoredFormatter(
-
+        "%(log_color)s%(asctime)s %(levelname)-8s %(message)s",
         log_colors={
-
-
-
-
-
-
-        }
+            "TRACE": "light_cyan",
+            "DEBUG": "light_yellow",
+            "INFO": "light_green",
+            "WARNING": "light_purple",
+            "ERROR": "light_red",
+            "CRITICAL": "light_red",
+        },
     )
 
     # Define console handler
@@ -50,7 +54,7 @@ def configure_logging(verbose: int, log_file: str | None = None) -> None:
     level_map = {
         0: logging.WARNING,  # quiet mode
         1: logging.INFO,  # default
-        2: logging.DEBUG  # verbose mode
+        2: logging.DEBUG,  # verbose mode
     }
     # If verbose is greater than 2, set level to TRACE.
-    root.setLevel(level_map.get(verbose, logging.TRACE))  # type: ignore
+    root.setLevel(level_map.get(verbose, logging.TRACE))  # type: ignore[attr-defined]