genelastic 0.6.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/api/cli_start_api.py +18 -0
- genelastic/api/extends/example.py +2 -3
- genelastic/api/extends/example.yml +20 -0
- genelastic/api/routes.py +160 -23
- genelastic/api/server.py +42 -31
- genelastic/api/settings.py +5 -8
- genelastic/api/specification.yml +350 -0
- genelastic/common/__init__.py +41 -9
- genelastic/common/cli.py +103 -23
- genelastic/common/elastic.py +80 -49
- genelastic/common/exceptions.py +0 -2
- genelastic/common/server.py +51 -0
- genelastic/common/types.py +20 -15
- genelastic/import_data/__init__.py +23 -5
- genelastic/import_data/analyses.py +17 -20
- genelastic/import_data/analysis.py +69 -65
- genelastic/import_data/bi_process.py +7 -5
- genelastic/import_data/bi_processes.py +8 -8
- genelastic/import_data/cli_gen_data.py +143 -0
- genelastic/import_data/cli_import.py +379 -0
- genelastic/import_data/{info.py → cli_info.py} +104 -75
- genelastic/import_data/cli_integrity.py +384 -0
- genelastic/import_data/cli_validate.py +54 -0
- genelastic/import_data/constants.py +11 -32
- genelastic/import_data/data_file.py +23 -20
- genelastic/import_data/filename_pattern.py +26 -32
- genelastic/import_data/import_bundle.py +56 -47
- genelastic/import_data/import_bundle_factory.py +166 -158
- genelastic/import_data/logger.py +22 -18
- genelastic/import_data/random_bundle.py +425 -0
- genelastic/import_data/tags.py +46 -26
- genelastic/import_data/wet_process.py +8 -4
- genelastic/import_data/wet_processes.py +13 -8
- genelastic/ui/__init__.py +0 -0
- genelastic/ui/cli_start_ui.py +18 -0
- genelastic/ui/routes.py +86 -0
- genelastic/ui/server.py +14 -0
- genelastic/ui/settings.py +7 -0
- genelastic/ui/templates/analyses.html +11 -0
- genelastic/ui/templates/bi_processes.html +11 -0
- genelastic/ui/templates/home.html +4 -0
- genelastic/ui/templates/layout.html +34 -0
- genelastic/ui/templates/version.html +9 -0
- genelastic/ui/templates/wet_processes.html +11 -0
- genelastic-0.8.0.dist-info/METADATA +109 -0
- genelastic-0.8.0.dist-info/RECORD +52 -0
- {genelastic-0.6.1.dist-info → genelastic-0.8.0.dist-info}/WHEEL +1 -1
- genelastic-0.8.0.dist-info/entry_points.txt +8 -0
- genelastic/import_data/gen_data.py +0 -194
- genelastic/import_data/import_data.py +0 -292
- genelastic/import_data/integrity.py +0 -290
- genelastic/import_data/validate_data.py +0 -43
- genelastic-0.6.1.dist-info/METADATA +0 -41
- genelastic-0.6.1.dist-info/RECORD +0 -36
- genelastic-0.6.1.dist-info/entry_points.txt +0 -6
- {genelastic-0.6.1.dist-info → genelastic-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module: import_bundle
|
|
1
|
+
"""Module: import_bundle
|
|
3
2
|
|
|
4
3
|
This module provides functionality for importing data bundles.
|
|
5
4
|
"""
|
|
@@ -10,63 +9,68 @@ import typing
|
|
|
10
9
|
|
|
11
10
|
from genelastic.common import BundleDict
|
|
12
11
|
|
|
12
|
+
from .analyses import Analyses
|
|
13
13
|
from .bi_processes import BioInfoProcesses
|
|
14
|
-
from .data_file import DataFile
|
|
15
14
|
from .constants import BUNDLE_CURRENT_VERSION
|
|
16
|
-
from .
|
|
15
|
+
from .data_file import DataFile
|
|
17
16
|
from .tags import Tags
|
|
18
17
|
from .wet_processes import WetProcesses
|
|
19
18
|
|
|
20
|
-
logger = logging.getLogger(
|
|
19
|
+
logger = logging.getLogger("genelastic")
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class ImportBundle:
|
|
24
23
|
"""Class for handling an import bundle description."""
|
|
25
24
|
|
|
26
|
-
def __init__(
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
analyses:
|
|
30
|
-
wet_processes:
|
|
31
|
-
bi_processes:
|
|
25
|
+
def __init__( # noqa: C901
|
|
26
|
+
self, x: typing.Sequence[BundleDict], *, check: bool = False
|
|
27
|
+
) -> None:
|
|
28
|
+
analyses: list[BundleDict] = []
|
|
29
|
+
wet_processes: list[BundleDict] = []
|
|
30
|
+
bi_processes: list[BundleDict] = []
|
|
32
31
|
tags = Tags(x)
|
|
33
32
|
|
|
34
33
|
# Loop on dicts
|
|
35
34
|
for d in x:
|
|
36
35
|
# Check version
|
|
37
|
-
if
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
if "version" not in d:
|
|
37
|
+
msg = "No version inside YAML document."
|
|
38
|
+
raise RuntimeError(msg)
|
|
39
|
+
if int(d["version"]) != BUNDLE_CURRENT_VERSION:
|
|
40
|
+
raise RuntimeError
|
|
41
41
|
|
|
42
42
|
# Gather all analyses
|
|
43
|
-
if
|
|
43
|
+
if "analyses" in d and d["analyses"] is not None:
|
|
44
44
|
# Copy some bundle properties into each analysis
|
|
45
|
-
for analysis in d[
|
|
46
|
-
for key in [
|
|
45
|
+
for analysis in d["analyses"]:
|
|
46
|
+
for key in ["bundle_file", "root_dir"]:
|
|
47
47
|
if key in d:
|
|
48
48
|
analysis[key] = d[key]
|
|
49
49
|
|
|
50
50
|
# Add the tags to use.
|
|
51
|
-
analysis[
|
|
51
|
+
analysis["tags"] = tags
|
|
52
52
|
|
|
53
|
-
analyses.extend(d[
|
|
53
|
+
analyses.extend(d["analyses"])
|
|
54
54
|
|
|
55
55
|
# If some wet processes are defined, copy the bundle file path into each of them.
|
|
56
|
-
if
|
|
57
|
-
for wet_process in d[
|
|
58
|
-
wet_process[
|
|
59
|
-
wet_processes.extend(d[
|
|
56
|
+
if "wet_processes" in d and d["wet_processes"] is not None:
|
|
57
|
+
for wet_process in d["wet_processes"]:
|
|
58
|
+
wet_process["bundle_file"] = d["bundle_file"]
|
|
59
|
+
wet_processes.extend(d["wet_processes"])
|
|
60
60
|
|
|
61
61
|
# If some bio processes are defined, copy the bundle file path into each of them.
|
|
62
|
-
if
|
|
63
|
-
for bi_process in d[
|
|
64
|
-
bi_process[
|
|
65
|
-
bi_processes.extend(d[
|
|
62
|
+
if "bi_processes" in d and d["bi_processes"] is not None:
|
|
63
|
+
for bi_process in d["bi_processes"]:
|
|
64
|
+
bi_process["bundle_file"] = d["bundle_file"]
|
|
65
|
+
bi_processes.extend(d["bi_processes"])
|
|
66
66
|
|
|
67
67
|
# Instantiate all objects
|
|
68
|
-
self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(
|
|
69
|
-
|
|
68
|
+
self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(
|
|
69
|
+
wet_processes
|
|
70
|
+
)
|
|
71
|
+
self._bi_processes: BioInfoProcesses = (
|
|
72
|
+
BioInfoProcesses.from_array_of_dicts(bi_processes)
|
|
73
|
+
)
|
|
70
74
|
self._analyses: Analyses = Analyses.from_array_of_dicts(analyses)
|
|
71
75
|
|
|
72
76
|
if check:
|
|
@@ -79,17 +83,27 @@ class ImportBundle:
|
|
|
79
83
|
for index, analysis in enumerate(self._analyses):
|
|
80
84
|
analysis_wet_process = analysis.metadata.get("wet_process")
|
|
81
85
|
|
|
82
|
-
if (
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
if (
|
|
87
|
+
analysis_wet_process
|
|
88
|
+
and analysis_wet_process
|
|
89
|
+
not in self._wet_processes.get_process_ids()
|
|
90
|
+
):
|
|
91
|
+
sys.exit(
|
|
92
|
+
f"Analysis at index {index} in file {analysis.bundle_file} "
|
|
93
|
+
f"is referencing an undefined wet process: {analysis_wet_process}"
|
|
94
|
+
)
|
|
86
95
|
|
|
87
96
|
analysis_bi_process = analysis.metadata.get("bi_process")
|
|
88
97
|
|
|
89
|
-
if (
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
98
|
+
if (
|
|
99
|
+
analysis_bi_process
|
|
100
|
+
and analysis_bi_process
|
|
101
|
+
not in self._bi_processes.get_process_ids()
|
|
102
|
+
):
|
|
103
|
+
sys.exit(
|
|
104
|
+
f"Analysis at index {index} in file {analysis.bundle_file} "
|
|
105
|
+
f"is referencing an undefined bi process: {analysis_bi_process}"
|
|
106
|
+
)
|
|
93
107
|
|
|
94
108
|
@property
|
|
95
109
|
def analyses(self) -> Analyses:
|
|
@@ -111,10 +125,9 @@ class ImportBundle:
|
|
|
111
125
|
files = self.get_files(cat)
|
|
112
126
|
return len(files)
|
|
113
127
|
|
|
114
|
-
def get_files(self, cat: str | None = None) ->
|
|
128
|
+
def get_files(self, cat: str | None = None) -> list[DataFile]:
|
|
115
129
|
"""Returns all files of a category."""
|
|
116
|
-
|
|
117
|
-
files: typing.List[DataFile] = []
|
|
130
|
+
files: list[DataFile] = []
|
|
118
131
|
|
|
119
132
|
# Loop on all analyses
|
|
120
133
|
for analysis in self.analyses:
|
|
@@ -124,12 +137,8 @@ class ImportBundle:
|
|
|
124
137
|
|
|
125
138
|
def get_nb_matched_files(self) -> int:
|
|
126
139
|
"""Get the number of files that match the pattern."""
|
|
127
|
-
|
|
128
|
-
return sum(a.get_nb_files()
|
|
129
|
-
for a in self.analyses)
|
|
140
|
+
return sum(a.get_nb_files() for a in self.analyses)
|
|
130
141
|
|
|
131
142
|
def get_nb_unmatched_files(self) -> int:
|
|
132
143
|
"""Get the number of files that do not match."""
|
|
133
|
-
|
|
134
|
-
return sum(len(a.get_unmatched_file_paths())
|
|
135
|
-
for a in self.analyses)
|
|
144
|
+
return sum(len(a.get_unmatched_file_paths()) for a in self.analyses)
|
|
@@ -1,29 +1,25 @@
|
|
|
1
|
-
"""ImportBundle factory module.
|
|
2
|
-
"""
|
|
1
|
+
"""ImportBundle factory module."""
|
|
3
2
|
|
|
4
3
|
import logging
|
|
5
|
-
import os
|
|
6
4
|
import re
|
|
7
5
|
import sys
|
|
8
|
-
import
|
|
9
|
-
from yaml.parser import ParserError
|
|
10
|
-
from yaml.scanner import ScannerError
|
|
6
|
+
from pathlib import Path
|
|
11
7
|
|
|
12
|
-
import schema
|
|
8
|
+
import schema
|
|
13
9
|
import yaml
|
|
10
|
+
from yaml.parser import ParserError
|
|
11
|
+
from yaml.scanner import ScannerError
|
|
14
12
|
|
|
15
13
|
from genelastic.common import BundleDict
|
|
16
14
|
|
|
17
|
-
from .import_bundle import ImportBundle
|
|
18
15
|
from .constants import BUNDLE_CURRENT_VERSION
|
|
16
|
+
from .import_bundle import ImportBundle
|
|
19
17
|
|
|
20
|
-
logger = logging.getLogger(
|
|
18
|
+
logger = logging.getLogger("genelastic")
|
|
21
19
|
|
|
22
20
|
|
|
23
21
|
def validate_tag_char(s: str) -> bool:
|
|
24
|
-
"""
|
|
25
|
-
A tag should only contain one special character, excluding the following : (, ), ?, <, >.
|
|
26
|
-
"""
|
|
22
|
+
"""A tag should only contain one special character, excluding the following : (, ), ?, <, >."""
|
|
27
23
|
if len(s) > 1:
|
|
28
24
|
return False
|
|
29
25
|
|
|
@@ -31,115 +27,123 @@ def validate_tag_char(s: str) -> bool:
|
|
|
31
27
|
|
|
32
28
|
|
|
33
29
|
def validate_field_chars(s: str) -> bool:
|
|
34
|
-
"""
|
|
35
|
-
Fields should only contain word characters.
|
|
30
|
+
"""Fields should only contain word characters.
|
|
36
31
|
A word character is a character a-z, A-Z, 0-9, including _ (underscore).
|
|
37
32
|
"""
|
|
38
33
|
return re.match(r"^\w+$", s) is not None
|
|
39
34
|
|
|
40
35
|
|
|
41
|
-
_SCHEMA_V1 = schema.Schema(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
})
|
|
36
|
+
_SCHEMA_V1 = schema.Schema(
|
|
37
|
+
{"version": 1, schema.Optional("vcf_files"): schema.Or(None, [str])}
|
|
38
|
+
)
|
|
45
39
|
|
|
46
|
-
_SCHEMA_V2 = schema.Schema(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
schema.Optional(
|
|
50
|
-
|
|
40
|
+
_SCHEMA_V2 = schema.Schema(
|
|
41
|
+
{
|
|
42
|
+
"version": 2,
|
|
43
|
+
schema.Optional("vcf"): {
|
|
44
|
+
schema.Optional("filename_pattern"): str,
|
|
45
|
+
"files": [str],
|
|
46
|
+
},
|
|
51
47
|
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
_SCHEMA_V3 = schema.Schema(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
schema.Or(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
schema.Optional("
|
|
121
|
-
schema.And(
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
_SCHEMA_V3 = schema.Schema(
|
|
51
|
+
{
|
|
52
|
+
"version": 3,
|
|
53
|
+
schema.Optional("analyses"): schema.Or(
|
|
54
|
+
None,
|
|
55
|
+
[
|
|
56
|
+
{
|
|
57
|
+
schema.Optional("file_prefix"): str,
|
|
58
|
+
schema.Optional("files"): [str],
|
|
59
|
+
schema.Optional("sample_name"): str,
|
|
60
|
+
schema.Optional("source"): str,
|
|
61
|
+
schema.Optional("barcode"): str,
|
|
62
|
+
schema.Optional("wet_process"): str,
|
|
63
|
+
schema.Optional("bi_process"): str,
|
|
64
|
+
schema.Optional("reference_genome"): str,
|
|
65
|
+
schema.Optional("flowcell"): str,
|
|
66
|
+
schema.Optional("lanes"): [int],
|
|
67
|
+
schema.Optional("seq_indices"): [str],
|
|
68
|
+
schema.Optional("cov_depth"): int,
|
|
69
|
+
schema.Optional("qc_comment"): str,
|
|
70
|
+
schema.Optional("data_path"): str,
|
|
71
|
+
}
|
|
72
|
+
],
|
|
73
|
+
),
|
|
74
|
+
schema.Optional("wet_processes"): schema.Or(
|
|
75
|
+
None,
|
|
76
|
+
[
|
|
77
|
+
{
|
|
78
|
+
"proc_id": str,
|
|
79
|
+
"manufacturer": str,
|
|
80
|
+
"sequencer": str,
|
|
81
|
+
"generic_kit": str,
|
|
82
|
+
"fragmentation": int,
|
|
83
|
+
"reads_size": int,
|
|
84
|
+
"input_type": str,
|
|
85
|
+
"amplification": str,
|
|
86
|
+
"flowcell_type": str,
|
|
87
|
+
"sequencing_type": str,
|
|
88
|
+
schema.Optional("desc"): str,
|
|
89
|
+
schema.Optional("library_kit"): str,
|
|
90
|
+
schema.Optional("sequencing_kit"): str,
|
|
91
|
+
schema.Optional("error_rate_expected"): float,
|
|
92
|
+
}
|
|
93
|
+
],
|
|
94
|
+
),
|
|
95
|
+
schema.Optional("bi_processes"): schema.Or(
|
|
96
|
+
None,
|
|
97
|
+
[
|
|
98
|
+
{
|
|
99
|
+
"proc_id": str,
|
|
100
|
+
"name": str,
|
|
101
|
+
"pipeline_version": str,
|
|
102
|
+
schema.Optional("steps"): [
|
|
103
|
+
{
|
|
104
|
+
"name": str,
|
|
105
|
+
"cmd": str,
|
|
106
|
+
schema.Optional("version"): str,
|
|
107
|
+
schema.Optional("output"): str,
|
|
108
|
+
}
|
|
109
|
+
],
|
|
110
|
+
"sequencing_type": str,
|
|
111
|
+
schema.Optional("desc"): str,
|
|
112
|
+
}
|
|
113
|
+
],
|
|
114
|
+
),
|
|
115
|
+
schema.Optional("tags"): {
|
|
116
|
+
schema.Optional("format"): {
|
|
117
|
+
schema.Optional("prefix"): schema.And(
|
|
118
|
+
str,
|
|
119
|
+
validate_tag_char,
|
|
120
|
+
error="Key 'prefix' should only contain one special character, "
|
|
121
|
+
"excluding the following : (, ), ?, <, >.",
|
|
122
|
+
),
|
|
123
|
+
schema.Optional("suffix"): schema.And(
|
|
124
|
+
str,
|
|
125
|
+
validate_tag_char,
|
|
126
|
+
error="Key 'suffix' should only contain one special character, "
|
|
127
|
+
"excluding the following : (, ), ?, <, >.",
|
|
128
|
+
),
|
|
129
|
+
},
|
|
130
|
+
"match": {
|
|
131
|
+
schema.And(
|
|
132
|
+
str,
|
|
133
|
+
validate_field_chars,
|
|
134
|
+
error="Tags listed under the 'match' key should only contain "
|
|
135
|
+
"word characters. A word character is a character "
|
|
136
|
+
"a-z, A-Z, 0-9, including _ (underscore).",
|
|
137
|
+
): {"field": str, "regex": str}
|
|
138
|
+
},
|
|
126
139
|
},
|
|
127
|
-
"match": {
|
|
128
|
-
schema.And(str,
|
|
129
|
-
validate_field_chars,
|
|
130
|
-
error="Tags listed under the 'match' key should only contain "
|
|
131
|
-
"word characters. A word character is a character "
|
|
132
|
-
"a-z, A-Z, 0-9, including _ (underscore)."
|
|
133
|
-
): {
|
|
134
|
-
"field": str,
|
|
135
|
-
"regex": str
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
140
|
}
|
|
139
|
-
|
|
141
|
+
)
|
|
140
142
|
|
|
141
143
|
|
|
142
|
-
def make_import_bundle_from_files(
|
|
144
|
+
def make_import_bundle_from_files(
|
|
145
|
+
files: list[Path], *, check: bool = False
|
|
146
|
+
) -> ImportBundle:
|
|
143
147
|
"""Create an ImportBundle instance from a list of YAML files."""
|
|
144
148
|
all_documents = []
|
|
145
149
|
for file in files:
|
|
@@ -148,17 +152,19 @@ def make_import_bundle_from_files(files: typing.List[str], check: bool = False)
|
|
|
148
152
|
|
|
149
153
|
for i, new_document in enumerate(new_documents):
|
|
150
154
|
# Upgrade each new document to the latest/current version.
|
|
151
|
-
if new_document[
|
|
152
|
-
new_documents[i] = upgrade_bundle_version(
|
|
155
|
+
if new_document["version"] != BUNDLE_CURRENT_VERSION:
|
|
156
|
+
new_documents[i] = upgrade_bundle_version(
|
|
157
|
+
new_document, BUNDLE_CURRENT_VERSION
|
|
158
|
+
)
|
|
153
159
|
# Set the root directory path in each new document.
|
|
154
|
-
new_documents[i][
|
|
160
|
+
new_documents[i]["root_dir"] = str(file.parent)
|
|
155
161
|
# Set the original bundle YAML file path in each new document.
|
|
156
|
-
new_documents[i][
|
|
162
|
+
new_documents[i]["bundle_file"] = str(file)
|
|
157
163
|
|
|
158
164
|
all_documents.extend(new_documents)
|
|
159
165
|
|
|
160
166
|
# Create bundle instance.
|
|
161
|
-
return ImportBundle(all_documents, check)
|
|
167
|
+
return ImportBundle(all_documents, check=check)
|
|
162
168
|
|
|
163
169
|
|
|
164
170
|
def set_version(x: BundleDict) -> None:
|
|
@@ -166,54 +172,51 @@ def set_version(x: BundleDict) -> None:
|
|
|
166
172
|
|
|
167
173
|
Deduce the version number from the keys present inside the dictionary.
|
|
168
174
|
"""
|
|
169
|
-
|
|
170
175
|
# Empty doc
|
|
171
176
|
if len(x) == 0:
|
|
172
|
-
x[
|
|
177
|
+
x["version"] = BUNDLE_CURRENT_VERSION
|
|
173
178
|
|
|
174
179
|
# Wrong content in version field
|
|
175
|
-
elif
|
|
176
|
-
if not isinstance(x[
|
|
177
|
-
|
|
180
|
+
elif "version" in x:
|
|
181
|
+
if not isinstance(x["version"], int):
|
|
182
|
+
msg = "Version must be an integer."
|
|
183
|
+
raise ValueError(msg)
|
|
178
184
|
|
|
179
185
|
# Version 1
|
|
180
|
-
elif
|
|
181
|
-
x[
|
|
186
|
+
elif "vcf_files" in x or "cov_files" in x:
|
|
187
|
+
x["version"] = 1
|
|
182
188
|
|
|
183
189
|
# Version 2
|
|
184
|
-
elif
|
|
185
|
-
x[
|
|
190
|
+
elif "vcf" in x and "filename_pattern" in x["vcf"]:
|
|
191
|
+
x["version"] = 2
|
|
186
192
|
|
|
187
193
|
# Latest version
|
|
188
194
|
else:
|
|
189
|
-
x[
|
|
195
|
+
x["version"] = BUNDLE_CURRENT_VERSION
|
|
190
196
|
|
|
191
197
|
|
|
192
198
|
def validate_doc(x: BundleDict) -> None:
|
|
193
|
-
"""Validate the dictionary using its corresponding schema.
|
|
194
|
-
"""
|
|
195
|
-
|
|
199
|
+
"""Validate the dictionary using its corresponding schema."""
|
|
196
200
|
# Get schema
|
|
197
|
-
bundle_schema = globals().get(
|
|
201
|
+
bundle_schema = globals().get("_SCHEMA_V" + str(x["version"]))
|
|
198
202
|
if bundle_schema is None:
|
|
199
|
-
raise ValueError(
|
|
200
|
-
|
|
203
|
+
raise ValueError(
|
|
204
|
+
f"Unknown version \"{x['version']}\" for import " + "bundle file."
|
|
205
|
+
)
|
|
201
206
|
|
|
202
207
|
# Validate
|
|
203
208
|
bundle_schema.validate(x)
|
|
204
209
|
|
|
205
210
|
|
|
206
|
-
def load_import_bundle_file(file:
|
|
211
|
+
def load_import_bundle_file(file: Path) -> list[BundleDict]:
|
|
207
212
|
"""Loads a YAML import bundle file."""
|
|
208
|
-
|
|
209
213
|
# Load YAML
|
|
210
214
|
logger.info('Load YAML data import file "%s".', file)
|
|
211
|
-
docs:
|
|
215
|
+
docs: list[BundleDict] = []
|
|
212
216
|
|
|
213
217
|
try:
|
|
214
|
-
with open(
|
|
215
|
-
|
|
216
|
-
docs.append(doc)
|
|
218
|
+
with file.open(encoding="utf-8") as f:
|
|
219
|
+
docs = list(yaml.safe_load_all(f))
|
|
217
220
|
except (IsADirectoryError, FileNotFoundError) as e:
|
|
218
221
|
logger.error(e)
|
|
219
222
|
sys.exit(1)
|
|
@@ -226,11 +229,11 @@ def load_import_bundle_file(file: str) -> typing.List[BundleDict]:
|
|
|
226
229
|
|
|
227
230
|
# Guess/set version
|
|
228
231
|
if docs is None:
|
|
229
|
-
docs = [{
|
|
232
|
+
docs = [{"version": BUNDLE_CURRENT_VERSION}]
|
|
230
233
|
else:
|
|
231
234
|
for i, x in enumerate(docs):
|
|
232
235
|
if x is None:
|
|
233
|
-
docs[i] = {
|
|
236
|
+
docs[i] = {"version": BUNDLE_CURRENT_VERSION}
|
|
234
237
|
else:
|
|
235
238
|
set_version(x)
|
|
236
239
|
|
|
@@ -242,20 +245,25 @@ def load_import_bundle_file(file: str) -> typing.List[BundleDict]:
|
|
|
242
245
|
|
|
243
246
|
|
|
244
247
|
def upgrade_bundle_version(x: BundleDict, to_version: int) -> BundleDict:
|
|
245
|
-
"""Upgrade a loaded import bundle dictionary.
|
|
248
|
+
"""Upgrade a loaded import bundle dictionary.
|
|
246
249
|
|
|
250
|
+
:raises ValueError: Raised if the input bundle lacks a version key or if the target version is invalid.
|
|
251
|
+
:raises TypeError: Raised if the version value in the input bundle is not an integer.
|
|
252
|
+
"""
|
|
247
253
|
# Check version
|
|
248
|
-
if
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
raise
|
|
254
|
-
|
|
254
|
+
if "version" not in x:
|
|
255
|
+
msg = "No version in input bundle dictionary."
|
|
256
|
+
raise ValueError(msg)
|
|
257
|
+
if not isinstance(x["version"], int):
|
|
258
|
+
msg = "Version of input bundle is not an integer."
|
|
259
|
+
raise TypeError(msg)
|
|
260
|
+
if x["version"] >= to_version:
|
|
261
|
+
msg = f"Original version ({x['version']}) is greater or equal to target version ({to_version})."
|
|
262
|
+
raise ValueError(msg)
|
|
255
263
|
|
|
256
264
|
# Loop on upgrades to run
|
|
257
265
|
y = x.copy()
|
|
258
|
-
for v in range(x[
|
|
266
|
+
for v in range(x["version"], to_version):
|
|
259
267
|
upgrade_fct = globals().get(f"_upgrade_from_v{v}_to_v{v + 1}")
|
|
260
268
|
y = upgrade_fct(y) # type: ignore[misc]
|
|
261
269
|
|
|
@@ -264,9 +272,9 @@ def upgrade_bundle_version(x: BundleDict, to_version: int) -> BundleDict:
|
|
|
264
272
|
|
|
265
273
|
def _upgrade_from_v1_to_v2(x: BundleDict) -> BundleDict:
|
|
266
274
|
# Upgrade
|
|
267
|
-
y = {
|
|
268
|
-
if
|
|
269
|
-
y[
|
|
275
|
+
y = {"version": 2, "vcf": {"files": []}}
|
|
276
|
+
if "vcf_files" in x and x["vcf_files"] is not None:
|
|
277
|
+
y["vcf"]["files"] = x["vcf_files"] # type: ignore[index]
|
|
270
278
|
|
|
271
279
|
# Validate schema
|
|
272
280
|
_SCHEMA_V2.validate(y)
|
|
@@ -276,14 +284,14 @@ def _upgrade_from_v1_to_v2(x: BundleDict) -> BundleDict:
|
|
|
276
284
|
|
|
277
285
|
def _upgrade_from_v2_to_v3(x: BundleDict) -> BundleDict:
|
|
278
286
|
# Upgrade
|
|
279
|
-
y: BundleDict = {
|
|
280
|
-
if
|
|
287
|
+
y: BundleDict = {"version": 3, "analyses": []}
|
|
288
|
+
if "vcf" in x:
|
|
281
289
|
analysis_entry = {}
|
|
282
|
-
if
|
|
283
|
-
analysis_entry[
|
|
284
|
-
if
|
|
285
|
-
analysis_entry[
|
|
286
|
-
y[
|
|
290
|
+
if "files" in x["vcf"]:
|
|
291
|
+
analysis_entry["files"] = x["vcf"]["files"]
|
|
292
|
+
if "filename_pattern" in x["vcf"]:
|
|
293
|
+
analysis_entry["file_prefix"] = x["vcf"]["filename_pattern"]
|
|
294
|
+
y["analyses"].append(analysis_entry)
|
|
287
295
|
|
|
288
296
|
_SCHEMA_V3.validate(y)
|
|
289
297
|
|