genelastic-0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
+ """
+ This module defines the FilenamePattern class, used to define a filename pattern
+ and extract metadata from file names using this pattern.
+ """
+
+ import logging
+ import re
+ from .common import AnalysisMetaData
+
+ logger = logging.getLogger('genelastic')
+
+ class FilenamePattern:
+     """Class for defining a filename pattern.
+     The pattern is used to extract metadata from filenames
+     and verify filename conformity.
+     """
+
+     # Initializer
+     def __init__(self, pattern: str) -> None:
+         """
+         Initializes a FilenamePattern instance.
+
+         Args:
+             pattern (str): The pattern string used for defining
+                 the filename pattern.
+         """
+         self._re = re.compile(pattern)
+
+     def extract_metadata(self, filename: str) -> AnalysisMetaData:
+         """
+         Extracts metadata from the given filename based
+         on the defined pattern.
+
+         Args:
+             filename (str): The filename from which metadata
+                 needs to be extracted.
+
+         Returns:
+             dict: A dictionary containing the extracted metadata.
+
+         Raises:
+             RuntimeError: If parsing of the filename fails
+                 with the defined pattern.
+         """
+         m = self._re.search(filename)
+         if not m:
+             raise RuntimeError(f'Failed parsing filename "{filename}" '
+                                f'with pattern "{self._re.pattern}".')
+         return m.groupdict()
+
+     def matches_pattern(self, filename: str) -> bool:
+         """
+         Checks if the given filename matches the defined pattern.
+
+         Args:
+             filename (str): The filename to be checked.
+
+         Returns:
+             bool: True if the filename matches the pattern,
+                 False otherwise.
+         """
+         return bool(self._re.match(filename))
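
A minimal usage sketch of this class (the pattern and filename below are illustrative, not taken from the package):

    pattern = FilenamePattern(r'(?P<sample_name>[A-Z0-9]+)_(?P<barcode>[A-Z0-9]+)_chr(?P<chrom>[0-9]+)\.vcf')
    if pattern.matches_pattern('HG0003_AB12CD_chr1.vcf'):
        metadata = pattern.extract_metadata('HG0003_AB12CD_chr1.vcf')
        # metadata == {'sample_name': 'HG0003', 'barcode': 'AB12CD', 'chrom': '1'}

Note the asymmetry: matches_pattern() anchors at the start of the string (re.match), while extract_metadata() searches anywhere in it (re.search).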
genelastic/gen_data.py ADDED
@@ -0,0 +1,193 @@
+ # pylint: disable=missing-module-docstring
+ import argparse
+ import logging
+ import os
+ import random
+ import subprocess  # nosec
+ import sys
+ from typing import Dict, List, Sequence, Collection
+
+ import yaml
+
+ from genelastic.common import add_verbose_control_args
+ from .logger import configure_logging
+
+ logger = logging.getLogger('genelastic')
+
+
+ def read_args() -> argparse.Namespace:
+     # pylint: disable=R0801
+     """Read arguments from command line."""
+     parser = argparse.ArgumentParser(description='Genetics data random generator.',
+                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+     add_verbose_control_args(parser)
+     parser.add_argument('-d', '--data-folder', dest='data_folder', required=True,
+                         help='Data destination folder.')
+     parser.add_argument('--log-file', dest='log_file', help='Path to a log file.')
+     parser.add_argument('-n', '--chrom-nb', dest='chrom_nb', type=int, default=5,
+                         help='Number of chromosomes to generate.')
+     parser.add_argument('-o', '--output-yaml-file', dest='output_file', default='-',
+                         help='Output YAML file.')
+     parser.add_argument('-s', '--chrom-size', dest='chrom_size', type=int, default=2000,
+                         help='Data size (number of nucleotides) for each chromosome.')
+     return parser.parse_args()
+
+
+ def gen_cov_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
+     """Generate dummy coverage files. If an error occurs while generating coverage files, exit."""
+     files = []
+     chrom_end = chrom_sz - 1
+
+     for chrom in range(1, nb_chrom + 1):
+         output_path = os.path.join(folder, f"{prefix}_chr{chrom}_cov.tsv")
+
+         # gen-cov will output a coverage file to stdout.
+         gen_cov_cmd = ["gen-cov", "-c", str(chrom), "-p", f"0-{chrom_end}", "-d", "5-15",
+                        "-r", "0.1"]
+
+         try:
+             with open(output_path, "w", encoding="utf-8") as f:
+                 # Redirect the gen-cov output to a file.
+                 subprocess.run(gen_cov_cmd, stdout=f, check=True)  # nosec
+
+         except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
+             logger.error(e)
+             sys.exit(1)
+
+         files.append(output_path)
+
+     return files
+
+
+ def gen_vcf_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
+     """Generate dummy VCF files. If an error occurs while generating VCFs, exit."""
+     files = []
+     for chrom in range(1, nb_chrom + 1):
+         output_path = os.path.join(folder, f"{prefix}_chr{chrom}.vcf")
+
+         # gen-fasta will output a FASTA to stdout.
+         gen_fasta_cmd = ["gen-fasta", "-s", f"chr{chrom}", "-n", str(chrom_sz)]
+         # gen-vcf will output a VCF to stdout.
+         gen_vcf_cmd = ["gen-vcf", "--snp-rate", "0.02", "--ins-rate", "0.01", "--del-rate", "0.01"]
+
+         try:
+             # Pipe the output of gen-fasta to the stdin of gen-vcf.
+             with subprocess.Popen(gen_fasta_cmd, stdout=subprocess.PIPE) as gen_fasta_proc:  # nosec
+                 # Redirect the gen-vcf output to a file.
+                 with open(output_path, "w", encoding="utf-8") as f:
+                     subprocess.run(gen_vcf_cmd,
+                                    stdin=gen_fasta_proc.stdout, stdout=f, check=True)  # nosec
+         except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
+             logger.error(e)
+             sys.exit(1)
+
+         files.append(output_path)
+
+     return files
+
+
+ def gen_name(chars: str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', n: int = 4) -> str:
+     """Generate a random alphanumerical name."""
+     return ''.join(random.sample(list(chars), n))
+
+
+ def gen_data(folder: str, nb_chrom: int,
+              chrom_sz: int) -> Dict[str, int | Sequence[Collection[str]]]:
+     """Generate a dummy analysis following the V3 YAML schema."""
+     # Set metadata
+     sample_name = "HG0003"
+     source = "CNRGH"
+     barcode = gen_name(n=6)
+     wet_process = "novaseqxplus-10b"
+     bi_process = "dragen-4123"
+     reference_genome = "hg38"
+     prefix = f'{sample_name}_{source}_{wet_process}_{bi_process}_{barcode}_{reference_genome}'
+
+     wet_processes = [{
+         "proc_id": wet_process,
+         "manufacturer": "illumina",
+         "sequencer": "novaseqxplus",
+         "generic_kit": "truseq-illumina",
+         "fragmentation": 350,
+         "reads_size": 300,
+         "input_type": "gdna",
+         "amplification": "pcr-free",
+         "flowcell_type": "10b",
+         "sequencing_type": "wgs",
+     }]
+
+     bi_processes = [{
+         "proc_id": bi_process,
+         "name": "dragen",
+         "pipeline_version": "4.1.2.3",
+         "steps": [
+             {"name": "basecalling", "cmd": "bclconvert", "version": "3.9.3.2"},
+             {"name": "trimming", "cmd": "dragen"},
+             {"name": "mapping", "cmd": "dragmap"},
+             {"name": "postmapping", "cmd": "dragen", "version": "4.1.23"},
+             {"name": "smallvarcalling", "cmd": "dragen", "version": "4.1.23"},
+             {"name": "svcalling", "cmd": "dragen", "version": "4.1.23"},
+             {"name": "secondary_qc", "cmd": "dragen", "version": "4.1.23"}
+         ],
+         "sequencing_type": "wgs"
+     }]
+
+     analyses = [{
+         'file_prefix': '%S_%F_%W_%B_%A_%R_chr[0-9]+',
+         'sample_name': sample_name,
+         'source': source,
+         'barcode': barcode,
+         'wet_process': wet_process,
+         'bi_process': bi_process,
+         'reference_genome': reference_genome,
+         'flowcell': gen_name(n=8),
+         'lanes': [random.randint(1, 10)],  # nosec
+         'seq_indices': ['DUAL219', 'DUAL222', 'DUAL225', 'DUAL228', 'DUAL289'],
+         'qc_comment': "",
+         'data_path': folder,
+     }]
+
+     gen_vcf_files(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)
+     gen_cov_files(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)
+
+     return {
+         'version': 3,
+         'analyses': analyses,
+         'bi_processes': bi_processes,
+         'wet_processes': wet_processes
+     }
+
+
+ # Write import bundle YAML
+ def write_yaml(file: str, data: Dict[str, int | Sequence[Collection[str]]]) -> None:
+     """Write YAML to stdout or to a file."""
+     # Standard output
+     if file == '-':
+         print('---')
+         yaml.dump(data, sys.stdout)
+
+     # File
+     else:
+         with open(file, 'w', encoding="utf-8") as f:
+             print('---', file=f)
+             yaml.dump(data, f)
+
+
+ def main() -> None:
+     """Entry point of the gen-data script."""
+     # Read command line arguments
+     args = read_args()
+
+     # Configure logging
+     configure_logging(args.verbose, log_file=args.log_file)
+     logger.debug("Arguments: %s", args)
+
+     # Generate data
+     data = gen_data(args.data_folder, nb_chrom=args.chrom_nb, chrom_sz=args.chrom_size)
+
+     # Write to stdout or file
+     write_yaml(args.output_file, data)
+
+
+ if __name__ == '__main__':
+     main()
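
As a quick smoke test, the generator can also be driven directly from Python; this sketch assumes the gen-cov, gen-fasta and gen-vcf executables are available on the PATH:

    import tempfile

    folder = tempfile.mkdtemp()                          # destination for dummy VCF/coverage files
    bundle = gen_data(folder, nb_chrom=2, chrom_sz=500)
    write_yaml('-', bundle)                              # dump the V3 import bundle to stdout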
@@ -0,0 +1,134 @@
+ """
+ Module: import_bundle
+
+ This module provides functionality for importing data bundles.
+ """
+
+ import logging
+ import sys
+ import typing
+
+ from .bi_processes import BioInfoProcesses
+ from .data_file import DataFile
+ from .common import BundleDict
+ from .constants import BUNDLE_CURRENT_VERSION
+ from .analyses import Analyses
+ from .tags import Tags
+ from .wet_processes import WetProcesses
+
+ logger = logging.getLogger('genelastic')
+
+
+ class ImportBundle:
+     """Class for handling an import bundle description."""
+
+     def __init__(self, x: typing.Sequence[BundleDict],
+                  check: bool = False) -> None:
+
+         analyses: typing.List[BundleDict] = []
+         wet_processes: typing.List[BundleDict] = []
+         bi_processes: typing.List[BundleDict] = []
+         tags = Tags(x)
+
+         # Loop on dicts
+         for d in x:
+             # Check version
+             if 'version' not in d:
+                 raise RuntimeError("No version inside YAML document.")
+             if int(d['version']) != BUNDLE_CURRENT_VERSION:
+                 raise RuntimeError(f"Unsupported bundle version {d['version']}, "
+                                    f"expected {BUNDLE_CURRENT_VERSION}.")
+
+             # Gather all analyses
+             if 'analyses' in d and d['analyses'] is not None:
+                 # Copy some bundle properties into each analysis
+                 for analysis in d['analyses']:
+                     for key in ['bundle_file', 'root_dir']:
+                         if key in d:
+                             analysis[key] = d[key]
+
+                     # Add the tags to use.
+                     analysis['tags'] = tags
+
+                 analyses.extend(d['analyses'])
+
+             # If some wet processes are defined, copy the bundle file path into each of them.
+             if 'wet_processes' in d and d['wet_processes'] is not None:
+                 for wet_process in d['wet_processes']:
+                     wet_process['bundle_file'] = d['bundle_file']
+                 wet_processes.extend(d['wet_processes'])
+
+             # If some bio processes are defined, copy the bundle file path into each of them.
+             if 'bi_processes' in d and d['bi_processes'] is not None:
+                 for bi_process in d['bi_processes']:
+                     bi_process['bundle_file'] = d['bundle_file']
+                 bi_processes.extend(d['bi_processes'])
+
+         # Instantiate all objects
+         self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(wet_processes)
+         self._bi_processes: BioInfoProcesses = BioInfoProcesses.from_array_of_dicts(bi_processes)
+         self._analyses: Analyses = Analyses.from_array_of_dicts(analyses)
+
+         if check:
+             self.check_referenced_processes()
+
+     def check_referenced_processes(self) -> None:
+         """Check if wet and bi processes referenced inside each analysis are defined.
+         If one of the processes is not defined, the program exits.
+         """
+         for index, analysis in enumerate(self._analyses):
+             analysis_wet_process = analysis.metadata.get("wet_process")
+
+             if (analysis_wet_process and
+                     analysis_wet_process not in self._wet_processes.get_process_ids()):
+                 sys.exit(f"Analysis at index {index} in file {analysis.bundle_file} "
+                          f"is referencing an undefined wet process: {analysis_wet_process}")
+
+             analysis_bi_process = analysis.metadata.get("bi_process")
+
+             if (analysis_bi_process and
+                     analysis_bi_process not in self._bi_processes.get_process_ids()):
+                 sys.exit(f"Analysis at index {index} in file {analysis.bundle_file} "
+                          f"is referencing an undefined bi process: {analysis_bi_process}")
+
+     @property
+     def analyses(self) -> Analyses:
+         """The analyses."""
+         return self._analyses
+
+     @property
+     def wet_processes(self) -> WetProcesses:
+         """The wet processes."""
+         return self._wet_processes
+
+     @property
+     def bi_processes(self) -> BioInfoProcesses:
+         """The bi processes."""
+         return self._bi_processes
+
+     def get_nb_files(self, cat: str | None = None) -> int:
+         """Get the number of files in a category."""
+         files = self.get_files(cat)
+         return len(files)
+
+     def get_files(self, cat: str | None = None) -> typing.List[DataFile]:
+         """Returns all files of a category."""
+
+         files: typing.List[DataFile] = []
+
+         # Loop on all analyses
+         for analysis in self.analyses:
+             files += analysis.get_data_files(cat)
+
+         return files
+
+     def get_nb_matched_files(self) -> int:
+         """Get the number of files that match the pattern."""
+
+         return sum(a.get_nb_files() for a in self.analyses)
+
+     def get_nb_unmatched_files(self) -> int:
+         """Get the number of files that do not match."""
+
+         return sum(len(a.get_unmatched_file_paths()) for a in self.analyses)
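
A minimal construction sketch, assuming BUNDLE_CURRENT_VERSION is 3 (as the V3 schema elsewhere in the package suggests) and that Tags and the from_array_of_dicts constructors accept empty inputs:

    doc = {'version': 3, 'bundle_file': 'bundle.yml', 'root_dir': '.'}
    bundle = ImportBundle([doc], check=True)
    assert bundle.get_nb_files() == 0   # no analyses, so no data files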
@@ -0,0 +1,288 @@
+ """ImportBundle factory module."""
+
+ import logging
+ import os
+ import re
+ import sys
+ import typing
+
+ import schema  # type: ignore[import-untyped]
+ import yaml
+ from yaml.parser import ParserError
+ from yaml.scanner import ScannerError
+
+ from .import_bundle import ImportBundle
+ from .common import BundleDict
+ from .constants import BUNDLE_CURRENT_VERSION
+
+ logger = logging.getLogger('genelastic')
+
+
+ def validate_tag_char(s: str) -> bool:
+     """
+     A tag must consist of a single special character, excluding the following: (, ), ?, <, >.
+     """
+     if len(s) > 1:
+         return False
+
+     return re.match(r"^[^\w()<>?]$", s) is not None
+
+
+ def validate_field_chars(s: str) -> bool:
+     """
+     Fields should only contain word characters.
+     A word character is a character a-z, A-Z, 0-9, including _ (underscore).
+     """
+     return re.match(r"^\w+$", s) is not None
+
+
+ _SCHEMA_V1 = schema.Schema({
+     'version': 1,
+     schema.Optional("vcf_files"): schema.Or(None, [str])
+ })
+
+ _SCHEMA_V2 = schema.Schema({
+     'version': 2,
+     schema.Optional("vcf"): {
+         schema.Optional('filename_pattern'): str,
+         'files': [str]
+     }
+ })
+
+ _SCHEMA_V3 = schema.Schema({
+     'version': 3,
+     schema.Optional('analyses'):
+         schema.Or(None, [
+             {
+                 schema.Optional('file_prefix'): str,
+                 schema.Optional('files'): [str],
+                 schema.Optional('sample_name'): str,
+                 schema.Optional('source'): str,
+                 schema.Optional('barcode'): str,
+                 schema.Optional('wet_process'): str,
+                 schema.Optional('bi_process'): str,
+                 schema.Optional('reference_genome'): str,
+                 schema.Optional('flowcell'): str,
+                 schema.Optional('lanes'): [int],
+                 schema.Optional('seq_indices'): [str],
+                 schema.Optional('cov_depth'): int,
+                 schema.Optional('qc_comment'): str,
+                 schema.Optional('data_path'): str
+             }
+         ]),
+     schema.Optional('wet_processes'):
+         schema.Or(None, [
+             {
+                 "proc_id": str,
+                 "manufacturer": str,
+                 "sequencer": str,
+                 "generic_kit": str,
+                 "fragmentation": int,
+                 "reads_size": int,
+                 "input_type": str,
+                 "amplification": str,
+                 "flowcell_type": str,
+                 "sequencing_type": str,
+                 schema.Optional("desc"): str,
+                 schema.Optional("library_kit"): str,
+                 schema.Optional("sequencing_kit"): str,
+                 schema.Optional("error_rate_expected"): float
+             }
+         ]),
+     schema.Optional('bi_processes'):
+         schema.Or(None, [
+             {
+                 "proc_id": str,
+                 "name": str,
+                 "pipeline_version": str,
+                 "steps": [
+                     {
+                         "name": str,
+                         "cmd": str,
+                         schema.Optional("version"): str,
+                         schema.Optional("output"): str,
+                     }
+                 ],
+                 "sequencing_type": str,
+                 schema.Optional("desc"): str
+             }
+         ]),
+     schema.Optional('tags'): {
+         schema.Optional("format"): {
+             schema.Optional("prefix"):
+                 schema.And(str,
+                            validate_tag_char,
+                            error="Key 'prefix' should only contain one special character, "
+                                  "excluding the following: (, ), ?, <, >."
+                            ),
+             schema.Optional("suffix"):
+                 schema.And(str,
+                            validate_tag_char,
+                            error="Key 'suffix' should only contain one special character, "
+                                  "excluding the following: (, ), ?, <, >."
+                            ),
+         },
+         "match": {
+             schema.And(str,
+                        validate_field_chars,
+                        error="Tags listed under the 'match' key should only contain "
+                              "word characters. A word character is a character "
+                              "a-z, A-Z, 0-9, including _ (underscore)."
+                        ): {
+                 "field": str,
+                 "regex": str
+             }
+         }
+     }
+ })
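
For reference, a minimal V3 document exercising the tags schema above (values are illustrative; '%' passes validate_tag_char and 'S' passes validate_field_chars):

    doc = {
        'version': 3,
        'tags': {
            'format': {'prefix': '%'},
            'match': {'S': {'field': 'sample_name', 'regex': '[A-Z0-9]+'}},
        },
    }
    _SCHEMA_V3.validate(doc)  # passes

This mirrors the '%S_%F_%W_%B_%A_%R_chr[0-9]+' file_prefix produced by gen-data, where each tag presumably expands to its field's regular expression.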
+
+
+ def make_import_bundle_from_files(files: typing.List[str], check: bool = False) -> ImportBundle:
+     """Create an ImportBundle instance from a list of YAML files."""
+     all_documents = []
+     for file in files:
+         # Load documents stored in each file.
+         new_documents = load_import_bundle_file(file)
+
+         for i, new_document in enumerate(new_documents):
+             # Upgrade each new document to the latest/current version.
+             if new_document['version'] != BUNDLE_CURRENT_VERSION:
+                 new_documents[i] = upgrade_bundle_version(new_document, BUNDLE_CURRENT_VERSION)
+             # Set the root directory path in each new document.
+             new_documents[i]['root_dir'] = os.path.dirname(file)
+             # Set the original bundle YAML file path in each new document.
+             new_documents[i]['bundle_file'] = file
+
+         all_documents.extend(new_documents)
+
+     # Create bundle instance.
+     return ImportBundle(all_documents, check)
+
+
+ def set_version(x: BundleDict) -> None:
+     """Set version number.
+
+     Deduce the version number from the keys present inside the dictionary.
+     """
+     # Empty doc
+     if len(x) == 0:
+         x['version'] = BUNDLE_CURRENT_VERSION
+
+     # Wrong content in version field
+     elif 'version' in x:
+         if not isinstance(x['version'], int):
+             raise ValueError("Version must be an integer.")
+
+     # Version 1
+     elif 'vcf_files' in x or 'cov_files' in x:
+         x['version'] = 1
+
+     # Version 2
+     elif 'vcf' in x and 'filename_pattern' in x['vcf']:
+         x['version'] = 2
+
+     # Latest version
+     else:
+         x['version'] = BUNDLE_CURRENT_VERSION
+
+
+ def validate_doc(x: BundleDict) -> None:
+     """Validate the dictionary using its corresponding schema."""
+     # Get schema
+     bundle_schema = globals().get('_SCHEMA_V' + str(x['version']))
+     if bundle_schema is None:
+         raise ValueError(f"Unknown version \"{x['version']}\" for import "
+                          "bundle file.")
+
+     # Validate
+     bundle_schema.validate(x)
+
+
+ def load_import_bundle_file(file: str) -> typing.List[BundleDict]:
+     """Loads a YAML import bundle file."""
+     # Load YAML
+     logger.info('Load YAML data import file "%s".', file)
+     docs: typing.List[BundleDict] = []
+
+     try:
+         with open(file, "r", encoding="utf-8") as f:
+             for doc in yaml.safe_load_all(f):
+                 docs.append(doc)
+     except (IsADirectoryError, FileNotFoundError) as e:
+         logger.error(e)
+         sys.exit(1)
+     except ScannerError as e:
+         logger.error("YAML file lexical analysis failed: %s", e)
+         sys.exit(1)
+     except ParserError as e:
+         logger.error("YAML file syntactic analysis failed: %s", e)
+         sys.exit(1)
+
+     # Guess/set version
+     if docs is None:
+         docs = [{'version': BUNDLE_CURRENT_VERSION}]
+     else:
+         for i, x in enumerate(docs):
+             if x is None:
+                 docs[i] = {'version': BUNDLE_CURRENT_VERSION}
+             else:
+                 set_version(x)
+
+     # Find schema and validate document
+     for x in docs:
+         validate_doc(x)
+
+     return docs
+
+
+ def upgrade_bundle_version(x: BundleDict, to_version: int) -> BundleDict:
+     """Upgrade a loaded import bundle dictionary."""
+     # Check version
+     if 'version' not in x:
+         raise ValueError("No version in input bundle dictionary.")
+     if not isinstance(x['version'], int):
+         raise ValueError("Version of input bundle is not an integer.")
+     if x['version'] >= to_version:
+         raise ValueError(f"Original version ({x['version']}) is greater than or "
+                          f"equal to target version ({to_version}).")
+
+     # Loop on upgrades to run
+     y = x.copy()
+     for v in range(x['version'], to_version):
+         upgrade_fct = globals().get(f"_upgrade_from_v{v}_to_v{v + 1}")
+         y = upgrade_fct(y)  # type: ignore[misc]
+
+     return y
+
+
+ def _upgrade_from_v1_to_v2(x: BundleDict) -> BundleDict:
+     # Upgrade
+     y = {'version': 2, 'vcf': {'files': []}}
+     if 'vcf_files' in x and x['vcf_files'] is not None:
+         y['vcf']['files'] = x['vcf_files']  # type: ignore[index]
+
+     # Validate schema
+     _SCHEMA_V2.validate(y)
+
+     return y
+
+
+ def _upgrade_from_v2_to_v3(x: BundleDict) -> BundleDict:
+     # Upgrade
+     y: BundleDict = {'version': 3, 'analyses': []}
+     if 'vcf' in x:
+         analysis_entry = {}
+         if 'files' in x['vcf']:
+             analysis_entry['files'] = x['vcf']['files']
+         if 'filename_pattern' in x['vcf']:
+             analysis_entry['file_prefix'] = x['vcf']['filename_pattern']
+         y['analyses'].append(analysis_entry)
+
+     _SCHEMA_V3.validate(y)
+
+     return y
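
The chained upgrade path can be traced by hand: a V1 document carrying only vcf_files is rewritten by _upgrade_from_v1_to_v2 and then _upgrade_from_v2_to_v3, with schema validation at each step:

    v1 = {'version': 1, 'vcf_files': ['a.vcf']}
    v3 = upgrade_bundle_version(v1, to_version=3)
    # v3 == {'version': 3, 'analyses': [{'files': ['a.vcf']}]}

The globals() lookup in upgrade_bundle_version is what dispatches to these per-step _upgrade_from_v*_to_v* functions.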