loqusdb 2.7.17__py3-none-any.whl → 2.7.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loqusdb/__init__.py +1 -1
- loqusdb/build_models/profile_variant.py +6 -4
- loqusdb/build_models/variant.py +32 -13
- loqusdb/commands/annotate.py +3 -2
- loqusdb/commands/cli.py +22 -1
- loqusdb/commands/delete.py +7 -1
- loqusdb/commands/load.py +11 -1
- loqusdb/commands/load_profile.py +4 -3
- loqusdb/utils/annotate.py +4 -18
- loqusdb/utils/delete.py +20 -11
- loqusdb/utils/load.py +28 -8
- loqusdb/utils/profiling.py +4 -4
- loqusdb/utils/vcf.py +9 -9
- {loqusdb-2.7.17.dist-info → loqusdb-2.7.19.dist-info}/METADATA +1 -1
- {loqusdb-2.7.17.dist-info → loqusdb-2.7.19.dist-info}/RECORD +18 -18
- {loqusdb-2.7.17.dist-info → loqusdb-2.7.19.dist-info}/LICENSE +0 -0
- {loqusdb-2.7.17.dist-info → loqusdb-2.7.19.dist-info}/WHEEL +0 -0
- {loqusdb-2.7.17.dist-info → loqusdb-2.7.19.dist-info}/entry_points.txt +0 -0
loqusdb/__init__.py
CHANGED
@@ -24,23 +24,25 @@ def get_maf(variant):
|
|
24
24
|
return variant.INFO.get("MAF")
|
25
25
|
|
26
26
|
|
27
|
-
def build_profile_variant(variant):
|
27
|
+
def build_profile_variant(variant, keep_chr_prefix=None):
|
28
28
|
"""Returns a ProfileVariant object
|
29
29
|
|
30
30
|
Args:
|
31
31
|
variant (cyvcf2.Variant)
|
32
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
32
33
|
|
33
34
|
Returns:
|
34
35
|
variant (models.ProfileVariant)
|
35
36
|
"""
|
36
37
|
|
37
38
|
chrom = variant.CHROM
|
38
|
-
if
|
39
|
-
chrom
|
39
|
+
if not keep_chr_prefix:
|
40
|
+
if chrom.startswith(("chr", "CHR", "Chr")):
|
41
|
+
chrom = chrom[3:]
|
40
42
|
|
41
43
|
pos = int(variant.POS)
|
42
44
|
|
43
|
-
variant_id = get_variant_id(variant)
|
45
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
44
46
|
|
45
47
|
ref = variant.REF
|
46
48
|
alt = variant.ALT[0]
|
loqusdb/build_models/variant.py
CHANGED
@@ -32,11 +32,21 @@ def check_par(chrom, pos, genome_build=None):
|
|
32
32
|
)
|
33
33
|
|
34
34
|
|
35
|
-
def get_variant_id(variant):
|
36
|
-
"""Get a variant id on the format chrom_pos_ref_alt
|
35
|
+
def get_variant_id(variant, keep_chr_prefix=None):
|
36
|
+
"""Get a variant id on the format chrom_pos_ref_alt
|
37
|
+
|
38
|
+
Args:
|
39
|
+
variant (cyvcf2.Variant)
|
40
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
variant_id (str): Variant id in the format chrom_pos_ref_alt
|
44
|
+
"""
|
45
|
+
|
37
46
|
chrom = variant.CHROM
|
38
|
-
if
|
39
|
-
|
47
|
+
if not keep_chr_prefix:
|
48
|
+
if chrom.lower().startswith("chr"):
|
49
|
+
chrom = chrom[3:]
|
40
50
|
return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
|
41
51
|
|
42
52
|
|
@@ -68,11 +78,12 @@ def is_greater(a, b):
|
|
68
78
|
return a_chrom == b_chrom and a.pos > b.pos
|
69
79
|
|
70
80
|
|
71
|
-
def get_coords(variant):
|
81
|
+
def get_coords(variant, keep_chr_prefix):
|
72
82
|
"""Returns a dictionary with position information
|
73
83
|
|
74
84
|
Args:
|
75
85
|
variant(cyvcf2.Variant)
|
86
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
76
87
|
|
77
88
|
Returns:
|
78
89
|
coordinates(dict)
|
@@ -86,8 +97,9 @@ def get_coords(variant):
|
|
86
97
|
"end": None,
|
87
98
|
}
|
88
99
|
chrom = variant.CHROM
|
89
|
-
if
|
90
|
-
chrom
|
100
|
+
if not keep_chr_prefix:
|
101
|
+
if chrom.startswith(("chr", "CHR", "Chr")):
|
102
|
+
chrom = chrom[3:]
|
91
103
|
coordinates["chrom"] = chrom
|
92
104
|
end_chrom = chrom
|
93
105
|
|
@@ -107,8 +119,9 @@ def get_coords(variant):
|
|
107
119
|
if sv_type == "BND":
|
108
120
|
other_coordinates = alt.strip("ATCGN").strip("[]").split(":")
|
109
121
|
end_chrom = other_coordinates[0]
|
110
|
-
if
|
111
|
-
end_chrom
|
122
|
+
if not keep_chr_prefix:
|
123
|
+
if end_chrom.startswith(("chr", "CHR", "Chr")):
|
124
|
+
end_chrom = end_chrom[3:]
|
112
125
|
|
113
126
|
end = int(other_coordinates[1])
|
114
127
|
|
@@ -148,6 +161,8 @@ def build_variant(
|
|
148
161
|
case_id: Optional[str] = None,
|
149
162
|
gq_threshold: Optional[int] = None,
|
150
163
|
gq_qual: Optional[bool] = False,
|
164
|
+
keep_chr_prefix: Optional[bool] = False,
|
165
|
+
ignore_gq_if_unset: Optional[bool] = False,
|
151
166
|
genome_build: Optional[str] = None,
|
152
167
|
) -> Variant:
|
153
168
|
"""Return a Variant object
|
@@ -164,6 +179,9 @@ def build_variant(
|
|
164
179
|
case_id(str): The case id
|
165
180
|
gq_threshold(int): Genotype Quality threshold
|
166
181
|
gq_qual(bool): Use variant.QUAL for quality instead of GQ
|
182
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
183
|
+
ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
|
184
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
167
185
|
|
168
186
|
Return:
|
169
187
|
formated_variant(models.Variant): A variant dictionary
|
@@ -176,14 +194,14 @@ def build_variant(
|
|
176
194
|
sv = True
|
177
195
|
|
178
196
|
# chrom_pos_ref_alt
|
179
|
-
variant_id = get_variant_id(variant)
|
197
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
180
198
|
|
181
199
|
ref = variant.REF
|
182
200
|
# ALT is an array in cyvcf2
|
183
201
|
# We always assume split and normalized VCFs
|
184
202
|
alt = variant.ALT[0]
|
185
203
|
|
186
|
-
coordinates = get_coords(variant)
|
204
|
+
coordinates = get_coords(variant, keep_chr_prefix)
|
187
205
|
chrom = coordinates["chrom"]
|
188
206
|
pos = coordinates["pos"]
|
189
207
|
|
@@ -201,14 +219,15 @@ def build_variant(
|
|
201
219
|
ind_pos = ind_obj["ind_index"]
|
202
220
|
|
203
221
|
if gq_qual:
|
204
|
-
gq =
|
222
|
+
gq = -1
|
205
223
|
if variant.QUAL:
|
206
224
|
gq = int(variant.QUAL)
|
207
225
|
|
208
226
|
if not gq_qual:
|
209
227
|
gq = int(variant.gt_quals[ind_pos])
|
210
228
|
|
211
|
-
|
229
|
+
# When gq is missing in FORMAT cyvcf2 assigns a score of -1
|
230
|
+
if (gq_threshold and 0 <= gq < gq_threshold) or (gq == -1 and not ignore_gq_if_unset):
|
212
231
|
continue
|
213
232
|
|
214
233
|
genotype = GENOTYPE_MAP[variant.gt_types[ind_pos]]
|
loqusdb/commands/annotate.py
CHANGED
@@ -21,6 +21,7 @@ LOG = logging.getLogger(__name__)
|
|
21
21
|
def annotate(ctx, variant_file, sv):
|
22
22
|
"""Annotate the variants in a VCF"""
|
23
23
|
adapter = ctx.obj["adapter"]
|
24
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
24
25
|
|
25
26
|
variant_path = os.path.abspath(variant_file)
|
26
27
|
|
@@ -40,9 +41,9 @@ def annotate(ctx, variant_file, sv):
|
|
40
41
|
start_inserting = datetime.now()
|
41
42
|
|
42
43
|
if sv:
|
43
|
-
annotated_variants = annotate_svs(adapter, vcf_obj)
|
44
|
+
annotated_variants = annotate_svs(adapter, vcf_obj, keep_chr_prefix)
|
44
45
|
else:
|
45
|
-
annotated_variants = annotate_snvs(adapter, vcf_obj)
|
46
|
+
annotated_variants = annotate_snvs(adapter, vcf_obj, keep_chr_prefix)
|
46
47
|
# try:
|
47
48
|
for variant in annotated_variants:
|
48
49
|
click.echo(str(variant).rstrip())
|
loqusdb/commands/cli.py
CHANGED
@@ -55,11 +55,30 @@ LOG = logging.getLogger(__name__)
|
|
55
55
|
type=click.Choice([GRCH37, GRCH38]),
|
56
56
|
help="Specify what genome build to use",
|
57
57
|
)
|
58
|
+
@click.option(
|
59
|
+
"--keep-chr-prefix",
|
60
|
+
is_flag=True,
|
61
|
+
default=False,
|
62
|
+
show_default=True,
|
63
|
+
help="Retain the 'chr/Chr/CHR' prefix for chromosomes if it is present",
|
64
|
+
)
|
58
65
|
@click.option("-v", "--verbose", is_flag=True)
|
59
66
|
@click.version_option(__version__)
|
60
67
|
@click.pass_context
|
61
68
|
def cli(
|
62
|
-
ctx,
|
69
|
+
ctx,
|
70
|
+
database,
|
71
|
+
username,
|
72
|
+
password,
|
73
|
+
authdb,
|
74
|
+
port,
|
75
|
+
host,
|
76
|
+
uri,
|
77
|
+
verbose,
|
78
|
+
config,
|
79
|
+
test,
|
80
|
+
genome_build,
|
81
|
+
keep_chr_prefix,
|
63
82
|
):
|
64
83
|
"""loqusdb: manage a local variant count database."""
|
65
84
|
loglevel = "INFO"
|
@@ -103,6 +122,7 @@ def cli(
|
|
103
122
|
adapter = MongoAdapter(client, db_name=database)
|
104
123
|
|
105
124
|
genome_build = genome_build or configs.get("genome_build") or GRCH37
|
125
|
+
keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
|
106
126
|
|
107
127
|
ctx.obj = {}
|
108
128
|
ctx.obj["db"] = database
|
@@ -114,3 +134,4 @@ def cli(
|
|
114
134
|
ctx.obj["adapter"] = adapter
|
115
135
|
ctx.obj["version"] = __version__
|
116
136
|
ctx.obj["genome_build"] = genome_build
|
137
|
+
ctx.obj["keep_chr_prefix"] = keep_chr_prefix
|
loqusdb/commands/delete.py
CHANGED
@@ -35,6 +35,7 @@ def delete(ctx, family_file, family_type, case_id):
|
|
35
35
|
ctx.abort()
|
36
36
|
|
37
37
|
adapter = ctx.obj["adapter"]
|
38
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
38
39
|
|
39
40
|
# Get a ped_parser.Family object from family file
|
40
41
|
family = None
|
@@ -59,7 +60,12 @@ def delete(ctx, family_file, family_type, case_id):
|
|
59
60
|
genome_build = ctx.obj["genome_build"]
|
60
61
|
start_deleting = datetime.now()
|
61
62
|
try:
|
62
|
-
delete_command(
|
63
|
+
delete_command(
|
64
|
+
adapter=adapter,
|
65
|
+
case_obj=existing_case,
|
66
|
+
genome_build=genome_build,
|
67
|
+
keep_chr_prefix=keep_chr_prefix,
|
68
|
+
)
|
63
69
|
except (CaseError, IOError) as error:
|
64
70
|
LOG.warning(error)
|
65
71
|
ctx.abort()
|
loqusdb/commands/load.py
CHANGED
@@ -95,6 +95,13 @@ def validate_profile_threshold(ctx, param, value):
|
|
95
95
|
show_default=True,
|
96
96
|
help="Apply GQ threshold only to SNV variants",
|
97
97
|
)
|
98
|
+
@click.option(
|
99
|
+
"--ignore-gq-if-unset",
|
100
|
+
is_flag=True,
|
101
|
+
default=False,
|
102
|
+
show_default=True,
|
103
|
+
help="Ignore GQ threshold if GQ (or the QUAL field for --qual-gq) is unset in VCF",
|
104
|
+
)
|
98
105
|
@click.pass_context
|
99
106
|
def load(
|
100
107
|
ctx,
|
@@ -112,6 +119,7 @@ def load(
|
|
112
119
|
soft_threshold,
|
113
120
|
qual_gq,
|
114
121
|
snv_gq_only,
|
122
|
+
ignore_gq_if_unset,
|
115
123
|
):
|
116
124
|
"""Load the variants of a case
|
117
125
|
|
@@ -140,7 +148,7 @@ def load(
|
|
140
148
|
|
141
149
|
adapter = ctx.obj["adapter"]
|
142
150
|
genome_build = ctx.obj["genome_build"]
|
143
|
-
|
151
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
144
152
|
start_inserting = datetime.now()
|
145
153
|
|
146
154
|
try:
|
@@ -154,12 +162,14 @@ def load(
|
|
154
162
|
case_id=case_id,
|
155
163
|
gq_threshold=gq_threshold,
|
156
164
|
snv_gq_only=snv_gq_only,
|
165
|
+
keep_chr_prefix=keep_chr_prefix,
|
157
166
|
qual_gq=qual_gq,
|
158
167
|
max_window=max_window,
|
159
168
|
profile_file=variant_profile_path,
|
160
169
|
hard_threshold=hard_threshold,
|
161
170
|
soft_threshold=soft_threshold,
|
162
171
|
genome_build=genome_build,
|
172
|
+
ignore_gq_if_unset=ignore_gq_if_unset,
|
163
173
|
)
|
164
174
|
except (SyntaxError, CaseError, IOError) as error:
|
165
175
|
LOG.warning(error)
|
loqusdb/commands/load_profile.py
CHANGED
@@ -60,13 +60,14 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
|
|
60
60
|
"""
|
61
61
|
|
62
62
|
adapter = ctx.obj["adapter"]
|
63
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
63
64
|
|
64
65
|
LOG.info("Running loqusdb profile")
|
65
66
|
|
66
67
|
if check_vcf:
|
67
68
|
LOG.info(f"Check if profile in {check_vcf} has match in database")
|
68
69
|
vcf_file = check_vcf
|
69
|
-
profiles = get_profiles(adapter, vcf_file)
|
70
|
+
profiles = get_profiles(adapter, vcf_file, keep_chr_prefix)
|
70
71
|
duplicate = check_duplicates(adapter, profiles, profile_threshold)
|
71
72
|
|
72
73
|
if duplicate is not None:
|
@@ -81,11 +82,11 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
|
|
81
82
|
if variant_file is not None:
|
82
83
|
vcf_path = variant_file
|
83
84
|
LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
|
84
|
-
load_profile_variants(adapter, vcf_path)
|
85
|
+
load_profile_variants(adapter, vcf_path, keep_chr_prefix)
|
85
86
|
|
86
87
|
if update:
|
87
88
|
LOG.info("Updates profiles in database")
|
88
|
-
update_profiles(adapter)
|
89
|
+
update_profiles(adapter, keep_chr_prefix)
|
89
90
|
|
90
91
|
if stats:
|
91
92
|
LOG.info("Prints profile stats")
|
loqusdb/utils/annotate.py
CHANGED
@@ -31,21 +31,7 @@ def annotate_variant(variant, var_obj=None):
|
|
31
31
|
return variant
|
32
32
|
|
33
33
|
|
34
|
-
def
|
35
|
-
"""Annotate an SNV/INDEL variant
|
36
|
-
|
37
|
-
Args:
|
38
|
-
adapter(loqusdb.plugin.adapter)
|
39
|
-
variant(cyvcf2.Variant)
|
40
|
-
"""
|
41
|
-
variant_id = get_variant_id(variant)
|
42
|
-
variant_obj = adapter.get_variant(variant={"_id": variant_id})
|
43
|
-
|
44
|
-
annotated_variant = annotated_variant(variant, variant_obj)
|
45
|
-
return annotated_variant
|
46
|
-
|
47
|
-
|
48
|
-
def annotate_svs(adapter, vcf_obj):
|
34
|
+
def annotate_svs(adapter, vcf_obj, keep_chr_prefix):
|
49
35
|
"""Annotate all SV variants in a VCF
|
50
36
|
|
51
37
|
Args:
|
@@ -56,14 +42,14 @@ def annotate_svs(adapter, vcf_obj):
|
|
56
42
|
variant(cyvcf2.Variant)
|
57
43
|
"""
|
58
44
|
for nr_variants, variant in enumerate(vcf_obj, 1):
|
59
|
-
variant_info = get_coords(variant)
|
45
|
+
variant_info = get_coords(variant, keep_chr_prefix)
|
60
46
|
match = adapter.get_structural_variant(variant_info)
|
61
47
|
if match:
|
62
48
|
annotate_variant(variant, match)
|
63
49
|
yield variant
|
64
50
|
|
65
51
|
|
66
|
-
def annotate_snvs(adapter, vcf_obj):
|
52
|
+
def annotate_snvs(adapter, vcf_obj, keep_chr_prefix):
|
67
53
|
"""Annotate all variants in a VCF
|
68
54
|
|
69
55
|
Args:
|
@@ -77,7 +63,7 @@ def annotate_snvs(adapter, vcf_obj):
|
|
77
63
|
|
78
64
|
for nr_variants, variant in enumerate(vcf_obj, 1):
|
79
65
|
# Add the variant to current batch
|
80
|
-
variants[get_variant_id(variant)] = variant
|
66
|
+
variants[get_variant_id(variant, keep_chr_prefix)] = variant
|
81
67
|
# If batch len == 1000 we annotate the batch
|
82
68
|
if (nr_variants % 1000) == 0:
|
83
69
|
|
loqusdb/utils/delete.py
CHANGED
@@ -9,7 +9,9 @@ from loqusdb.build_models.variant import build_variant
|
|
9
9
|
LOG = logging.getLogger(__name__)
|
10
10
|
|
11
11
|
|
12
|
-
def delete(
|
12
|
+
def delete(
|
13
|
+
adapter, case_obj, keep_chr_prefix=None, update=False, existing_case=False, genome_build=None
|
14
|
+
):
|
13
15
|
"""Delete a case and all of its variants from the database.
|
14
16
|
|
15
17
|
Args:
|
@@ -18,6 +20,7 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
18
20
|
update(bool): If we are in the middle of an update
|
19
21
|
existing_case(models.Case): If something failed during an update we need to revert
|
20
22
|
to the original case
|
23
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
|
21
24
|
|
22
25
|
"""
|
23
26
|
# This will overwrite the updated case with the previous one
|
@@ -36,18 +39,22 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
36
39
|
if file_type == "vcf_path":
|
37
40
|
LOG.info("deleting variants")
|
38
41
|
delete_variants(
|
39
|
-
adapter=adapter,
|
42
|
+
adapter=adapter,
|
43
|
+
vcf_obj=vcf_obj,
|
44
|
+
keep_chr_prefix=keep_chr_prefix,
|
45
|
+
case_obj=case_obj,
|
46
|
+
genome_build=genome_build,
|
40
47
|
)
|
41
48
|
elif file_type == "vcf_sv_path":
|
42
49
|
LOG.info("deleting structural variants")
|
43
50
|
delete_structural_variants(
|
44
|
-
adapter=adapter,
|
45
|
-
vcf_obj=vcf_obj,
|
46
|
-
case_obj=case_obj,
|
51
|
+
adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, keep_chr_prefix=keep_chr_prefix
|
47
52
|
)
|
48
53
|
|
49
54
|
|
50
|
-
def delete_variants(
|
55
|
+
def delete_variants(
|
56
|
+
adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None, genome_build=None
|
57
|
+
):
|
51
58
|
"""Delete variants for a case in the database
|
52
59
|
|
53
60
|
Args:
|
@@ -69,7 +76,11 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
69
76
|
variant_list = []
|
70
77
|
for variant in vcf_obj:
|
71
78
|
formated_variant = build_variant(
|
72
|
-
variant=variant,
|
79
|
+
variant=variant,
|
80
|
+
case_obj=case_obj,
|
81
|
+
keep_chr_prefix=keep_chr_prefix,
|
82
|
+
case_id=case_id,
|
83
|
+
genome_build=genome_build,
|
73
84
|
)
|
74
85
|
|
75
86
|
if not formated_variant:
|
@@ -109,7 +120,7 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
109
120
|
return nr_deleted
|
110
121
|
|
111
122
|
|
112
|
-
def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
123
|
+
def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None):
|
113
124
|
"""Delete structural variants for a case in the database
|
114
125
|
|
115
126
|
Args:
|
@@ -130,9 +141,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
|
130
141
|
|
131
142
|
for variant in vcf_obj:
|
132
143
|
formated_variant = build_variant(
|
133
|
-
variant=variant,
|
134
|
-
case_obj=case_obj,
|
135
|
-
case_id=case_id,
|
144
|
+
variant=variant, case_obj=case_obj, case_id=case_id, keep_chr_prefix=keep_chr_prefix
|
136
145
|
)
|
137
146
|
|
138
147
|
if not formated_variant:
|
loqusdb/utils/load.py
CHANGED
@@ -32,6 +32,7 @@ def load_database(
|
|
32
32
|
skip_case_id=False,
|
33
33
|
gq_threshold=None,
|
34
34
|
snv_gq_only=False,
|
35
|
+
keep_chr_prefix=False,
|
35
36
|
qual_gq=False,
|
36
37
|
case_id=None,
|
37
38
|
max_window=3000,
|
@@ -39,6 +40,7 @@ def load_database(
|
|
39
40
|
hard_threshold=0.95,
|
40
41
|
soft_threshold=0.9,
|
41
42
|
genome_build=None,
|
43
|
+
ignore_gq_if_unset=False,
|
42
44
|
):
|
43
45
|
"""Load the database with a case and its variants
|
44
46
|
|
@@ -50,12 +52,15 @@ def load_database(
|
|
50
52
|
family_type(str): Format of family file
|
51
53
|
skip_case_id(bool): If no case information should be added to variants
|
52
54
|
gq_threshold(int): If only quality variants should be considered
|
55
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
53
56
|
qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality
|
54
57
|
case_id(str): If different case id than the one in family file should be used
|
55
58
|
max_window(int): Specify the max size for sv windows
|
56
59
|
check_profile(bool): Does profile check if True
|
57
60
|
hard_threshold(float): Rejects load if hamming distance above this is found
|
58
61
|
soft_threshold(float): Stores similar samples if hamming distance above this is found
|
62
|
+
genome_build(str): Store the genome version
|
63
|
+
ignore_gq_if_unset(bool): Ignore the GQ threshold check for variants that do not have a GQ or QUAL set
|
59
64
|
|
60
65
|
Returns:
|
61
66
|
nr_inserted(int)
|
@@ -65,7 +70,7 @@ def load_database(
|
|
65
70
|
nr_variants = None
|
66
71
|
vcf_individuals = None
|
67
72
|
if variant_file:
|
68
|
-
vcf_info = check_vcf(variant_file)
|
73
|
+
vcf_info = check_vcf(variant_file, keep_chr_prefix)
|
69
74
|
nr_variants = vcf_info["nr_variants"]
|
70
75
|
variant_type = vcf_info["variant_type"]
|
71
76
|
vcf_files.append(variant_file)
|
@@ -75,7 +80,7 @@ def load_database(
|
|
75
80
|
nr_sv_variants = None
|
76
81
|
sv_individuals = None
|
77
82
|
if sv_file:
|
78
|
-
vcf_info = check_vcf(sv_file, "sv")
|
83
|
+
vcf_info = check_vcf(sv_file, keep_chr_prefix, "sv")
|
79
84
|
nr_sv_variants = vcf_info["nr_variants"]
|
80
85
|
vcf_files.append(sv_file)
|
81
86
|
sv_individuals = vcf_info["individuals"]
|
@@ -83,7 +88,7 @@ def load_database(
|
|
83
88
|
profiles = None
|
84
89
|
matches = None
|
85
90
|
if profile_file:
|
86
|
-
profiles = get_profiles(adapter, profile_file)
|
91
|
+
profiles = get_profiles(adapter, profile_file, keep_chr_prefix)
|
87
92
|
###Check if any profile already exists
|
88
93
|
matches = profile_match(
|
89
94
|
adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
|
@@ -149,9 +154,11 @@ def load_database(
|
|
149
154
|
skip_case_id=skip_case_id,
|
150
155
|
gq_threshold=gq_threshold if not snv_gq_only or variant_type == "snv" else None,
|
151
156
|
qual_gq=qual_gq,
|
157
|
+
keep_chr_prefix=keep_chr_prefix,
|
152
158
|
max_window=max_window,
|
153
159
|
variant_type=variant_type,
|
154
160
|
genome_build=genome_build,
|
161
|
+
ignore_gq_if_unset=ignore_gq_if_unset,
|
155
162
|
)
|
156
163
|
except Exception as err:
|
157
164
|
# If something went wrong do a rollback
|
@@ -196,21 +203,27 @@ def load_variants(
|
|
196
203
|
skip_case_id=False,
|
197
204
|
gq_threshold=None,
|
198
205
|
qual_gq=False,
|
206
|
+
keep_chr_prefix=False,
|
199
207
|
max_window=3000,
|
200
208
|
variant_type="snv",
|
201
209
|
genome_build=None,
|
210
|
+
ignore_gq_if_unset=False,
|
202
211
|
):
|
203
212
|
"""Load variants for a family into the database.
|
204
213
|
|
205
214
|
Args:
|
206
215
|
adapter (loqusdb.plugins.Adapter): initialized plugin
|
216
|
+
vcf_obj(cyvcf2.VCF): Iterable with cyvcf2.Variant
|
207
217
|
case_obj(Case): dict with case information
|
208
|
-
nr_variants(int)
|
209
218
|
skip_case_id (bool): whether to include the case id on variant level
|
210
219
|
or not
|
220
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
211
221
|
gq_threshold(int)
|
222
|
+
qual_gq(bool): whether to use QUAL instead of GQ
|
212
223
|
max_window(int): Specify the max size for sv windows
|
213
224
|
variant_type(str): 'sv' or 'snv'
|
225
|
+
genome_build(str): Genome version. Ex. GRCH37
|
226
|
+
ignore_gq_if_unset (bool): whether to add entries that have missing GQ or QUAL field
|
214
227
|
|
215
228
|
Returns:
|
216
229
|
nr_inserted(int)
|
@@ -229,7 +242,14 @@ def load_variants(
|
|
229
242
|
|
230
243
|
variants = (
|
231
244
|
build_variant(
|
232
|
-
variant,
|
245
|
+
variant,
|
246
|
+
case_obj,
|
247
|
+
case_id,
|
248
|
+
gq_threshold,
|
249
|
+
qual_gq,
|
250
|
+
keep_chr_prefix,
|
251
|
+
ignore_gq_if_unset,
|
252
|
+
genome_build=genome_build,
|
233
253
|
)
|
234
254
|
for variant in bar
|
235
255
|
)
|
@@ -249,7 +269,7 @@ def load_variants(
|
|
249
269
|
return nr_inserted
|
250
270
|
|
251
271
|
|
252
|
-
def load_profile_variants(adapter, variant_file):
|
272
|
+
def load_profile_variants(adapter, variant_file, keep_chr_prefix=None):
|
253
273
|
"""
|
254
274
|
|
255
275
|
Loads variants used for profiling
|
@@ -261,7 +281,7 @@ def load_profile_variants(adapter, variant_file):
|
|
261
281
|
|
262
282
|
"""
|
263
283
|
|
264
|
-
vcf_info = check_vcf(variant_file)
|
284
|
+
vcf_info = check_vcf(variant_file, keep_chr_prefix)
|
265
285
|
variant_type = vcf_info["variant_type"]
|
266
286
|
|
267
287
|
if variant_type != "snv":
|
@@ -270,5 +290,5 @@ def load_profile_variants(adapter, variant_file):
|
|
270
290
|
|
271
291
|
vcf = get_vcf(variant_file)
|
272
292
|
|
273
|
-
profile_variants = [build_profile_variant(variant) for variant in vcf]
|
293
|
+
profile_variants = [build_profile_variant(variant, keep_chr_prefix) for variant in vcf]
|
274
294
|
adapter.add_profile_variants(profile_variants)
|
loqusdb/utils/profiling.py
CHANGED
@@ -11,7 +11,7 @@ from .vcf import get_file_handle
|
|
11
11
|
LOG = logging.getLogger(__name__)
|
12
12
|
|
13
13
|
|
14
|
-
def get_profiles(adapter, vcf_file):
|
14
|
+
def get_profiles(adapter, vcf_file, keep_chr_prefix):
|
15
15
|
"""Given a vcf, get a profile string for each sample in the vcf
|
16
16
|
based on the profile variants in the database
|
17
17
|
|
@@ -44,7 +44,7 @@ def get_profiles(adapter, vcf_file):
|
|
44
44
|
found_variant = False
|
45
45
|
for variant in vcf(region):
|
46
46
|
|
47
|
-
variant_id = get_variant_id(variant)
|
47
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
48
48
|
|
49
49
|
# If variant id i.e. chrom_pos_ref_alt matches
|
50
50
|
if variant_id == profile_variant["_id"]:
|
@@ -183,7 +183,7 @@ def compare_profiles(profile1, profile2):
|
|
183
183
|
return similarity_ratio
|
184
184
|
|
185
185
|
|
186
|
-
def update_profiles(adapter):
|
186
|
+
def update_profiles(adapter, keep_chr_prefix):
|
187
187
|
"""
|
188
188
|
For all cases having vcf_path, update the profile string for the samples
|
189
189
|
|
@@ -198,7 +198,7 @@ def update_profiles(adapter):
|
|
198
198
|
# case with new profiled individuals.
|
199
199
|
if case.get("profile_path"):
|
200
200
|
|
201
|
-
profiles = get_profiles(adapter, case["profile_path"])
|
201
|
+
profiles = get_profiles(adapter, case["profile_path"], keep_chr_prefix)
|
202
202
|
profiled_individuals = deepcopy(case["individuals"])
|
203
203
|
|
204
204
|
for individual in profiled_individuals:
|
loqusdb/utils/vcf.py
CHANGED
@@ -89,7 +89,7 @@ def check_sorting(previous_chrom, previous_pos, current_chrom, current_pos):
|
|
89
89
|
pass
|
90
90
|
|
91
91
|
|
92
|
-
def check_vcf(vcf_path, expected_type="snv"):
|
92
|
+
def check_vcf(vcf_path, keep_chr_prefix=None, expected_type="snv"):
|
93
93
|
"""Check if there are any problems with the vcf file
|
94
94
|
|
95
95
|
Args:
|
@@ -113,7 +113,7 @@ def check_vcf(vcf_path, expected_type="snv"):
|
|
113
113
|
previous_pos = None
|
114
114
|
previous_chrom = None
|
115
115
|
|
116
|
-
|
116
|
+
position_variants = set()
|
117
117
|
|
118
118
|
nr_variants = 0
|
119
119
|
for nr_variants, variant in enumerate(vcf, 1):
|
@@ -134,36 +134,36 @@ def check_vcf(vcf_path, expected_type="snv"):
|
|
134
134
|
variant_id = "{0}_{1}".format(current_chrom, current_pos)
|
135
135
|
# For SNVs we can create a proper variant id with chrom_pos_ref_alt
|
136
136
|
if variant_type == "snv":
|
137
|
-
variant_id = get_variant_id(variant)
|
137
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
138
138
|
|
139
139
|
# Initiate variables
|
140
140
|
if not previous_chrom:
|
141
141
|
previous_chrom = current_chrom
|
142
142
|
previous_pos = current_pos
|
143
|
-
|
143
|
+
position_variants = {variant_id}
|
144
144
|
continue
|
145
145
|
|
146
146
|
# Update variables if new chromosome
|
147
147
|
if current_chrom != previous_chrom:
|
148
148
|
previous_chrom = current_chrom
|
149
149
|
previous_pos = current_pos
|
150
|
-
|
150
|
+
position_variants = {variant_id}
|
151
151
|
continue
|
152
152
|
|
153
153
|
if variant_type == "snv":
|
154
154
|
# Check if variant is unique
|
155
155
|
if current_pos == previous_pos:
|
156
|
-
if variant_id in
|
156
|
+
if variant_id in position_variants:
|
157
157
|
raise VcfError("Variant {0} occurs several times" " in vcf".format(variant_id))
|
158
158
|
else:
|
159
|
-
|
159
|
+
position_variants.add(variant_id)
|
160
160
|
# Check if vcf is sorted
|
161
161
|
else:
|
162
162
|
if not current_pos >= previous_pos:
|
163
163
|
raise VcfError("Vcf if not sorted in a correct way")
|
164
164
|
previous_pos = current_pos
|
165
|
-
# Reset
|
166
|
-
|
165
|
+
# Reset position_variants since we are on a new position
|
166
|
+
position_variants = {variant_id}
|
167
167
|
|
168
168
|
if variant_type != expected_type:
|
169
169
|
raise VcfError(
|
@@ -1,17 +1,17 @@
|
|
1
|
-
loqusdb/__init__.py,sha256=
|
1
|
+
loqusdb/__init__.py,sha256=Zs9AtDiQwuASVgXDU0xzuWv8RhaadjMaa9WD4D7BMVc,1688
|
2
2
|
loqusdb/__main__.py,sha256=8FGKySAGaWSzAYMj6HRsxeyiME3V01Idt7HrmN7pSYY,397
|
3
3
|
loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
loqusdb/build_models/case.py,sha256=AByutEYK2N3kS9JFvyZfPKNZdCpZHCSD0nNHAgaU1Cs,4127
|
5
|
-
loqusdb/build_models/profile_variant.py,sha256=
|
6
|
-
loqusdb/build_models/variant.py,sha256=
|
5
|
+
loqusdb/build_models/profile_variant.py,sha256=WzWhxq4HNvf67IknyBWYnMHQzPMZ9eitw_so6lfOkPc,1166
|
6
|
+
loqusdb/build_models/variant.py,sha256=buIQr8GsNUBBtgf78a0n5I_GiMEogohSEQJibVUuM5Y,7815
|
7
7
|
loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
|
8
|
-
loqusdb/commands/annotate.py,sha256=
|
9
|
-
loqusdb/commands/cli.py,sha256=
|
10
|
-
loqusdb/commands/delete.py,sha256=
|
8
|
+
loqusdb/commands/annotate.py,sha256=MGU9EerKYsFx1lkyjQ6ZMUKYuShi0uSTPJCS0cyxq7U,1467
|
9
|
+
loqusdb/commands/cli.py,sha256=XRprLQaENiLdqXG_7ugCC9jTcG7Uh54_M0KZj1ERFaM,3542
|
10
|
+
loqusdb/commands/delete.py,sha256=BRtm6Uade3l97FBcKFNkiYjks84AhuXYo-2QD8E74A4,2120
|
11
11
|
loqusdb/commands/export.py,sha256=HKoRzUo_BHNOdw_TcKUId9TTowi8VJVGqnuDlK-FqFE,3531
|
12
12
|
loqusdb/commands/identity.py,sha256=KLA9c8e6cJFDxtqIa1G6zdHTHK1sz2b3v1Utdtik_4k,787
|
13
|
-
loqusdb/commands/load.py,sha256=
|
14
|
-
loqusdb/commands/load_profile.py,sha256=
|
13
|
+
loqusdb/commands/load.py,sha256=pHtjldblUM-HFFgcN5UtoaxGhYmo1yeexqGq4I427qk,4996
|
14
|
+
loqusdb/commands/load_profile.py,sha256=x-T2bzi2SL5kwZhY_3hHQCtGDLao1xkxj1pZaOnzs4U,3436
|
15
15
|
loqusdb/commands/migrate.py,sha256=2C8YL-zVqnpnqg3JIyUr0rbVnb8-AGPVWNhicHnPKLo,667
|
16
16
|
loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1492
|
17
17
|
loqusdb/commands/update.py,sha256=zz3wueaJVqJ1FKact-rpY2az__5oa1LnZKf7mgqNGPk,3211
|
@@ -40,17 +40,17 @@ loqusdb/resources/loqusdb.20181005.gz,sha256=DI8CLI7fPnIAjM25Avraz-C7KQkOKsfnhgZ
|
|
40
40
|
loqusdb/resources/maf_50_sites_GRCh37.vcf.gz,sha256=BoD1_xZ-Rr8DTWCMNlQGh7gz1K8FA-j2nC4jKn_eB2A,5260
|
41
41
|
loqusdb/resources/maf_50_sites_GRCh38.vcf.gz,sha256=6T4iyrIr6yx1HpgobzAsh305BO1JX0oGj48nFiYt2QM,9037
|
42
42
|
loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
43
|
-
loqusdb/utils/annotate.py,sha256=
|
43
|
+
loqusdb/utils/annotate.py,sha256=vOHlLkenwCCLXh-cjerd9cW68eZfEtgvP0IwWh-oBHs,2347
|
44
44
|
loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
|
45
|
-
loqusdb/utils/delete.py,sha256
|
46
|
-
loqusdb/utils/load.py,sha256=
|
45
|
+
loqusdb/utils/delete.py,sha256=uj1m5i12GjUhhnCnIbh6D7BMG-oMDk6bfrJxk8zpSxE,5208
|
46
|
+
loqusdb/utils/load.py,sha256=GgJyTLSOpgcEqjvo9RXzcacQLzHZYtXF_tkyp_XJwOs,9448
|
47
47
|
loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
|
48
|
-
loqusdb/utils/profiling.py,sha256=
|
48
|
+
loqusdb/utils/profiling.py,sha256=uISq4xfRNPPedoYXS_D4dXphq8odDogfMBm_XfHBTpE,9232
|
49
49
|
loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
|
50
50
|
loqusdb/utils/variant.py,sha256=U6nMZRUf5NDDQ74nG0HBCLMnFQVgFAT6eHll_F2uiwc,2087
|
51
|
-
loqusdb/utils/vcf.py,sha256=
|
52
|
-
loqusdb-2.7.
|
53
|
-
loqusdb-2.7.
|
54
|
-
loqusdb-2.7.
|
55
|
-
loqusdb-2.7.
|
56
|
-
loqusdb-2.7.
|
51
|
+
loqusdb/utils/vcf.py,sha256=og8JBYock31v_0CnsoRhuKIJCurLCIFW8PCCQIRWF-Q,5207
|
52
|
+
loqusdb-2.7.19.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
|
53
|
+
loqusdb-2.7.19.dist-info/METADATA,sha256=w1TxeA5Lz1cYqetHRw0UGvuEUS82WfpOgxThm5hQOYs,5321
|
54
|
+
loqusdb-2.7.19.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
55
|
+
loqusdb-2.7.19.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
|
56
|
+
loqusdb-2.7.19.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|