loqusdb 2.7.18__py3-none-any.whl → 2.7.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loqusdb/__init__.py +1 -1
- loqusdb/build_models/profile_variant.py +6 -4
- loqusdb/build_models/variant.py +26 -11
- loqusdb/commands/annotate.py +3 -2
- loqusdb/commands/cli.py +22 -1
- loqusdb/commands/delete.py +7 -1
- loqusdb/commands/load.py +2 -1
- loqusdb/commands/load_profile.py +4 -3
- loqusdb/utils/annotate.py +4 -18
- loqusdb/utils/delete.py +20 -11
- loqusdb/utils/load.py +12 -6
- loqusdb/utils/profiling.py +4 -4
- loqusdb/utils/vcf.py +9 -9
- {loqusdb-2.7.18.dist-info → loqusdb-2.7.19.dist-info}/METADATA +1 -1
- {loqusdb-2.7.18.dist-info → loqusdb-2.7.19.dist-info}/RECORD +18 -18
- {loqusdb-2.7.18.dist-info → loqusdb-2.7.19.dist-info}/LICENSE +0 -0
- {loqusdb-2.7.18.dist-info → loqusdb-2.7.19.dist-info}/WHEEL +0 -0
- {loqusdb-2.7.18.dist-info → loqusdb-2.7.19.dist-info}/entry_points.txt +0 -0
loqusdb/__init__.py
CHANGED
@@ -24,23 +24,25 @@ def get_maf(variant):
|
|
24
24
|
return variant.INFO.get("MAF")
|
25
25
|
|
26
26
|
|
27
|
-
def build_profile_variant(variant):
|
27
|
+
def build_profile_variant(variant, keep_chr_prefix=None):
|
28
28
|
"""Returns a ProfileVariant object
|
29
29
|
|
30
30
|
Args:
|
31
31
|
variant (cyvcf2.Variant)
|
32
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
32
33
|
|
33
34
|
Returns:
|
34
35
|
variant (models.ProfileVariant)
|
35
36
|
"""
|
36
37
|
|
37
38
|
chrom = variant.CHROM
|
38
|
-
if
|
39
|
-
chrom
|
39
|
+
if not keep_chr_prefix:
|
40
|
+
if chrom.startswith(("chr", "CHR", "Chr")):
|
41
|
+
chrom = chrom[3:]
|
40
42
|
|
41
43
|
pos = int(variant.POS)
|
42
44
|
|
43
|
-
variant_id = get_variant_id(variant)
|
45
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
44
46
|
|
45
47
|
ref = variant.REF
|
46
48
|
alt = variant.ALT[0]
|
loqusdb/build_models/variant.py
CHANGED
@@ -32,11 +32,21 @@ def check_par(chrom, pos, genome_build=None):
|
|
32
32
|
)
|
33
33
|
|
34
34
|
|
35
|
-
def get_variant_id(variant):
|
36
|
-
"""Get a variant id on the format chrom_pos_ref_alt
|
35
|
+
def get_variant_id(variant, keep_chr_prefix=None):
|
36
|
+
"""Get a variant id on the format chrom_pos_ref_alt
|
37
|
+
|
38
|
+
Args:
|
39
|
+
variant (cyvcf2.Variant)
|
40
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
variant (models.ProfileVariant)
|
44
|
+
"""
|
45
|
+
|
37
46
|
chrom = variant.CHROM
|
38
|
-
if
|
39
|
-
|
47
|
+
if not keep_chr_prefix:
|
48
|
+
if chrom.lower().startswith("chr"):
|
49
|
+
chrom = chrom[3:]
|
40
50
|
return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
|
41
51
|
|
42
52
|
|
@@ -68,11 +78,12 @@ def is_greater(a, b):
|
|
68
78
|
return a_chrom == b_chrom and a.pos > b.pos
|
69
79
|
|
70
80
|
|
71
|
-
def get_coords(variant):
|
81
|
+
def get_coords(variant, keep_chr_prefix):
|
72
82
|
"""Returns a dictionary with position information
|
73
83
|
|
74
84
|
Args:
|
75
85
|
variant(cyvcf2.Variant)
|
86
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
76
87
|
|
77
88
|
Returns:
|
78
89
|
coordinates(dict)
|
@@ -86,8 +97,9 @@ def get_coords(variant):
|
|
86
97
|
"end": None,
|
87
98
|
}
|
88
99
|
chrom = variant.CHROM
|
89
|
-
if
|
90
|
-
chrom
|
100
|
+
if not keep_chr_prefix:
|
101
|
+
if chrom.startswith(("chr", "CHR", "Chr")):
|
102
|
+
chrom = chrom[3:]
|
91
103
|
coordinates["chrom"] = chrom
|
92
104
|
end_chrom = chrom
|
93
105
|
|
@@ -107,8 +119,9 @@ def get_coords(variant):
|
|
107
119
|
if sv_type == "BND":
|
108
120
|
other_coordinates = alt.strip("ATCGN").strip("[]").split(":")
|
109
121
|
end_chrom = other_coordinates[0]
|
110
|
-
if
|
111
|
-
end_chrom
|
122
|
+
if not keep_chr_prefix:
|
123
|
+
if end_chrom.startswith(("chr", "CHR", "Chr")):
|
124
|
+
end_chrom = end_chrom[3:]
|
112
125
|
|
113
126
|
end = int(other_coordinates[1])
|
114
127
|
|
@@ -148,6 +161,7 @@ def build_variant(
|
|
148
161
|
case_id: Optional[str] = None,
|
149
162
|
gq_threshold: Optional[int] = None,
|
150
163
|
gq_qual: Optional[bool] = False,
|
164
|
+
keep_chr_prefix: Optional[bool] = False,
|
151
165
|
ignore_gq_if_unset: Optional[bool] = False,
|
152
166
|
genome_build: Optional[str] = None,
|
153
167
|
) -> Variant:
|
@@ -165,6 +179,7 @@ def build_variant(
|
|
165
179
|
case_id(str): The case id
|
166
180
|
gq_threshold(int): Genotype Quality threshold
|
167
181
|
gq_qual(bool): Use variant.QUAL for quality instead of GQ
|
182
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
168
183
|
ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
|
169
184
|
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
170
185
|
|
@@ -179,14 +194,14 @@ def build_variant(
|
|
179
194
|
sv = True
|
180
195
|
|
181
196
|
# chrom_pos_ref_alt
|
182
|
-
variant_id = get_variant_id(variant)
|
197
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
183
198
|
|
184
199
|
ref = variant.REF
|
185
200
|
# ALT is an array in cyvcf2
|
186
201
|
# We allways assume splitted and normalized VCFs
|
187
202
|
alt = variant.ALT[0]
|
188
203
|
|
189
|
-
coordinates = get_coords(variant)
|
204
|
+
coordinates = get_coords(variant, keep_chr_prefix)
|
190
205
|
chrom = coordinates["chrom"]
|
191
206
|
pos = coordinates["pos"]
|
192
207
|
|
loqusdb/commands/annotate.py
CHANGED
@@ -21,6 +21,7 @@ LOG = logging.getLogger(__name__)
|
|
21
21
|
def annotate(ctx, variant_file, sv):
|
22
22
|
"""Annotate the variants in a VCF"""
|
23
23
|
adapter = ctx.obj["adapter"]
|
24
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
24
25
|
|
25
26
|
variant_path = os.path.abspath(variant_file)
|
26
27
|
|
@@ -40,9 +41,9 @@ def annotate(ctx, variant_file, sv):
|
|
40
41
|
start_inserting = datetime.now()
|
41
42
|
|
42
43
|
if sv:
|
43
|
-
annotated_variants = annotate_svs(adapter, vcf_obj)
|
44
|
+
annotated_variants = annotate_svs(adapter, vcf_obj, keep_chr_prefix)
|
44
45
|
else:
|
45
|
-
annotated_variants = annotate_snvs(adapter, vcf_obj)
|
46
|
+
annotated_variants = annotate_snvs(adapter, vcf_obj, keep_chr_prefix)
|
46
47
|
# try:
|
47
48
|
for variant in annotated_variants:
|
48
49
|
click.echo(str(variant).rstrip())
|
loqusdb/commands/cli.py
CHANGED
@@ -55,11 +55,30 @@ LOG = logging.getLogger(__name__)
|
|
55
55
|
type=click.Choice([GRCH37, GRCH38]),
|
56
56
|
help="Specify what genome build to use",
|
57
57
|
)
|
58
|
+
@click.option(
|
59
|
+
"--keep-chr-prefix",
|
60
|
+
is_flag=True,
|
61
|
+
default=False,
|
62
|
+
show_default=True,
|
63
|
+
help="Retain the 'chr/Chr/CHR' prefix for chromosomes if it is present",
|
64
|
+
)
|
58
65
|
@click.option("-v", "--verbose", is_flag=True)
|
59
66
|
@click.version_option(__version__)
|
60
67
|
@click.pass_context
|
61
68
|
def cli(
|
62
|
-
ctx,
|
69
|
+
ctx,
|
70
|
+
database,
|
71
|
+
username,
|
72
|
+
password,
|
73
|
+
authdb,
|
74
|
+
port,
|
75
|
+
host,
|
76
|
+
uri,
|
77
|
+
verbose,
|
78
|
+
config,
|
79
|
+
test,
|
80
|
+
genome_build,
|
81
|
+
keep_chr_prefix,
|
63
82
|
):
|
64
83
|
"""loqusdb: manage a local variant count database."""
|
65
84
|
loglevel = "INFO"
|
@@ -103,6 +122,7 @@ def cli(
|
|
103
122
|
adapter = MongoAdapter(client, db_name=database)
|
104
123
|
|
105
124
|
genome_build = genome_build or configs.get("genome_build") or GRCH37
|
125
|
+
keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
|
106
126
|
|
107
127
|
ctx.obj = {}
|
108
128
|
ctx.obj["db"] = database
|
@@ -114,3 +134,4 @@ def cli(
|
|
114
134
|
ctx.obj["adapter"] = adapter
|
115
135
|
ctx.obj["version"] = __version__
|
116
136
|
ctx.obj["genome_build"] = genome_build
|
137
|
+
ctx.obj["keep_chr_prefix"] = keep_chr_prefix
|
loqusdb/commands/delete.py
CHANGED
@@ -35,6 +35,7 @@ def delete(ctx, family_file, family_type, case_id):
|
|
35
35
|
ctx.abort()
|
36
36
|
|
37
37
|
adapter = ctx.obj["adapter"]
|
38
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
38
39
|
|
39
40
|
# Get a ped_parser.Family object from family file
|
40
41
|
family = None
|
@@ -59,7 +60,12 @@ def delete(ctx, family_file, family_type, case_id):
|
|
59
60
|
genome_build = ctx.obj["genome_build"]
|
60
61
|
start_deleting = datetime.now()
|
61
62
|
try:
|
62
|
-
delete_command(
|
63
|
+
delete_command(
|
64
|
+
adapter=adapter,
|
65
|
+
case_obj=existing_case,
|
66
|
+
genome_build=genome_build,
|
67
|
+
keep_chr_prefix=keep_chr_prefix,
|
68
|
+
)
|
63
69
|
except (CaseError, IOError) as error:
|
64
70
|
LOG.warning(error)
|
65
71
|
ctx.abort()
|
loqusdb/commands/load.py
CHANGED
@@ -148,7 +148,7 @@ def load(
|
|
148
148
|
|
149
149
|
adapter = ctx.obj["adapter"]
|
150
150
|
genome_build = ctx.obj["genome_build"]
|
151
|
-
|
151
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
152
152
|
start_inserting = datetime.now()
|
153
153
|
|
154
154
|
try:
|
@@ -162,6 +162,7 @@ def load(
|
|
162
162
|
case_id=case_id,
|
163
163
|
gq_threshold=gq_threshold,
|
164
164
|
snv_gq_only=snv_gq_only,
|
165
|
+
keep_chr_prefix=keep_chr_prefix,
|
165
166
|
qual_gq=qual_gq,
|
166
167
|
max_window=max_window,
|
167
168
|
profile_file=variant_profile_path,
|
loqusdb/commands/load_profile.py
CHANGED
@@ -60,13 +60,14 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
|
|
60
60
|
"""
|
61
61
|
|
62
62
|
adapter = ctx.obj["adapter"]
|
63
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
63
64
|
|
64
65
|
LOG.info("Running loqusdb profile")
|
65
66
|
|
66
67
|
if check_vcf:
|
67
68
|
LOG.info(f"Check if profile in {check_vcf} has match in database")
|
68
69
|
vcf_file = check_vcf
|
69
|
-
profiles = get_profiles(adapter, vcf_file)
|
70
|
+
profiles = get_profiles(adapter, vcf_file, keep_chr_prefix)
|
70
71
|
duplicate = check_duplicates(adapter, profiles, profile_threshold)
|
71
72
|
|
72
73
|
if duplicate is not None:
|
@@ -81,11 +82,11 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
|
|
81
82
|
if variant_file is not None:
|
82
83
|
vcf_path = variant_file
|
83
84
|
LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
|
84
|
-
load_profile_variants(adapter, vcf_path)
|
85
|
+
load_profile_variants(adapter, vcf_path, keep_chr_prefix)
|
85
86
|
|
86
87
|
if update:
|
87
88
|
LOG.info("Updates profiles in database")
|
88
|
-
update_profiles(adapter)
|
89
|
+
update_profiles(adapter, keep_chr_prefix)
|
89
90
|
|
90
91
|
if stats:
|
91
92
|
LOG.info("Prints profile stats")
|
loqusdb/utils/annotate.py
CHANGED
@@ -31,21 +31,7 @@ def annotate_variant(variant, var_obj=None):
|
|
31
31
|
return variant
|
32
32
|
|
33
33
|
|
34
|
-
def
|
35
|
-
"""Annotate an SNV/INDEL variant
|
36
|
-
|
37
|
-
Args:
|
38
|
-
adapter(loqusdb.plugin.adapter)
|
39
|
-
variant(cyvcf2.Variant)
|
40
|
-
"""
|
41
|
-
variant_id = get_variant_id(variant)
|
42
|
-
variant_obj = adapter.get_variant(variant={"_id": variant_id})
|
43
|
-
|
44
|
-
annotated_variant = annotated_variant(variant, variant_obj)
|
45
|
-
return annotated_variant
|
46
|
-
|
47
|
-
|
48
|
-
def annotate_svs(adapter, vcf_obj):
|
34
|
+
def annotate_svs(adapter, vcf_obj, keep_chr_prefix):
|
49
35
|
"""Annotate all SV variants in a VCF
|
50
36
|
|
51
37
|
Args:
|
@@ -56,14 +42,14 @@ def annotate_svs(adapter, vcf_obj):
|
|
56
42
|
variant(cyvcf2.Variant)
|
57
43
|
"""
|
58
44
|
for nr_variants, variant in enumerate(vcf_obj, 1):
|
59
|
-
variant_info = get_coords(variant)
|
45
|
+
variant_info = get_coords(variant, keep_chr_prefix)
|
60
46
|
match = adapter.get_structural_variant(variant_info)
|
61
47
|
if match:
|
62
48
|
annotate_variant(variant, match)
|
63
49
|
yield variant
|
64
50
|
|
65
51
|
|
66
|
-
def annotate_snvs(adapter, vcf_obj):
|
52
|
+
def annotate_snvs(adapter, vcf_obj, keep_chr_prefix):
|
67
53
|
"""Annotate all variants in a VCF
|
68
54
|
|
69
55
|
Args:
|
@@ -77,7 +63,7 @@ def annotate_snvs(adapter, vcf_obj):
|
|
77
63
|
|
78
64
|
for nr_variants, variant in enumerate(vcf_obj, 1):
|
79
65
|
# Add the variant to current batch
|
80
|
-
variants[get_variant_id(variant)] = variant
|
66
|
+
variants[get_variant_id(variant, keep_chr_prefix)] = variant
|
81
67
|
# If batch len == 1000 we annotate the batch
|
82
68
|
if (nr_variants % 1000) == 0:
|
83
69
|
|
loqusdb/utils/delete.py
CHANGED
@@ -9,7 +9,9 @@ from loqusdb.build_models.variant import build_variant
|
|
9
9
|
LOG = logging.getLogger(__name__)
|
10
10
|
|
11
11
|
|
12
|
-
def delete(
|
12
|
+
def delete(
|
13
|
+
adapter, case_obj, keep_chr_prefix=None, update=False, existing_case=False, genome_build=None
|
14
|
+
):
|
13
15
|
"""Delete a case and all of it's variants from the database.
|
14
16
|
|
15
17
|
Args:
|
@@ -18,6 +20,7 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
18
20
|
update(bool): If we are in the middle of an update
|
19
21
|
existing_case(models.Case): If something failed during an update we need to revert
|
20
22
|
to the original case
|
23
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
|
21
24
|
|
22
25
|
"""
|
23
26
|
# This will overwrite the updated case with the previous one
|
@@ -36,18 +39,22 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
36
39
|
if file_type == "vcf_path":
|
37
40
|
LOG.info("deleting variants")
|
38
41
|
delete_variants(
|
39
|
-
adapter=adapter,
|
42
|
+
adapter=adapter,
|
43
|
+
vcf_obj=vcf_obj,
|
44
|
+
keep_chr_prefix=keep_chr_prefix,
|
45
|
+
case_obj=case_obj,
|
46
|
+
genome_build=genome_build,
|
40
47
|
)
|
41
48
|
elif file_type == "vcf_sv_path":
|
42
49
|
LOG.info("deleting structural variants")
|
43
50
|
delete_structural_variants(
|
44
|
-
adapter=adapter,
|
45
|
-
vcf_obj=vcf_obj,
|
46
|
-
case_obj=case_obj,
|
51
|
+
adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, keep_chr_prefix=keep_chr_prefix
|
47
52
|
)
|
48
53
|
|
49
54
|
|
50
|
-
def delete_variants(
|
55
|
+
def delete_variants(
|
56
|
+
adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None, genome_build=None
|
57
|
+
):
|
51
58
|
"""Delete variants for a case in the database
|
52
59
|
|
53
60
|
Args:
|
@@ -69,7 +76,11 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
69
76
|
variant_list = []
|
70
77
|
for variant in vcf_obj:
|
71
78
|
formated_variant = build_variant(
|
72
|
-
variant=variant,
|
79
|
+
variant=variant,
|
80
|
+
case_obj=case_obj,
|
81
|
+
keep_chr_prefix=keep_chr_prefix,
|
82
|
+
case_id=case_id,
|
83
|
+
genome_build=genome_build,
|
73
84
|
)
|
74
85
|
|
75
86
|
if not formated_variant:
|
@@ -109,7 +120,7 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
109
120
|
return nr_deleted
|
110
121
|
|
111
122
|
|
112
|
-
def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
123
|
+
def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None):
|
113
124
|
"""Delete structural variants for a case in the database
|
114
125
|
|
115
126
|
Args:
|
@@ -130,9 +141,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
|
130
141
|
|
131
142
|
for variant in vcf_obj:
|
132
143
|
formated_variant = build_variant(
|
133
|
-
variant=variant,
|
134
|
-
case_obj=case_obj,
|
135
|
-
case_id=case_id,
|
144
|
+
variant=variant, case_obj=case_obj, case_id=case_id, keep_chr_prefix=keep_chr_prefix
|
136
145
|
)
|
137
146
|
|
138
147
|
if not formated_variant:
|
loqusdb/utils/load.py
CHANGED
@@ -32,6 +32,7 @@ def load_database(
|
|
32
32
|
skip_case_id=False,
|
33
33
|
gq_threshold=None,
|
34
34
|
snv_gq_only=False,
|
35
|
+
keep_chr_prefix=False,
|
35
36
|
qual_gq=False,
|
36
37
|
case_id=None,
|
37
38
|
max_window=3000,
|
@@ -51,6 +52,7 @@ def load_database(
|
|
51
52
|
family_type(str): Format of family file
|
52
53
|
skip_case_id(bool): If no case information should be added to variants
|
53
54
|
gq_threshold(int): If only quality variants should be considered
|
55
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
54
56
|
qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality
|
55
57
|
case_id(str): If different case id than the one in family file should be used
|
56
58
|
max_window(int): Specify the max size for sv windows
|
@@ -68,7 +70,7 @@ def load_database(
|
|
68
70
|
nr_variants = None
|
69
71
|
vcf_individuals = None
|
70
72
|
if variant_file:
|
71
|
-
vcf_info = check_vcf(variant_file)
|
73
|
+
vcf_info = check_vcf(variant_file, keep_chr_prefix)
|
72
74
|
nr_variants = vcf_info["nr_variants"]
|
73
75
|
variant_type = vcf_info["variant_type"]
|
74
76
|
vcf_files.append(variant_file)
|
@@ -78,7 +80,7 @@ def load_database(
|
|
78
80
|
nr_sv_variants = None
|
79
81
|
sv_individuals = None
|
80
82
|
if sv_file:
|
81
|
-
vcf_info = check_vcf(sv_file, "sv")
|
83
|
+
vcf_info = check_vcf(sv_file, keep_chr_prefix, "sv")
|
82
84
|
nr_sv_variants = vcf_info["nr_variants"]
|
83
85
|
vcf_files.append(sv_file)
|
84
86
|
sv_individuals = vcf_info["individuals"]
|
@@ -86,7 +88,7 @@ def load_database(
|
|
86
88
|
profiles = None
|
87
89
|
matches = None
|
88
90
|
if profile_file:
|
89
|
-
profiles = get_profiles(adapter, profile_file)
|
91
|
+
profiles = get_profiles(adapter, profile_file, keep_chr_prefix)
|
90
92
|
###Check if any profile already exists
|
91
93
|
matches = profile_match(
|
92
94
|
adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
|
@@ -152,6 +154,7 @@ def load_database(
|
|
152
154
|
skip_case_id=skip_case_id,
|
153
155
|
gq_threshold=gq_threshold if not snv_gq_only or variant_type == "snv" else None,
|
154
156
|
qual_gq=qual_gq,
|
157
|
+
keep_chr_prefix=keep_chr_prefix,
|
155
158
|
max_window=max_window,
|
156
159
|
variant_type=variant_type,
|
157
160
|
genome_build=genome_build,
|
@@ -200,6 +203,7 @@ def load_variants(
|
|
200
203
|
skip_case_id=False,
|
201
204
|
gq_threshold=None,
|
202
205
|
qual_gq=False,
|
206
|
+
keep_chr_prefix=False,
|
203
207
|
max_window=3000,
|
204
208
|
variant_type="snv",
|
205
209
|
genome_build=None,
|
@@ -213,6 +217,7 @@ def load_variants(
|
|
213
217
|
case_obj(Case): dict with case information
|
214
218
|
skip_case_id (bool): whether to include the case id on variant level
|
215
219
|
or not
|
220
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
216
221
|
gq_threshold(int)
|
217
222
|
qual_gq(bool): whether to use QUAL instead of GQ
|
218
223
|
max_window(int): Specify the max size for sv windows
|
@@ -242,6 +247,7 @@ def load_variants(
|
|
242
247
|
case_id,
|
243
248
|
gq_threshold,
|
244
249
|
qual_gq,
|
250
|
+
keep_chr_prefix,
|
245
251
|
ignore_gq_if_unset,
|
246
252
|
genome_build=genome_build,
|
247
253
|
)
|
@@ -263,7 +269,7 @@ def load_variants(
|
|
263
269
|
return nr_inserted
|
264
270
|
|
265
271
|
|
266
|
-
def load_profile_variants(adapter, variant_file):
|
272
|
+
def load_profile_variants(adapter, variant_file, keep_chr_prefix=None):
|
267
273
|
"""
|
268
274
|
|
269
275
|
Loads variants used for profiling
|
@@ -275,7 +281,7 @@ def load_profile_variants(adapter, variant_file):
|
|
275
281
|
|
276
282
|
"""
|
277
283
|
|
278
|
-
vcf_info = check_vcf(variant_file)
|
284
|
+
vcf_info = check_vcf(variant_file, keep_chr_prefix)
|
279
285
|
variant_type = vcf_info["variant_type"]
|
280
286
|
|
281
287
|
if variant_type != "snv":
|
@@ -284,5 +290,5 @@ def load_profile_variants(adapter, variant_file):
|
|
284
290
|
|
285
291
|
vcf = get_vcf(variant_file)
|
286
292
|
|
287
|
-
profile_variants = [build_profile_variant(variant) for variant in vcf]
|
293
|
+
profile_variants = [build_profile_variant(variant, keep_chr_prefix) for variant in vcf]
|
288
294
|
adapter.add_profile_variants(profile_variants)
|
loqusdb/utils/profiling.py
CHANGED
@@ -11,7 +11,7 @@ from .vcf import get_file_handle
|
|
11
11
|
LOG = logging.getLogger(__name__)
|
12
12
|
|
13
13
|
|
14
|
-
def get_profiles(adapter, vcf_file):
|
14
|
+
def get_profiles(adapter, vcf_file, keep_chr_prefix):
|
15
15
|
"""Given a vcf, get a profile string for each sample in the vcf
|
16
16
|
based on the profile variants in the database
|
17
17
|
|
@@ -44,7 +44,7 @@ def get_profiles(adapter, vcf_file):
|
|
44
44
|
found_variant = False
|
45
45
|
for variant in vcf(region):
|
46
46
|
|
47
|
-
variant_id = get_variant_id(variant)
|
47
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
48
48
|
|
49
49
|
# If variant id i.e. chrom_pos_ref_alt matches
|
50
50
|
if variant_id == profile_variant["_id"]:
|
@@ -183,7 +183,7 @@ def compare_profiles(profile1, profile2):
|
|
183
183
|
return similarity_ratio
|
184
184
|
|
185
185
|
|
186
|
-
def update_profiles(adapter):
|
186
|
+
def update_profiles(adapter, keep_chr_prefix):
|
187
187
|
"""
|
188
188
|
For all cases having vcf_path, update the profile string for the samples
|
189
189
|
|
@@ -198,7 +198,7 @@ def update_profiles(adapter):
|
|
198
198
|
# case with new profiled individuals.
|
199
199
|
if case.get("profile_path"):
|
200
200
|
|
201
|
-
profiles = get_profiles(adapter, case["profile_path"])
|
201
|
+
profiles = get_profiles(adapter, case["profile_path"], keep_chr_prefix)
|
202
202
|
profiled_individuals = deepcopy(case["individuals"])
|
203
203
|
|
204
204
|
for individual in profiled_individuals:
|
loqusdb/utils/vcf.py
CHANGED
@@ -89,7 +89,7 @@ def check_sorting(previous_chrom, previous_pos, current_chrom, current_pos):
|
|
89
89
|
pass
|
90
90
|
|
91
91
|
|
92
|
-
def check_vcf(vcf_path, expected_type="snv"):
|
92
|
+
def check_vcf(vcf_path, keep_chr_prefix=None, expected_type="snv"):
|
93
93
|
"""Check if there are any problems with the vcf file
|
94
94
|
|
95
95
|
Args:
|
@@ -113,7 +113,7 @@ def check_vcf(vcf_path, expected_type="snv"):
|
|
113
113
|
previous_pos = None
|
114
114
|
previous_chrom = None
|
115
115
|
|
116
|
-
|
116
|
+
position_variants = set()
|
117
117
|
|
118
118
|
nr_variants = 0
|
119
119
|
for nr_variants, variant in enumerate(vcf, 1):
|
@@ -134,36 +134,36 @@ def check_vcf(vcf_path, expected_type="snv"):
|
|
134
134
|
variant_id = "{0}_{1}".format(current_chrom, current_pos)
|
135
135
|
# For SNVs we can create a proper variant id with chrom_pos_ref_alt
|
136
136
|
if variant_type == "snv":
|
137
|
-
variant_id = get_variant_id(variant)
|
137
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
138
138
|
|
139
139
|
# Initiate variables
|
140
140
|
if not previous_chrom:
|
141
141
|
previous_chrom = current_chrom
|
142
142
|
previous_pos = current_pos
|
143
|
-
|
143
|
+
position_variants = {variant_id}
|
144
144
|
continue
|
145
145
|
|
146
146
|
# Update variables if new chromosome
|
147
147
|
if current_chrom != previous_chrom:
|
148
148
|
previous_chrom = current_chrom
|
149
149
|
previous_pos = current_pos
|
150
|
-
|
150
|
+
position_variants = {variant_id}
|
151
151
|
continue
|
152
152
|
|
153
153
|
if variant_type == "snv":
|
154
154
|
# Check if variant is unique
|
155
155
|
if current_pos == previous_pos:
|
156
|
-
if variant_id in
|
156
|
+
if variant_id in position_variants:
|
157
157
|
raise VcfError("Variant {0} occurs several times" " in vcf".format(variant_id))
|
158
158
|
else:
|
159
|
-
|
159
|
+
position_variants.add(variant_id)
|
160
160
|
# Check if vcf is sorted
|
161
161
|
else:
|
162
162
|
if not current_pos >= previous_pos:
|
163
163
|
raise VcfError("Vcf if not sorted in a correct way")
|
164
164
|
previous_pos = current_pos
|
165
|
-
# Reset
|
166
|
-
|
165
|
+
# Reset position_variants since we are on a new position
|
166
|
+
position_variants = {variant_id}
|
167
167
|
|
168
168
|
if variant_type != expected_type:
|
169
169
|
raise VcfError(
|
@@ -1,17 +1,17 @@
|
|
1
|
-
loqusdb/__init__.py,sha256=
|
1
|
+
loqusdb/__init__.py,sha256=Zs9AtDiQwuASVgXDU0xzuWv8RhaadjMaa9WD4D7BMVc,1688
|
2
2
|
loqusdb/__main__.py,sha256=8FGKySAGaWSzAYMj6HRsxeyiME3V01Idt7HrmN7pSYY,397
|
3
3
|
loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
loqusdb/build_models/case.py,sha256=AByutEYK2N3kS9JFvyZfPKNZdCpZHCSD0nNHAgaU1Cs,4127
|
5
|
-
loqusdb/build_models/profile_variant.py,sha256=
|
6
|
-
loqusdb/build_models/variant.py,sha256=
|
5
|
+
loqusdb/build_models/profile_variant.py,sha256=WzWhxq4HNvf67IknyBWYnMHQzPMZ9eitw_so6lfOkPc,1166
|
6
|
+
loqusdb/build_models/variant.py,sha256=buIQr8GsNUBBtgf78a0n5I_GiMEogohSEQJibVUuM5Y,7815
|
7
7
|
loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
|
8
|
-
loqusdb/commands/annotate.py,sha256=
|
9
|
-
loqusdb/commands/cli.py,sha256=
|
10
|
-
loqusdb/commands/delete.py,sha256=
|
8
|
+
loqusdb/commands/annotate.py,sha256=MGU9EerKYsFx1lkyjQ6ZMUKYuShi0uSTPJCS0cyxq7U,1467
|
9
|
+
loqusdb/commands/cli.py,sha256=XRprLQaENiLdqXG_7ugCC9jTcG7Uh54_M0KZj1ERFaM,3542
|
10
|
+
loqusdb/commands/delete.py,sha256=BRtm6Uade3l97FBcKFNkiYjks84AhuXYo-2QD8E74A4,2120
|
11
11
|
loqusdb/commands/export.py,sha256=HKoRzUo_BHNOdw_TcKUId9TTowi8VJVGqnuDlK-FqFE,3531
|
12
12
|
loqusdb/commands/identity.py,sha256=KLA9c8e6cJFDxtqIa1G6zdHTHK1sz2b3v1Utdtik_4k,787
|
13
|
-
loqusdb/commands/load.py,sha256=
|
14
|
-
loqusdb/commands/load_profile.py,sha256=
|
13
|
+
loqusdb/commands/load.py,sha256=pHtjldblUM-HFFgcN5UtoaxGhYmo1yeexqGq4I427qk,4996
|
14
|
+
loqusdb/commands/load_profile.py,sha256=x-T2bzi2SL5kwZhY_3hHQCtGDLao1xkxj1pZaOnzs4U,3436
|
15
15
|
loqusdb/commands/migrate.py,sha256=2C8YL-zVqnpnqg3JIyUr0rbVnb8-AGPVWNhicHnPKLo,667
|
16
16
|
loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1492
|
17
17
|
loqusdb/commands/update.py,sha256=zz3wueaJVqJ1FKact-rpY2az__5oa1LnZKf7mgqNGPk,3211
|
@@ -40,17 +40,17 @@ loqusdb/resources/loqusdb.20181005.gz,sha256=DI8CLI7fPnIAjM25Avraz-C7KQkOKsfnhgZ
|
|
40
40
|
loqusdb/resources/maf_50_sites_GRCh37.vcf.gz,sha256=BoD1_xZ-Rr8DTWCMNlQGh7gz1K8FA-j2nC4jKn_eB2A,5260
|
41
41
|
loqusdb/resources/maf_50_sites_GRCh38.vcf.gz,sha256=6T4iyrIr6yx1HpgobzAsh305BO1JX0oGj48nFiYt2QM,9037
|
42
42
|
loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
43
|
-
loqusdb/utils/annotate.py,sha256=
|
43
|
+
loqusdb/utils/annotate.py,sha256=vOHlLkenwCCLXh-cjerd9cW68eZfEtgvP0IwWh-oBHs,2347
|
44
44
|
loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
|
45
|
-
loqusdb/utils/delete.py,sha256
|
46
|
-
loqusdb/utils/load.py,sha256=
|
45
|
+
loqusdb/utils/delete.py,sha256=uj1m5i12GjUhhnCnIbh6D7BMG-oMDk6bfrJxk8zpSxE,5208
|
46
|
+
loqusdb/utils/load.py,sha256=GgJyTLSOpgcEqjvo9RXzcacQLzHZYtXF_tkyp_XJwOs,9448
|
47
47
|
loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
|
48
|
-
loqusdb/utils/profiling.py,sha256=
|
48
|
+
loqusdb/utils/profiling.py,sha256=uISq4xfRNPPedoYXS_D4dXphq8odDogfMBm_XfHBTpE,9232
|
49
49
|
loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
|
50
50
|
loqusdb/utils/variant.py,sha256=U6nMZRUf5NDDQ74nG0HBCLMnFQVgFAT6eHll_F2uiwc,2087
|
51
|
-
loqusdb/utils/vcf.py,sha256=
|
52
|
-
loqusdb-2.7.
|
53
|
-
loqusdb-2.7.
|
54
|
-
loqusdb-2.7.
|
55
|
-
loqusdb-2.7.
|
56
|
-
loqusdb-2.7.
|
51
|
+
loqusdb/utils/vcf.py,sha256=og8JBYock31v_0CnsoRhuKIJCurLCIFW8PCCQIRWF-Q,5207
|
52
|
+
loqusdb-2.7.19.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
|
53
|
+
loqusdb-2.7.19.dist-info/METADATA,sha256=w1TxeA5Lz1cYqetHRw0UGvuEUS82WfpOgxThm5hQOYs,5321
|
54
|
+
loqusdb-2.7.19.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
55
|
+
loqusdb-2.7.19.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
|
56
|
+
loqusdb-2.7.19.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|