loqusdb 2.7.18__tar.gz → 2.7.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loqusdb-2.7.18 → loqusdb-2.7.20}/PKG-INFO +1 -1
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/__init__.py +1 -30
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/profile_variant.py +6 -4
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/variant.py +36 -20
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/annotate.py +3 -2
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/cli.py +25 -2
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/delete.py +7 -1
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/export.py +14 -3
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/load.py +2 -1
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/load_profile.py +4 -3
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/constants/__init__.py +60 -28
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/annotate.py +4 -18
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/delete.py +27 -5
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/load.py +12 -6
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/profiling.py +4 -4
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/vcf.py +9 -9
- {loqusdb-2.7.18 → loqusdb-2.7.20}/pyproject.toml +1 -1
- loqusdb-2.7.20/tests/build_models/test_build_variant.py +30 -0
- loqusdb-2.7.20/tests/build_models/test_is_greater.py +98 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/conftest.py +67 -1
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/functional/test_cli.py +1 -1
- loqusdb-2.7.20/tests/plugins/mongo/test_get_sv.py +62 -0
- loqusdb-2.7.20/tests/plugins/mongo/test_load_svs.py +164 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_variant_operations.py +65 -2
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_delete.py +57 -5
- loqusdb-2.7.20/tests/utils/test_delete_variant.py +157 -0
- loqusdb-2.7.20/tests/utils/test_load_database.py +108 -0
- loqusdb-2.7.20/tests/utils/test_load_variants.py +398 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_profiling.py +1 -1
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/vcf_tools/test_check_par.py +11 -10
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/vcf_tools/test_check_vcf.py +1 -1
- loqusdb-2.7.20/tests/vcf_tools/test_format_sv_variant.py +244 -0
- loqusdb-2.7.20/tests/vcf_tools/test_format_variant.py +283 -0
- loqusdb-2.7.18/tests/build_models/test_build_variant.py +0 -15
- loqusdb-2.7.18/tests/build_models/test_is_greater.py +0 -49
- loqusdb-2.7.18/tests/plugins/mongo/test_get_sv.py +0 -27
- loqusdb-2.7.18/tests/plugins/mongo/test_load_svs.py +0 -74
- loqusdb-2.7.18/tests/utils/test_delete_variant.py +0 -74
- loqusdb-2.7.18/tests/utils/test_load_database.py +0 -52
- loqusdb-2.7.18/tests/utils/test_load_variants.py +0 -225
- loqusdb-2.7.18/tests/vcf_tools/test_format_sv_variant.py +0 -102
- loqusdb-2.7.18/tests/vcf_tools/test_format_variant.py +0 -113
- {loqusdb-2.7.18 → loqusdb-2.7.20}/LICENSE +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/README.md +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/__main__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/identity.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/migrate.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/restore.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/update.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/view.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/wipe.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/profile.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/vcf.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/log.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/identity.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/profile_variant.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/variant.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/adapter.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/profile_variant.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/structural_variant.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/variant.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/loqusdb.20181005.gz +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/maf_50_sites_GRCh37.vcf.gz +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/maf_50_sites_GRCh38.vcf.gz +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/__init__.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/migrate.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/update.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/variant.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/build_models/test_build_case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/commands/test_export.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/commands/test_identity.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/commands/test_view.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/643594.clinical.SV.vcf +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/643594.clinical.vcf.gz +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/double_variant.vcf +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/funny_trio.ped +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/profile_snv.vcf +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/recessive_trio.ped +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.SV.vcf +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.vcf +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.vcf.gz +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.vcf.gz.tbi +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/unsorted.vcf +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_case_operations.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_connect.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_flask_extension.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_case.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_delete_family.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_get_family.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_load_family.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_migrate.py +0 -0
- {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/vcf_tools/test_vcf.py +0 -0
@@ -4,7 +4,7 @@ from pymongo import ASCENDING, IndexModel
|
|
4
4
|
|
5
5
|
logger = logging.getLogger(__name__)
|
6
6
|
|
7
|
-
__version__ = "2.7.18"
|
7
|
+
__version__ = "2.7.20"
|
8
8
|
|
9
9
|
INDEXES = {
|
10
10
|
"variant": [
|
@@ -61,32 +61,3 @@ INDEXES = {
|
|
61
61
|
),
|
62
62
|
],
|
63
63
|
}
|
64
|
-
|
65
|
-
CHROMOSOME_ORDER = (
|
66
|
-
"1",
|
67
|
-
"2",
|
68
|
-
"3",
|
69
|
-
"4",
|
70
|
-
"5",
|
71
|
-
"6",
|
72
|
-
"7",
|
73
|
-
"8",
|
74
|
-
"9",
|
75
|
-
"10",
|
76
|
-
"11",
|
77
|
-
"12",
|
78
|
-
"13",
|
79
|
-
"14",
|
80
|
-
"15",
|
81
|
-
"16",
|
82
|
-
"17",
|
83
|
-
"18",
|
84
|
-
"19",
|
85
|
-
"20",
|
86
|
-
"21",
|
87
|
-
"22",
|
88
|
-
"23",
|
89
|
-
"X",
|
90
|
-
"Y",
|
91
|
-
"MT",
|
92
|
-
)
|
@@ -24,23 +24,25 @@ def get_maf(variant):
|
|
24
24
|
return variant.INFO.get("MAF")
|
25
25
|
|
26
26
|
|
27
|
-
def build_profile_variant(variant):
|
27
|
+
def build_profile_variant(variant, keep_chr_prefix=None):
|
28
28
|
"""Returns a ProfileVariant object
|
29
29
|
|
30
30
|
Args:
|
31
31
|
variant (cyvcf2.Variant)
|
32
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
32
33
|
|
33
34
|
Returns:
|
34
35
|
variant (models.ProfileVariant)
|
35
36
|
"""
|
36
37
|
|
37
38
|
chrom = variant.CHROM
|
38
|
-
if
|
39
|
-
chrom
|
39
|
+
if not keep_chr_prefix:
|
40
|
+
if chrom.startswith(("chr", "CHR", "Chr")):
|
41
|
+
chrom = chrom[3:]
|
40
42
|
|
41
43
|
pos = int(variant.POS)
|
42
44
|
|
43
|
-
variant_id = get_variant_id(variant)
|
45
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
44
46
|
|
45
47
|
ref = variant.REF
|
46
48
|
alt = variant.ALT[0]
|
@@ -15,49 +15,59 @@ Position = namedtuple("Position", "chrom pos")
|
|
15
15
|
# These are coordinate for the pseudo autosomal regions in GRCh37
|
16
16
|
|
17
17
|
|
18
|
-
def check_par(chrom, pos, genome_build=None):
|
18
|
+
def check_par(chrom, pos, genome_build):
|
19
19
|
"""Check if a coordinate is in the PAR region
|
20
20
|
|
21
21
|
Args:
|
22
22
|
chrom(str)
|
23
23
|
pos(int)
|
24
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
24
25
|
|
25
26
|
Returns:
|
26
27
|
par(bool)
|
27
28
|
"""
|
28
|
-
if genome_build is None:
|
29
|
-
genome_build = GRCH37
|
30
29
|
return any(
|
31
30
|
pos >= interval[0] and pos <= interval[1] for interval in PAR[genome_build].get(chrom, [])
|
32
31
|
)
|
33
32
|
|
34
33
|
|
35
|
-
def get_variant_id(variant):
|
36
|
-
"""Get a variant id on the format chrom_pos_ref_alt
|
34
|
+
def get_variant_id(variant, keep_chr_prefix=None):
|
35
|
+
"""Get a variant id on the format chrom_pos_ref_alt
|
36
|
+
|
37
|
+
Args:
|
38
|
+
variant (cyvcf2.Variant)
|
39
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
40
|
+
|
41
|
+
Returns:
|
42
|
+
variant (models.ProfileVariant)
|
43
|
+
"""
|
44
|
+
|
37
45
|
chrom = variant.CHROM
|
38
|
-
if
|
39
|
-
|
46
|
+
if not keep_chr_prefix:
|
47
|
+
if chrom.lower().startswith("chr"):
|
48
|
+
chrom = chrom[3:]
|
40
49
|
return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
|
41
50
|
|
42
51
|
|
43
|
-
def is_greater(a, b):
|
52
|
+
def is_greater(a, b, genome_build):
|
44
53
|
"""Check if position a is greater than position b
|
45
54
|
This will look at chromosome and position.
|
46
55
|
|
47
56
|
For example a position where chrom = 2 and pos = 300 is greater than a position where
|
48
57
|
chrom = 1 and pos = 1000
|
49
58
|
|
50
|
-
If any of the chromosomes is outside [1-22,X,Y,MT] we can not say which is biggest.
|
59
|
+
If any of the chromosomes is outside [1-22,X,Y,MT] or [chr1-chr22,chrX,chrY,chrM] we can not say which is biggest.
|
51
60
|
|
52
61
|
Args:
|
53
62
|
a,b(Position)
|
63
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
54
64
|
|
55
65
|
Returns:
|
56
66
|
bool: True if a is greater than b
|
57
67
|
"""
|
58
68
|
|
59
|
-
a_chrom = CHROM_TO_INT.get(a.chrom, 0)
|
60
|
-
b_chrom = CHROM_TO_INT.get(b.chrom, 0)
|
69
|
+
a_chrom = CHROM_TO_INT[genome_build].get(a.chrom, 0)
|
70
|
+
b_chrom = CHROM_TO_INT[genome_build].get(b.chrom, 0)
|
61
71
|
|
62
72
|
if a_chrom == 0 or b_chrom == 0:
|
63
73
|
return False
|
@@ -68,11 +78,13 @@ def is_greater(a, b):
|
|
68
78
|
return a_chrom == b_chrom and a.pos > b.pos
|
69
79
|
|
70
80
|
|
71
|
-
def get_coords(variant):
|
81
|
+
def get_coords(variant, keep_chr_prefix, genome_build):
|
72
82
|
"""Returns a dictionary with position information
|
73
83
|
|
74
84
|
Args:
|
75
85
|
variant(cyvcf2.Variant)
|
86
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
87
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
76
88
|
|
77
89
|
Returns:
|
78
90
|
coordinates(dict)
|
@@ -86,8 +98,9 @@ def get_coords(variant):
|
|
86
98
|
"end": None,
|
87
99
|
}
|
88
100
|
chrom = variant.CHROM
|
89
|
-
if
|
90
|
-
chrom
|
101
|
+
if not keep_chr_prefix:
|
102
|
+
if chrom.startswith(("chr", "CHR", "Chr")):
|
103
|
+
chrom = chrom[3:]
|
91
104
|
coordinates["chrom"] = chrom
|
92
105
|
end_chrom = chrom
|
93
106
|
|
@@ -107,8 +120,9 @@ def get_coords(variant):
|
|
107
120
|
if sv_type == "BND":
|
108
121
|
other_coordinates = alt.strip("ATCGN").strip("[]").split(":")
|
109
122
|
end_chrom = other_coordinates[0]
|
110
|
-
if
|
111
|
-
end_chrom
|
123
|
+
if not keep_chr_prefix:
|
124
|
+
if end_chrom.startswith(("chr", "CHR", "Chr")):
|
125
|
+
end_chrom = end_chrom[3:]
|
112
126
|
|
113
127
|
end = int(other_coordinates[1])
|
114
128
|
|
@@ -126,7 +140,7 @@ def get_coords(variant):
|
|
126
140
|
end_position = Position(end_chrom, end)
|
127
141
|
|
128
142
|
# If 'start' is greater than 'end', switch positions
|
129
|
-
if is_greater(position, end_position):
|
143
|
+
if is_greater(position, end_position, genome_build=genome_build):
|
130
144
|
end_chrom = position.chrom
|
131
145
|
end = position.pos
|
132
146
|
|
@@ -148,6 +162,7 @@ def build_variant(
|
|
148
162
|
case_id: Optional[str] = None,
|
149
163
|
gq_threshold: Optional[int] = None,
|
150
164
|
gq_qual: Optional[bool] = False,
|
165
|
+
keep_chr_prefix: Optional[bool] = False,
|
151
166
|
ignore_gq_if_unset: Optional[bool] = False,
|
152
167
|
genome_build: Optional[str] = None,
|
153
168
|
) -> Variant:
|
@@ -165,6 +180,7 @@ def build_variant(
|
|
165
180
|
case_id(str): The case id
|
166
181
|
gq_threshold(int): Genotype Quality threshold
|
167
182
|
gq_qual(bool): Use variant.QUAL for quality instead of GQ
|
183
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
|
168
184
|
ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
|
169
185
|
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
170
186
|
|
@@ -179,14 +195,14 @@ def build_variant(
|
|
179
195
|
sv = True
|
180
196
|
|
181
197
|
# chrom_pos_ref_alt
|
182
|
-
variant_id = get_variant_id(variant)
|
198
|
+
variant_id = get_variant_id(variant, keep_chr_prefix)
|
183
199
|
|
184
200
|
ref = variant.REF
|
185
201
|
# ALT is an array in cyvcf2
|
186
202
|
# We allways assume splitted and normalized VCFs
|
187
203
|
alt = variant.ALT[0]
|
188
204
|
|
189
|
-
coordinates = get_coords(variant)
|
205
|
+
coordinates = get_coords(variant, keep_chr_prefix, genome_build=genome_build)
|
190
206
|
chrom = coordinates["chrom"]
|
191
207
|
pos = coordinates["pos"]
|
192
208
|
|
@@ -224,7 +240,7 @@ def build_variant(
|
|
224
240
|
# If variant in X or Y and individual is male,
|
225
241
|
# we need to check hemizygosity
|
226
242
|
if (
|
227
|
-
chrom in ["X", "Y"]
|
243
|
+
chrom in ["X", "Y", "chrX", "chrY"]
|
228
244
|
and ind_obj["sex"] == 1
|
229
245
|
and not check_par(chrom, pos, genome_build=genome_build)
|
230
246
|
):
|
@@ -21,6 +21,7 @@ LOG = logging.getLogger(__name__)
|
|
21
21
|
def annotate(ctx, variant_file, sv):
|
22
22
|
"""Annotate the variants in a VCF"""
|
23
23
|
adapter = ctx.obj["adapter"]
|
24
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
24
25
|
|
25
26
|
variant_path = os.path.abspath(variant_file)
|
26
27
|
|
@@ -40,9 +41,9 @@ def annotate(ctx, variant_file, sv):
|
|
40
41
|
start_inserting = datetime.now()
|
41
42
|
|
42
43
|
if sv:
|
43
|
-
annotated_variants = annotate_svs(adapter, vcf_obj)
|
44
|
+
annotated_variants = annotate_svs(adapter, vcf_obj, keep_chr_prefix)
|
44
45
|
else:
|
45
|
-
annotated_variants = annotate_snvs(adapter, vcf_obj)
|
46
|
+
annotated_variants = annotate_snvs(adapter, vcf_obj, keep_chr_prefix)
|
46
47
|
# try:
|
47
48
|
for variant in annotated_variants:
|
48
49
|
click.echo(str(variant).rstrip())
|
@@ -52,14 +52,35 @@ LOG = logging.getLogger(__name__)
|
|
52
52
|
@click.option(
|
53
53
|
"-g",
|
54
54
|
"--genome-build",
|
55
|
+
default="GRCh37",
|
56
|
+
show_default=True,
|
55
57
|
type=click.Choice([GRCH37, GRCH38]),
|
56
58
|
help="Specify what genome build to use",
|
57
59
|
)
|
60
|
+
@click.option(
|
61
|
+
"--keep-chr-prefix",
|
62
|
+
is_flag=True,
|
63
|
+
default=False,
|
64
|
+
show_default=True,
|
65
|
+
help="Retain the 'chr/Chr/CHR' prefix for chromosomes if it is present",
|
66
|
+
)
|
58
67
|
@click.option("-v", "--verbose", is_flag=True)
|
59
68
|
@click.version_option(__version__)
|
60
69
|
@click.pass_context
|
61
70
|
def cli(
|
62
|
-
ctx,
|
71
|
+
ctx,
|
72
|
+
database,
|
73
|
+
username,
|
74
|
+
password,
|
75
|
+
authdb,
|
76
|
+
port,
|
77
|
+
host,
|
78
|
+
uri,
|
79
|
+
verbose,
|
80
|
+
config,
|
81
|
+
test,
|
82
|
+
genome_build,
|
83
|
+
keep_chr_prefix,
|
63
84
|
):
|
64
85
|
"""loqusdb: manage a local variant count database."""
|
65
86
|
loglevel = "INFO"
|
@@ -102,7 +123,8 @@ def cli(
|
|
102
123
|
|
103
124
|
adapter = MongoAdapter(client, db_name=database)
|
104
125
|
|
105
|
-
genome_build = genome_build or configs.get("genome_build")
|
126
|
+
genome_build = genome_build or configs.get("genome_build")
|
127
|
+
keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
|
106
128
|
|
107
129
|
ctx.obj = {}
|
108
130
|
ctx.obj["db"] = database
|
@@ -114,3 +136,4 @@ def cli(
|
|
114
136
|
ctx.obj["adapter"] = adapter
|
115
137
|
ctx.obj["version"] = __version__
|
116
138
|
ctx.obj["genome_build"] = genome_build
|
139
|
+
ctx.obj["keep_chr_prefix"] = keep_chr_prefix
|
@@ -35,6 +35,7 @@ def delete(ctx, family_file, family_type, case_id):
|
|
35
35
|
ctx.abort()
|
36
36
|
|
37
37
|
adapter = ctx.obj["adapter"]
|
38
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
38
39
|
|
39
40
|
# Get a ped_parser.Family object from family file
|
40
41
|
family = None
|
@@ -59,7 +60,12 @@ def delete(ctx, family_file, family_type, case_id):
|
|
59
60
|
genome_build = ctx.obj["genome_build"]
|
60
61
|
start_deleting = datetime.now()
|
61
62
|
try:
|
62
|
-
delete_command(
|
63
|
+
delete_command(
|
64
|
+
adapter=adapter,
|
65
|
+
case_obj=existing_case,
|
66
|
+
genome_build=genome_build,
|
67
|
+
keep_chr_prefix=keep_chr_prefix,
|
68
|
+
)
|
63
69
|
except (CaseError, IOError) as error:
|
64
70
|
LOG.warning(error)
|
65
71
|
ctx.abort()
|
@@ -2,7 +2,7 @@ import logging
|
|
2
2
|
from datetime import datetime
|
3
3
|
|
4
4
|
import click
|
5
|
-
from loqusdb import CHROMOSOME_ORDER
|
5
|
+
from loqusdb.constants import CHROMOSOMES, GRCH37, GRCH38
|
6
6
|
from loqusdb.utils.variant import format_variant
|
7
7
|
from vcftoolbox import HeaderParser, print_headers, print_variant
|
8
8
|
|
@@ -43,11 +43,22 @@ def export(ctx, outfile, variant_type, freq):
|
|
43
43
|
is_sv = variant_type == "sv"
|
44
44
|
existing_chromosomes = set(adapter.get_chromosomes(sv=is_sv))
|
45
45
|
|
46
|
+
genome = ctx.obj["genome_build"]
|
47
|
+
chromosome_order = CHROMOSOMES[genome]
|
48
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
49
|
+
|
46
50
|
ordered_chromosomes = []
|
47
|
-
for chrom in CHROMOSOME_ORDER:
|
48
|
-
if chrom in existing_chromosomes:
|
51
|
+
for chrom in chromosome_order:
|
52
|
+
if keep_chr_prefix and chrom in existing_chromosomes:
|
49
53
|
ordered_chromosomes.append(chrom)
|
50
54
|
existing_chromosomes.remove(chrom)
|
55
|
+
elif not keep_chr_prefix:
|
56
|
+
if genome == GRCH37 and chrom in existing_chromosomes:
|
57
|
+
ordered_chromosomes.append(chrom)
|
58
|
+
existing_chromosomes.remove(chrom)
|
59
|
+
elif genome == GRCH38 and chrom[3:] in existing_chromosomes:
|
60
|
+
ordered_chromosomes.append(chrom)
|
61
|
+
existing_chromosomes.remove(chrom)
|
51
62
|
for chrom in existing_chromosomes:
|
52
63
|
ordered_chromosomes.append(chrom)
|
53
64
|
|
@@ -148,7 +148,7 @@ def load(
|
|
148
148
|
|
149
149
|
adapter = ctx.obj["adapter"]
|
150
150
|
genome_build = ctx.obj["genome_build"]
|
151
|
-
|
151
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
152
152
|
start_inserting = datetime.now()
|
153
153
|
|
154
154
|
try:
|
@@ -162,6 +162,7 @@ def load(
|
|
162
162
|
case_id=case_id,
|
163
163
|
gq_threshold=gq_threshold,
|
164
164
|
snv_gq_only=snv_gq_only,
|
165
|
+
keep_chr_prefix=keep_chr_prefix,
|
165
166
|
qual_gq=qual_gq,
|
166
167
|
max_window=max_window,
|
167
168
|
profile_file=variant_profile_path,
|
@@ -60,13 +60,14 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
|
|
60
60
|
"""
|
61
61
|
|
62
62
|
adapter = ctx.obj["adapter"]
|
63
|
+
keep_chr_prefix = ctx.obj["keep_chr_prefix"]
|
63
64
|
|
64
65
|
LOG.info("Running loqusdb profile")
|
65
66
|
|
66
67
|
if check_vcf:
|
67
68
|
LOG.info(f"Check if profile in {check_vcf} has match in database")
|
68
69
|
vcf_file = check_vcf
|
69
|
-
profiles = get_profiles(adapter, vcf_file)
|
70
|
+
profiles = get_profiles(adapter, vcf_file, keep_chr_prefix)
|
70
71
|
duplicate = check_duplicates(adapter, profiles, profile_threshold)
|
71
72
|
|
72
73
|
if duplicate is not None:
|
@@ -81,11 +82,11 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
|
|
81
82
|
if variant_file is not None:
|
82
83
|
vcf_path = variant_file
|
83
84
|
LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
|
84
|
-
load_profile_variants(adapter, vcf_path)
|
85
|
+
load_profile_variants(adapter, vcf_path, keep_chr_prefix)
|
85
86
|
|
86
87
|
if update:
|
87
88
|
LOG.info("Updates profiles in database")
|
88
|
-
update_profiles(adapter)
|
89
|
+
update_profiles(adapter, keep_chr_prefix)
|
89
90
|
|
90
91
|
if stats:
|
91
92
|
LOG.info("Prints profile stats")
|
@@ -17,35 +17,67 @@ PAR = {
|
|
17
17
|
GENOTYPE_MAP = {0: "hom_ref", 1: "het", 2: "no_call", 3: "hom_alt"}
|
18
18
|
|
19
19
|
# To keep the order of chromosomes
|
20
|
-
CHROMOSOMES =
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
20
|
+
CHROMOSOMES = {
|
21
|
+
GRCH37: [
|
22
|
+
"1",
|
23
|
+
"2",
|
24
|
+
"3",
|
25
|
+
"4",
|
26
|
+
"5",
|
27
|
+
"6",
|
28
|
+
"7",
|
29
|
+
"8",
|
30
|
+
"9",
|
31
|
+
"10",
|
32
|
+
"11",
|
33
|
+
"12",
|
34
|
+
"13",
|
35
|
+
"14",
|
36
|
+
"15",
|
37
|
+
"16",
|
38
|
+
"17",
|
39
|
+
"18",
|
40
|
+
"19",
|
41
|
+
"20",
|
42
|
+
"21",
|
43
|
+
"22",
|
44
|
+
"X",
|
45
|
+
"Y",
|
46
|
+
"MT",
|
47
|
+
],
|
48
|
+
GRCH38: [
|
49
|
+
"chr1",
|
50
|
+
"chr2",
|
51
|
+
"chr3",
|
52
|
+
"chr4",
|
53
|
+
"chr5",
|
54
|
+
"chr6",
|
55
|
+
"chr7",
|
56
|
+
"chr8",
|
57
|
+
"chr9",
|
58
|
+
"chr10",
|
59
|
+
"chr11",
|
60
|
+
"chr12",
|
61
|
+
"chr13",
|
62
|
+
"chr14",
|
63
|
+
"chr15",
|
64
|
+
"chr16",
|
65
|
+
"chr17",
|
66
|
+
"chr18",
|
67
|
+
"chr19",
|
68
|
+
"chr20",
|
69
|
+
"chr21",
|
70
|
+
"chr22",
|
71
|
+
"chrX",
|
72
|
+
"chrY",
|
73
|
+
"chrM",
|
74
|
+
],
|
75
|
+
}
|
47
76
|
|
48
|
-
CHROM_TO_INT = {
|
77
|
+
CHROM_TO_INT = {
|
78
|
+
build: {chrom: i + 1 for i, chrom in enumerate(chromosomes)}
|
79
|
+
for build, chromosomes in CHROMOSOMES.items()
|
80
|
+
}
|
49
81
|
|
50
82
|
# Ranges of hamming distances to be checked when 'loqusdb profile --stats'
|
51
83
|
# items: <range_name>: <range>
|
@@ -31,21 +31,7 @@ def annotate_variant(variant, var_obj=None):
|
|
31
31
|
return variant
|
32
32
|
|
33
33
|
|
34
|
-
def
|
35
|
-
"""Annotate an SNV/INDEL variant
|
36
|
-
|
37
|
-
Args:
|
38
|
-
adapter(loqusdb.plugin.adapter)
|
39
|
-
variant(cyvcf2.Variant)
|
40
|
-
"""
|
41
|
-
variant_id = get_variant_id(variant)
|
42
|
-
variant_obj = adapter.get_variant(variant={"_id": variant_id})
|
43
|
-
|
44
|
-
annotated_variant = annotated_variant(variant, variant_obj)
|
45
|
-
return annotated_variant
|
46
|
-
|
47
|
-
|
48
|
-
def annotate_svs(adapter, vcf_obj):
|
34
|
+
def annotate_svs(adapter, vcf_obj, keep_chr_prefix):
|
49
35
|
"""Annotate all SV variants in a VCF
|
50
36
|
|
51
37
|
Args:
|
@@ -56,14 +42,14 @@ def annotate_svs(adapter, vcf_obj):
|
|
56
42
|
variant(cyvcf2.Variant)
|
57
43
|
"""
|
58
44
|
for nr_variants, variant in enumerate(vcf_obj, 1):
|
59
|
-
variant_info = get_coords(variant)
|
45
|
+
variant_info = get_coords(variant, keep_chr_prefix)
|
60
46
|
match = adapter.get_structural_variant(variant_info)
|
61
47
|
if match:
|
62
48
|
annotate_variant(variant, match)
|
63
49
|
yield variant
|
64
50
|
|
65
51
|
|
66
|
-
def annotate_snvs(adapter, vcf_obj):
|
52
|
+
def annotate_snvs(adapter, vcf_obj, keep_chr_prefix):
|
67
53
|
"""Annotate all variants in a VCF
|
68
54
|
|
69
55
|
Args:
|
@@ -77,7 +63,7 @@ def annotate_snvs(adapter, vcf_obj):
|
|
77
63
|
|
78
64
|
for nr_variants, variant in enumerate(vcf_obj, 1):
|
79
65
|
# Add the variant to current batch
|
80
|
-
variants[get_variant_id(variant)] = variant
|
66
|
+
variants[get_variant_id(variant, keep_chr_prefix)] = variant
|
81
67
|
# If batch len == 1000 we annotate the batch
|
82
68
|
if (nr_variants % 1000) == 0:
|
83
69
|
|
@@ -9,7 +9,9 @@ from loqusdb.build_models.variant import build_variant
|
|
9
9
|
LOG = logging.getLogger(__name__)
|
10
10
|
|
11
11
|
|
12
|
-
def delete(adapter, case_obj, update=False, existing_case=False, genome_build=None):
|
12
|
+
def delete(
|
13
|
+
adapter, case_obj, keep_chr_prefix=None, update=False, existing_case=False, genome_build=None
|
14
|
+
):
|
13
15
|
"""Delete a case and all of it's variants from the database.
|
14
16
|
|
15
17
|
Args:
|
@@ -18,6 +20,8 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
18
20
|
update(bool): If we are in the middle of an update
|
19
21
|
existing_case(models.Case): If something failed during an update we need to revert
|
20
22
|
to the original case
|
23
|
+
keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
|
24
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
21
25
|
|
22
26
|
"""
|
23
27
|
# This will overwrite the updated case with the previous one
|
@@ -36,7 +40,11 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
36
40
|
if file_type == "vcf_path":
|
37
41
|
LOG.info("deleting variants")
|
38
42
|
delete_variants(
|
39
|
-
adapter=adapter,
|
43
|
+
adapter=adapter,
|
44
|
+
vcf_obj=vcf_obj,
|
45
|
+
keep_chr_prefix=keep_chr_prefix,
|
46
|
+
case_obj=case_obj,
|
47
|
+
genome_build=genome_build,
|
40
48
|
)
|
41
49
|
elif file_type == "vcf_sv_path":
|
42
50
|
LOG.info("deleting structural variants")
|
@@ -44,10 +52,14 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
|
|
44
52
|
adapter=adapter,
|
45
53
|
vcf_obj=vcf_obj,
|
46
54
|
case_obj=case_obj,
|
55
|
+
keep_chr_prefix=keep_chr_prefix,
|
56
|
+
genome_build=genome_build,
|
47
57
|
)
|
48
58
|
|
49
59
|
|
50
|
-
def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None):
|
60
|
+
def delete_variants(
|
61
|
+
adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None, genome_build=None
|
62
|
+
):
|
51
63
|
"""Delete variants for a case in the database
|
52
64
|
|
53
65
|
Args:
|
@@ -55,6 +67,7 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
55
67
|
vcf_obj(iterable(dict))
|
56
68
|
ind_positions(dict)
|
57
69
|
case_id(str)
|
70
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
58
71
|
|
59
72
|
Returns:
|
60
73
|
nr_deleted (int): Number of deleted variants
|
@@ -69,7 +82,11 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
69
82
|
variant_list = []
|
70
83
|
for variant in vcf_obj:
|
71
84
|
formated_variant = build_variant(
|
72
|
-
variant=variant,
|
85
|
+
variant=variant,
|
86
|
+
case_obj=case_obj,
|
87
|
+
keep_chr_prefix=keep_chr_prefix,
|
88
|
+
case_id=case_id,
|
89
|
+
genome_build=genome_build,
|
73
90
|
)
|
74
91
|
|
75
92
|
if not formated_variant:
|
@@ -109,7 +126,9 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
|
|
109
126
|
return nr_deleted
|
110
127
|
|
111
128
|
|
112
|
-
def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
129
|
+
def delete_structural_variants(
|
130
|
+
adapter, vcf_obj, case_obj, genome_build, keep_chr_prefix=None, case_id=None
|
131
|
+
):
|
113
132
|
"""Delete structural variants for a case in the database
|
114
133
|
|
115
134
|
Args:
|
@@ -117,6 +136,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
|
117
136
|
vcf_obj(iterable(dict))
|
118
137
|
ind_positions(dict)
|
119
138
|
case_id(str)
|
139
|
+
genome_build(str): Genome build. Ex. GRCh37 or GRCh38
|
120
140
|
|
121
141
|
Returns:
|
122
142
|
nr_deleted (int): Number of deleted variants"""
|
@@ -133,6 +153,8 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
|
|
133
153
|
variant=variant,
|
134
154
|
case_obj=case_obj,
|
135
155
|
case_id=case_id,
|
156
|
+
genome_build=genome_build,
|
157
|
+
keep_chr_prefix=keep_chr_prefix,
|
136
158
|
)
|
137
159
|
|
138
160
|
if not formated_variant:
|