loqusdb 2.6.9__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loqusdb/build_models/variant.py +11 -5
- loqusdb/commands/load.py +6 -3
- loqusdb/commands/update.py +3 -3
- loqusdb/utils/load.py +13 -9
- loqusdb/utils/update.py +6 -6
- {loqusdb-2.6.9.dist-info → loqusdb-2.7.0.dist-info}/METADATA +1 -2
- {loqusdb-2.6.9.dist-info → loqusdb-2.7.0.dist-info}/RECORD +11 -11
- {loqusdb-2.6.9.dist-info → loqusdb-2.7.0.dist-info}/WHEEL +1 -1
- {loqusdb-2.6.9.dist-info → loqusdb-2.7.0.dist-info}/LICENSE +0 -0
- {loqusdb-2.6.9.dist-info → loqusdb-2.7.0.dist-info}/entry_points.txt +0 -0
- {loqusdb-2.6.9.dist-info → loqusdb-2.7.0.dist-info}/top_level.txt +0 -0
loqusdb/build_models/variant.py
CHANGED
@@ -139,19 +139,19 @@ def get_coords(variant):
|
|
139
139
|
return coordinates
|
140
140
|
|
141
141
|
|
142
|
-
def build_variant(variant, case_obj, case_id=None,
|
142
|
+
def build_variant(variant, case_obj, case_id=None, gq_threshold=None, gq_qual=False, genome_build=None):
|
143
143
|
"""Return a Variant object
|
144
144
|
|
145
145
|
Take a cyvcf2 formatted variant line and return a models.Variant.
|
146
146
|
|
147
147
|
If criteria are not fulfilled, e.g. the variant has no gt call or quality
|
148
|
-
is below gq
|
148
|
+
is below gq threshold then return None.
|
149
149
|
|
150
150
|
Args:
|
151
151
|
variant(cyvcf2.Variant)
|
152
152
|
case_obj(Case): We need the case object to check individuals sex
|
153
153
|
case_id(str): The case id
|
154
|
-
|
154
|
+
gq_threshold(int): Genotype Quality threshold
|
155
155
|
|
156
156
|
Return:
|
157
157
|
formated_variant(models.Variant): A variant dictionary
|
@@ -188,8 +188,14 @@ def build_variant(variant, case_obj, case_id=None, gq_treshold=None, genome_buil
|
|
188
188
|
ind_id = ind_obj["ind_id"]
|
189
189
|
# Get the index position for the individual in the VCF
|
190
190
|
ind_pos = ind_obj["ind_index"]
|
191
|
-
|
192
|
-
if
|
191
|
+
|
192
|
+
if gq_qual:
|
193
|
+
gq = int(variant.QUAL)
|
194
|
+
|
195
|
+
if not gq_qual:
|
196
|
+
gq = int(variant.gt_quals[ind_pos])
|
197
|
+
|
198
|
+
if gq_threshold and gq < gq_threshold:
|
193
199
|
continue
|
194
200
|
|
195
201
|
genotype = GENOTYPE_MAP[variant.gt_types[ind_pos]]
|
loqusdb/commands/load.py
CHANGED
@@ -54,7 +54,8 @@ def validate_profile_threshold(ctx, param, value):
|
|
54
54
|
help="Do not store case information on variants",
|
55
55
|
)
|
56
56
|
@click.option("--ensure-index", is_flag=True, help="Make sure that the indexes are in place")
|
57
|
-
@click.option("--gq-
|
57
|
+
@click.option("--gq-threshold", default=20, show_default=True, help="Threshold to consider variant")
|
58
|
+
@click.option("--qual-gq", is_flag=True, default=False, show_default=True, help="Use QUAL tag instead of GQ value for quality filter")
|
58
59
|
@click.option(
|
59
60
|
"--max-window",
|
60
61
|
"-m",
|
@@ -89,13 +90,14 @@ def load(
|
|
89
90
|
family_file,
|
90
91
|
family_type,
|
91
92
|
skip_case_id,
|
92
|
-
|
93
|
+
gq_threshold,
|
93
94
|
case_id,
|
94
95
|
ensure_index,
|
95
96
|
max_window,
|
96
97
|
check_profile,
|
97
98
|
hard_threshold,
|
98
99
|
soft_threshold,
|
100
|
+
qual_gq
|
99
101
|
):
|
100
102
|
"""Load the variants of a case
|
101
103
|
|
@@ -136,7 +138,8 @@ def load(
|
|
136
138
|
family_type=family_type,
|
137
139
|
skip_case_id=skip_case_id,
|
138
140
|
case_id=case_id,
|
139
|
-
|
141
|
+
gq_threshold=gq_threshold,
|
142
|
+
qual_gq=qual_gq,
|
140
143
|
max_window=max_window,
|
141
144
|
profile_file=variant_profile_path,
|
142
145
|
hard_threshold=hard_threshold,
|
loqusdb/commands/update.py
CHANGED
@@ -47,7 +47,7 @@ LOG = logging.getLogger(__name__)
|
|
47
47
|
help="Do not store case information on variants",
|
48
48
|
)
|
49
49
|
@click.option("--ensure-index", is_flag=True, help="Make sure that the indexes are in place")
|
50
|
-
@click.option("--gq-
|
50
|
+
@click.option("--gq-threshold", default=20, show_default=True, help="Threshold to consider variant")
|
51
51
|
@click.option(
|
52
52
|
"--max-window",
|
53
53
|
"-m",
|
@@ -63,7 +63,7 @@ def update(
|
|
63
63
|
family_file,
|
64
64
|
family_type,
|
65
65
|
skip_case_id,
|
66
|
-
|
66
|
+
gq_threshold,
|
67
67
|
case_id,
|
68
68
|
ensure_index,
|
69
69
|
max_window,
|
@@ -102,7 +102,7 @@ def update(
|
|
102
102
|
family_type=family_type,
|
103
103
|
skip_case_id=skip_case_id,
|
104
104
|
case_id=case_id,
|
105
|
-
|
105
|
+
gq_threshold=gq_threshold,
|
106
106
|
max_window=max_window,
|
107
107
|
)
|
108
108
|
except (SyntaxError, CaseError, IOError, VcfError) as error:
|
loqusdb/utils/load.py
CHANGED
@@ -31,7 +31,8 @@ def load_database(
|
|
31
31
|
family_file=None,
|
32
32
|
family_type="ped",
|
33
33
|
skip_case_id=False,
|
34
|
-
|
34
|
+
gq_threshold=None,
|
35
|
+
qual_gq=False,
|
35
36
|
case_id=None,
|
36
37
|
max_window=3000,
|
37
38
|
profile_file=None,
|
@@ -48,7 +49,8 @@ def load_database(
|
|
48
49
|
family_file(str): Path to family file
|
49
50
|
family_type(str): Format of family file
|
50
51
|
skip_case_id(bool): If no case information should be added to variants
|
51
|
-
|
52
|
+
gq_threshold(int): If only quality variants should be considered
|
53
|
+
qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality
|
52
54
|
case_id(str): If different case id than the one in family file should be used
|
53
55
|
max_window(int): Specify the max size for sv windows
|
54
56
|
check_profile(bool): Does profile check if True
|
@@ -87,13 +89,13 @@ def load_database(
|
|
87
89
|
adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
|
88
90
|
)
|
89
91
|
|
90
|
-
# If a gq
|
92
|
+
# If a gq threshold is used the variants need to have GQ
|
91
93
|
for _vcf_file in vcf_files:
|
92
94
|
# Get a cyvcf2.VCF object
|
93
95
|
vcf = get_vcf(_vcf_file)
|
94
96
|
|
95
|
-
if
|
96
|
-
LOG.warning("Set gq-
|
97
|
+
if gq_threshold and not vcf.contains("GQ") and not qual_gq:
|
98
|
+
LOG.warning("Set gq-threshold to 0 or add info to vcf {0}".format(_vcf_file))
|
97
99
|
raise SyntaxError("GQ is not defined in vcf header")
|
98
100
|
|
99
101
|
# Get a ped_parser.Family object from family file
|
@@ -143,7 +145,8 @@ def load_database(
|
|
143
145
|
vcf_obj=vcf_obj,
|
144
146
|
case_obj=case_obj,
|
145
147
|
skip_case_id=skip_case_id,
|
146
|
-
|
148
|
+
gq_threshold=gq_threshold,
|
149
|
+
qual_gq=qual_gq,
|
147
150
|
max_window=max_window,
|
148
151
|
variant_type=variant_type,
|
149
152
|
genome_build=genome_build,
|
@@ -189,7 +192,8 @@ def load_variants(
|
|
189
192
|
vcf_obj,
|
190
193
|
case_obj,
|
191
194
|
skip_case_id=False,
|
192
|
-
|
195
|
+
gq_threshold=None,
|
196
|
+
qual_gq=False,
|
193
197
|
max_window=3000,
|
194
198
|
variant_type="snv",
|
195
199
|
genome_build=None,
|
@@ -202,7 +206,7 @@ def load_variants(
|
|
202
206
|
nr_variants(int)
|
203
207
|
skip_case_id (bool): whether to include the case id on variant level
|
204
208
|
or not
|
205
|
-
|
209
|
+
gq_threshold(int)
|
206
210
|
max_window(int): Specify the max size for sv windows
|
207
211
|
variant_type(str): 'sv' or 'snv'
|
208
212
|
|
@@ -222,7 +226,7 @@ def load_variants(
|
|
222
226
|
with click.progressbar(vcf_obj, label="Inserting variants", length=nr_variants) as bar:
|
223
227
|
|
224
228
|
variants = (
|
225
|
-
build_variant(variant, case_obj, case_id,
|
229
|
+
build_variant(variant, case_obj, case_id, gq_threshold, qual_gq, genome_build=genome_build)
|
226
230
|
for variant in bar
|
227
231
|
)
|
228
232
|
|
loqusdb/utils/update.py
CHANGED
@@ -28,7 +28,7 @@ def update_database(
|
|
28
28
|
family_file=None,
|
29
29
|
family_type="ped",
|
30
30
|
skip_case_id=False,
|
31
|
-
|
31
|
+
gq_threshold=None,
|
32
32
|
case_id=None,
|
33
33
|
max_window=3000,
|
34
34
|
):
|
@@ -41,7 +41,7 @@ def update_database(
|
|
41
41
|
family_file(str): Path to family file
|
42
42
|
family_type(str): Format of family file
|
43
43
|
skip_case_id(bool): If no case information should be added to variants
|
44
|
-
|
44
|
+
gq_threshold(int): If only quality variants should be considered
|
45
45
|
case_id(str): If different case id than the one in family file should be used
|
46
46
|
max_window(int): Specify the max size for sv windows
|
47
47
|
|
@@ -67,14 +67,14 @@ def update_database(
|
|
67
67
|
vcf_files.append(sv_file)
|
68
68
|
sv_individuals = vcf_info["individuals"]
|
69
69
|
|
70
|
-
# If a gq
|
70
|
+
# If a gq threshold is used the variants need to have GQ
|
71
71
|
for _vcf_file in vcf_files:
|
72
72
|
# Get a cyvcf2.VCF object
|
73
73
|
vcf = get_vcf(_vcf_file)
|
74
74
|
|
75
|
-
if
|
75
|
+
if gq_threshold:
|
76
76
|
if not vcf.contains("GQ"):
|
77
|
-
LOG.warning("Set gq-
|
77
|
+
LOG.warning("Set gq-threshold to 0 or add info to vcf {0}".format(_vcf_file))
|
78
78
|
raise SyntaxError("GQ is not defined in vcf header")
|
79
79
|
|
80
80
|
# Get a ped_parser.Family object from family file
|
@@ -127,7 +127,7 @@ def update_database(
|
|
127
127
|
vcf_obj=vcf_obj,
|
128
128
|
case_obj=case_obj,
|
129
129
|
skip_case_id=skip_case_id,
|
130
|
-
|
130
|
+
gq_threshold=gq_threshold,
|
131
131
|
max_window=max_window,
|
132
132
|
variant_type=variant_type,
|
133
133
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: loqusdb
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.7.0
|
4
4
|
Summary: Store observations of vcf variants in a mongodb
|
5
5
|
Home-page: https://github.com/moonso/loqusdb
|
6
6
|
Author: Måns Magnusson
|
@@ -29,7 +29,6 @@ Requires-Dist: numpy (==1.21.4)
|
|
29
29
|
Requires-Dist: coloredlogs (==14.0)
|
30
30
|
Requires-Dist: pyyaml (==5.4.0)
|
31
31
|
Requires-Dist: vcftoolbox (==1.5)
|
32
|
-
Requires-Dist: pip (==21.3.1)
|
33
32
|
Requires-Dist: setuptools (==59.2.0)
|
34
33
|
Requires-Dist: mongo-adapter (>=0.3.3)
|
35
34
|
Requires-Dist: ped-parser
|
@@ -4,18 +4,18 @@ loqusdb/log.py,sha256=CDcrCjzs9ef-d5Wg8Q_41bCOZRM5j8PyP06kNcynTj0,1691
|
|
4
4
|
loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
loqusdb/build_models/case.py,sha256=P3sfQkI_fH8u5iqecYWhV866lcHz4upWkepaea5MMIw,4255
|
6
6
|
loqusdb/build_models/profile_variant.py,sha256=TbSxfVjESstS_FgbkOW4NQwMQVeTyhn9oc9yPZmDhzI,1021
|
7
|
-
loqusdb/build_models/variant.py,sha256=
|
7
|
+
loqusdb/build_models/variant.py,sha256=JE3o_htRQjpwwUPxmP5HJ_0Ax8BwKp610uBIo3HTaVA,6671
|
8
8
|
loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
|
9
9
|
loqusdb/commands/annotate.py,sha256=748kImopE5WbaO1nuv3WUgIqezWFSsi7SBeWhOz26-s,1384
|
10
10
|
loqusdb/commands/cli.py,sha256=wJD5S1BoCxtRTAobd1QtmQpzlngJg-mt1nsyD92fDD4,3176
|
11
11
|
loqusdb/commands/delete.py,sha256=R6ysHKSMw1mmL4ZbktoUIKzdzDLQ3314YPYhIy1myic,1979
|
12
12
|
loqusdb/commands/export.py,sha256=0V3S3QU9LKlR13w3KCGfqFliTYiDRCgNwusA27AEvmE,3254
|
13
13
|
loqusdb/commands/identity.py,sha256=hzbnvniKgSNEwSeYHsxdNvVlqu_vXeOSLlNFnDXTQjA,779
|
14
|
-
loqusdb/commands/load.py,sha256=
|
14
|
+
loqusdb/commands/load.py,sha256=cVDdY7meBfcv8nMEGsjAX6aE-SKDOceGqM2vAvXPhko,4407
|
15
15
|
loqusdb/commands/load_profile.py,sha256=cflCbF9f77_HCH8xPnN8zSSocvIffRMnC2LPE0j7Xq8,3336
|
16
16
|
loqusdb/commands/migrate.py,sha256=2C8YL-zVqnpnqg3JIyUr0rbVnb8-AGPVWNhicHnPKLo,667
|
17
17
|
loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1492
|
18
|
-
loqusdb/commands/update.py,sha256=
|
18
|
+
loqusdb/commands/update.py,sha256=zz3wueaJVqJ1FKact-rpY2az__5oa1LnZKf7mgqNGPk,3211
|
19
19
|
loqusdb/commands/view.py,sha256=zQag5kgvUFa8nW9OVte_qjit0n8wGLc3C3hwLOMGY6o,5111
|
20
20
|
loqusdb/commands/wipe.py,sha256=WTOjyNooCUhtmZ6pdcPFa0PZrFc9E_pkLbnat_zP96M,553
|
21
21
|
loqusdb/constants/__init__.py,sha256=r6y2TN8BqbKuh2Uyxq0trh-3A9xiWeStqWlvEPp-rSA,1645
|
@@ -40,15 +40,15 @@ loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
loqusdb/utils/annotate.py,sha256=cPNWlhsv6yoe3lxNfa9DytO5eACuM_mOJJw_mglVMN0,2646
|
41
41
|
loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
|
42
42
|
loqusdb/utils/delete.py,sha256=-ddBM_QXKzlUN6egEJggKzXX1P-WEdi92HgaD1DJRtg,4843
|
43
|
-
loqusdb/utils/load.py,sha256=
|
43
|
+
loqusdb/utils/load.py,sha256=rujUk89lL5jO35QfO8NKGJkjEJmN6PQxohKkxtpnw6Y,8252
|
44
44
|
loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
|
45
45
|
loqusdb/utils/profiling.py,sha256=3OizF7CpYvSl9kyl2g4KGJxbIRUqWfmfLxn3843XYDk,9164
|
46
|
-
loqusdb/utils/update.py,sha256=
|
46
|
+
loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
|
47
47
|
loqusdb/utils/variant.py,sha256=Lq5x9egVB-3rExBiRceF67TaL4Hp2gGoWMSRBEcnm4Q,2088
|
48
48
|
loqusdb/utils/vcf.py,sha256=ybmrTBEPYa0FbUXo8ttlwATk13RnKjX9eIDbRDwCiVE,5175
|
49
|
-
loqusdb-2.
|
50
|
-
loqusdb-2.
|
51
|
-
loqusdb-2.
|
52
|
-
loqusdb-2.
|
53
|
-
loqusdb-2.
|
54
|
-
loqusdb-2.
|
49
|
+
loqusdb-2.7.0.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
|
50
|
+
loqusdb-2.7.0.dist-info/METADATA,sha256=Sg2aINOps7cTErVgDl6xszkTG7GWTPrGdwFUQ9onR50,6026
|
51
|
+
loqusdb-2.7.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
52
|
+
loqusdb-2.7.0.dist-info/entry_points.txt,sha256=39QklW01vy9ilBLcRPgCP18kN6oKXXLoOK50gZE7Jbs,59
|
53
|
+
loqusdb-2.7.0.dist-info/top_level.txt,sha256=lRdRO6hqPhJEjFhfNsbCgVWMztvkYsjiOGK9DAL0UAI,8
|
54
|
+
loqusdb-2.7.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|