scout-browser 4.81__py3-none-any.whl → 4.82.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/__version__.py +1 -1
- scout/adapter/mongo/disease_terms.py +5 -2
- scout/adapter/mongo/query.py +23 -11
- scout/build/managed_variant.py +12 -1
- scout/build/variant/genotype.py +2 -0
- scout/build/variant/variant.py +5 -0
- scout/constants/clinvar.py +1 -1
- scout/constants/query_terms.py +3 -1
- scout/models/variant/variant.py +1 -0
- scout/parse/variant/frequency.py +56 -54
- scout/parse/variant/genotype.py +89 -15
- scout/parse/variant/transcript.py +17 -9
- scout/parse/variant/variant.py +12 -0
- scout/server/app.py +6 -3
- scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
- scout/server/blueprints/cases/controllers.py +2 -57
- scout/server/blueprints/cases/templates/cases/case_report.html +82 -66
- scout/server/blueprints/cases/templates/cases/chanjo2_form.html +47 -0
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +4 -4
- scout/server/blueprints/cases/templates/cases/gene_panel.html +4 -11
- scout/server/blueprints/cases/templates/cases/utils.html +3 -1
- scout/server/blueprints/cases/views.py +0 -22
- scout/server/blueprints/clinvar/controllers.py +3 -3
- scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +29 -2
- scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +36 -18
- scout/server/blueprints/clinvar/views.py +13 -1
- scout/server/blueprints/diagnoses/controllers.py +2 -0
- scout/server/blueprints/institutes/controllers.py +76 -38
- scout/server/blueprints/institutes/templates/overview/cases.html +54 -42
- scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +1 -1
- scout/server/blueprints/managed_variants/views.py +2 -4
- scout/server/blueprints/panels/templates/panels/panel.html +8 -7
- scout/server/blueprints/panels/views.py +2 -11
- scout/server/blueprints/phenotypes/templates/phenotypes/hpo_terms.html +3 -2
- scout/server/blueprints/variant/controllers.py +3 -2
- scout/server/blueprints/variant/templates/variant/components.html +1 -1
- scout/server/blueprints/variant/templates/variant/utils.html +3 -1
- scout/server/blueprints/variant/templates/variant/variant.html +20 -15
- scout/server/blueprints/variant/templates/variant/variant_details.html +78 -26
- scout/server/blueprints/variant/utils.py +9 -13
- scout/server/blueprints/variants/controllers.py +32 -3
- scout/server/blueprints/variants/forms.py +15 -1
- scout/server/blueprints/variants/templates/variants/components.html +55 -0
- scout/server/blueprints/variants/templates/variants/fusion-variants.html +3 -50
- scout/server/blueprints/variants/templates/variants/str-variants.html +8 -5
- scout/server/blueprints/variants/templates/variants/utils.html +57 -31
- scout/server/blueprints/variants/templates/variants/variants.html +1 -1
- scout/server/blueprints/variants/utils.py +7 -10
- scout/server/extensions/clinvar_extension.py +10 -2
- {scout_browser-4.81.dist-info → scout_browser-4.82.1.dist-info}/METADATA +6 -5
- {scout_browser-4.81.dist-info → scout_browser-4.82.1.dist-info}/RECORD +55 -54
- {scout_browser-4.81.dist-info → scout_browser-4.82.1.dist-info}/LICENSE +0 -0
- {scout_browser-4.81.dist-info → scout_browser-4.82.1.dist-info}/WHEEL +0 -0
- {scout_browser-4.81.dist-info → scout_browser-4.82.1.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.81.dist-info → scout_browser-4.82.1.dist-info}/top_level.txt +0 -0
scout/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "4.81"
|
1
|
+
__version__ = "4.82.1"
|
@@ -10,6 +10,9 @@ from scout.exceptions import IntegrityError
|
|
10
10
|
LOG = logging.getLogger(__name__)
|
11
11
|
|
12
12
|
DISEASE_FILTER_PROJECT = {"hpo_terms": 0, "genes": 0}
|
13
|
+
REGEX = "$regex"
|
14
|
+
REGEX_OPTIONS = "$options"
|
15
|
+
REGEX_IGNORECASE = "i"
|
13
16
|
|
14
17
|
|
15
18
|
class DiagnosisHandler(object):
|
@@ -32,8 +35,8 @@ class DiagnosisHandler(object):
|
|
32
35
|
if query:
|
33
36
|
query_dict = {
|
34
37
|
"$or": [
|
35
|
-
{"
|
36
|
-
{"description": {
|
38
|
+
{"disease_id": {REGEX: query, REGEX_OPTIONS: REGEX_IGNORECASE}},
|
39
|
+
{"description": {REGEX: query, REGEX_OPTIONS: REGEX_IGNORECASE}},
|
37
40
|
]
|
38
41
|
}
|
39
42
|
# If source is specified, add this restriction to the query
|
scout/adapter/mongo/query.py
CHANGED
@@ -11,6 +11,8 @@ from scout.constants import (
|
|
11
11
|
TRUSTED_REVSTAT_LEVEL,
|
12
12
|
)
|
13
13
|
|
14
|
+
CRITERION_EXCLUDE_OPERATOR = {False: "$in", True: "$nin"}
|
15
|
+
|
14
16
|
LOG = logging.getLogger(__name__)
|
15
17
|
|
16
18
|
|
@@ -256,7 +258,11 @@ class QueryHandler(object):
|
|
256
258
|
if criterion in ["hgnc_symbols", "gene_panels"]:
|
257
259
|
gene_query = self.gene_filter(query, build=build)
|
258
260
|
if len(gene_query) > 0 or "hpo" in query.get("gene_panels", []):
|
259
|
-
mongo_query["hgnc_ids"] = {
|
261
|
+
mongo_query["hgnc_ids"] = {
|
262
|
+
CRITERION_EXCLUDE_OPERATOR[
|
263
|
+
bool(query.get("gene_panels_exclude"))
|
264
|
+
]: gene_query
|
265
|
+
}
|
260
266
|
continue
|
261
267
|
|
262
268
|
if criterion == "chrom" and query.get("chrom"): # filter by coordinates
|
@@ -352,6 +358,7 @@ class QueryHandler(object):
|
|
352
358
|
mongo_query["$and"] = coordinate_query + mongo_query["$and"]
|
353
359
|
else:
|
354
360
|
mongo_query["$and"] = coordinate_query
|
361
|
+
|
355
362
|
return mongo_query
|
356
363
|
|
357
364
|
def affected_inds_query(self, mongo_query, case_id, gt_query):
|
@@ -572,16 +579,21 @@ class QueryHandler(object):
|
|
572
579
|
}
|
573
580
|
)
|
574
581
|
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
582
|
+
for local_obs_old_type in [
|
583
|
+
"local_obs_old",
|
584
|
+
"local_obs_cancer_germline_old",
|
585
|
+
"local_obs_cancer_somatic_old",
|
586
|
+
]:
|
587
|
+
if criterion == local_obs_old_type:
|
588
|
+
local_obs = query.get(local_obs_old_type)
|
589
|
+
mongo_secondary_query.append(
|
590
|
+
{
|
591
|
+
"$or": [
|
592
|
+
{local_obs_old_type: None},
|
593
|
+
{local_obs_old_type: {"$lt": local_obs + 1}},
|
594
|
+
]
|
595
|
+
}
|
596
|
+
)
|
585
597
|
|
586
598
|
if criterion == "local_obs_freq":
|
587
599
|
local_obs_freq = query.get("local_obs_freq")
|
scout/build/managed_variant.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
import logging
|
2
|
+
|
3
|
+
from flask import flash
|
4
|
+
|
2
5
|
from scout.models.managed_variant import ManagedVariant
|
3
6
|
|
4
7
|
LOG = logging.getLogger(__name__)
|
@@ -32,7 +35,15 @@ def build_managed_variant(managed_variant_info):
|
|
32
35
|
description=managed_variant_info.get("description", ""),
|
33
36
|
)
|
34
37
|
except KeyError:
|
35
|
-
|
38
|
+
flash(
|
39
|
+
"Managed variant has to have chr, pos, ref and alt",
|
40
|
+
"danger",
|
41
|
+
)
|
42
|
+
except ValueError as ve:
|
43
|
+
flash(
|
44
|
+
"Could not build managed variant {}".format(ve),
|
45
|
+
"danger",
|
46
|
+
)
|
36
47
|
|
37
48
|
LOG.debug("Built managed variant %s", managed_variant.get("display_id"))
|
38
49
|
|
scout/build/variant/genotype.py
CHANGED
@@ -14,6 +14,7 @@ def build_genotype(gt_call):
|
|
14
14
|
allele_depths = list, # int
|
15
15
|
read_depth = int,
|
16
16
|
genotype_quality = int,
|
17
|
+
alt_mc = int, # STR
|
17
18
|
so = str, # STR type of reads that support allele: "a/a" where a in [SPANNING, FLANKING, INREPEAT]
|
18
19
|
)
|
19
20
|
|
@@ -27,6 +28,7 @@ def build_genotype(gt_call):
|
|
27
28
|
alt_frequency=gt_call["alt_frequency"] or -1,
|
28
29
|
genotype_quality=gt_call["genotype_quality"],
|
29
30
|
so=gt_call["so"],
|
31
|
+
alt_mc=gt_call["alt_mc"],
|
30
32
|
ffpm=gt_call["ffpm"],
|
31
33
|
split_read=gt_call["split_read"],
|
32
34
|
)
|
scout/build/variant/variant.py
CHANGED
@@ -199,7 +199,12 @@ def build_variant(
|
|
199
199
|
variant_obj["str_pathologic_min"] = variant.get("str_pathologic_min")
|
200
200
|
variant_obj["str_ref"] = variant.get("str_ref")
|
201
201
|
variant_obj["str_repid"] = variant.get("str_repid")
|
202
|
+
variant_obj["str_trid"] = variant.get("str_trid")
|
203
|
+
variant_obj["str_pathologic_struc"] = variant.get("str_pathologic_struc")
|
204
|
+
variant_obj["str_struc"] = variant.get("str_struc")
|
205
|
+
variant_obj["str_motifs"] = variant.get("str_motifs")
|
202
206
|
variant_obj["str_ru"] = variant.get("str_ru")
|
207
|
+
variant_obj["str_display_ru"] = variant.get("str_display_ru")
|
203
208
|
variant_obj["str_source"] = variant.get("str_source")
|
204
209
|
variant_obj["str_status"] = variant.get("str_status")
|
205
210
|
variant_obj["str_swegen_mean"] = call_safe(float, variant.get("str_swegen_mean"))
|
scout/constants/clinvar.py
CHANGED
scout/constants/query_terms.py
CHANGED
@@ -31,7 +31,9 @@ PRIMARY_CRITERIA = ["clinsig"]
|
|
31
31
|
# such as a Pathogenic ClinSig.
|
32
32
|
SECONDARY_CRITERIA = [
|
33
33
|
"gnomad_frequency",
|
34
|
-
"
|
34
|
+
"local_obs_old",
|
35
|
+
"local_obs_cancer_somatic_old",
|
36
|
+
"local_obs_cancer_germline_old",
|
35
37
|
"local_obs_freq",
|
36
38
|
"clingen_ngi",
|
37
39
|
"swegen",
|
scout/models/variant/variant.py
CHANGED
scout/parse/variant/frequency.py
CHANGED
@@ -2,7 +2,41 @@ from typing import Dict
|
|
2
2
|
|
3
3
|
import cyvcf2
|
4
4
|
|
5
|
-
|
5
|
+
# SNV
|
6
|
+
SWEGEN_KEYS = ["swegen", "swegenAF", "SWEGENAF"]
|
7
|
+
THOUSAND_GENOMES_KEYS = ["1000GAF"]
|
8
|
+
THOUSAND_GENOMES_MAX_KEYS = ["1000G_MAX_AF"]
|
9
|
+
|
10
|
+
EXAC_KEYS = ["EXACAF"]
|
11
|
+
EXAC_MAX_KEYS = ["ExAC_MAX_AF", "EXAC_MAX_AF"]
|
12
|
+
|
13
|
+
# gnomAD has both SNV and SV
|
14
|
+
GNOMAD_INFO_KEYS = ["GNOMADAF", "GNOMAD_AF", "gnomADg_AF", "gnomad_svAF"]
|
15
|
+
GNOMAD_INFO_MAX_KEYS = ["gnomADg_AF_POPMAX", "GNOMADAF_popmax", "GNOMADAF_POPMAX", "GNOMADAF_MAX"]
|
16
|
+
|
17
|
+
# SV
|
18
|
+
CLINGEN_BENIGN_KEYS = [
|
19
|
+
"clingen_cgh_benignAF",
|
20
|
+
"clingen_cgh_benign",
|
21
|
+
"clingen_cgh_benignOCC",
|
22
|
+
]
|
23
|
+
|
24
|
+
CLINGEN_PATHOGENIC_KEYS = [
|
25
|
+
"clingen_cgh_pathogenicAF",
|
26
|
+
"clingen_cgh_pathogenic",
|
27
|
+
"clingen_cgh_pathogenicOCC",
|
28
|
+
]
|
29
|
+
|
30
|
+
CLINGEN_NGI_KEYS = ["clingen_ngi", "clingen_ngiAF", "clingen_ngiOCC"]
|
31
|
+
|
32
|
+
DECIPHER_KEYS = ["decipherAF", "decipher"]
|
33
|
+
CG_KEYS = ["clinical_genomics_mipAF", "clinical_genomics_mipOCC"]
|
34
|
+
|
35
|
+
# MEI
|
36
|
+
SWEGEN_ALU_KEYS = ["swegen_alu_FRQ", "swegen_alu_OCC"]
|
37
|
+
SWEGEN_HERV_KEYS = ["swegen_herv_FRQ", "swegen_herv_OCC"]
|
38
|
+
SWEGEN_L1_KEYS = ["swegen_l1_FRQ", "swegen_l1_OCC"]
|
39
|
+
SWEGEN_SVA_KEYS = ["swegen_sva_FRQ", "swegen_sva_OCC"]
|
6
40
|
|
7
41
|
|
8
42
|
def parse_frequencies(variant, transcripts):
|
@@ -19,37 +53,27 @@ def parse_frequencies(variant, transcripts):
|
|
19
53
|
frequencies(dict): A dictionary with the relevant frequencies
|
20
54
|
"""
|
21
55
|
frequencies = {}
|
22
|
-
# These lists could be extended...
|
23
|
-
thousand_genomes_keys = ["1000GAF"]
|
24
|
-
thousand_genomes_max_keys = ["1000G_MAX_AF"]
|
25
|
-
|
26
|
-
exac_keys = ["EXACAF"]
|
27
|
-
exac_max_keys = ["ExAC_MAX_AF", "EXAC_MAX_AF"]
|
28
56
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
update_frequency_from_vcf(frequencies, variant,
|
34
|
-
update_frequency_from_vcf(frequencies, variant,
|
35
|
-
update_frequency_from_vcf(frequencies, variant,
|
36
|
-
update_frequency_from_vcf(frequencies, variant, swegen_keys, "swegen")
|
37
|
-
update_frequency_from_vcf(frequencies, variant, gnomad_max_keys, "gnomad_max")
|
38
|
-
update_frequency_from_vcf(frequencies, variant, thousand_genomes_keys, "thousand_g")
|
39
|
-
update_frequency_from_vcf(frequencies, variant, thousand_genomes_max_keys, "thousand_g_max")
|
57
|
+
update_frequency_from_vcf(frequencies, variant, EXAC_KEYS, "exac")
|
58
|
+
update_frequency_from_vcf(frequencies, variant, EXAC_MAX_KEYS, "exac_max")
|
59
|
+
update_frequency_from_vcf(frequencies, variant, GNOMAD_INFO_KEYS, "gnomad")
|
60
|
+
update_frequency_from_vcf(frequencies, variant, SWEGEN_KEYS, "swegen")
|
61
|
+
update_frequency_from_vcf(frequencies, variant, GNOMAD_INFO_MAX_KEYS, "gnomad_max")
|
62
|
+
update_frequency_from_vcf(frequencies, variant, THOUSAND_GENOMES_KEYS, "thousand_g")
|
63
|
+
update_frequency_from_vcf(frequencies, variant, THOUSAND_GENOMES_MAX_KEYS, "thousand_g_max")
|
40
64
|
|
41
65
|
# For mitochondrial variants, keep both "hom" and "het" freqs
|
42
66
|
update_frequency_from_vcf(frequencies, variant, ["GNOMAD_MT_AF_HOM"], "gnomad_mt_homoplasmic")
|
43
67
|
update_frequency_from_vcf(frequencies, variant, ["GNOMAD_MT_AF_HET"], "gnomad_mt_heteroplasmic")
|
44
68
|
|
45
|
-
# Search transcripts if not found in VCF
|
46
|
-
if not frequencies:
|
47
|
-
update_frequency_from_transcript(frequencies, transcripts)
|
48
|
-
|
49
69
|
# These are SV-specific frequencies
|
50
70
|
update_frequency_from_vcf(frequencies, variant, ["left_1000GAF"], "thousand_g_left")
|
51
71
|
update_frequency_from_vcf(frequencies, variant, ["right_1000GAF"], "thousand_g_right")
|
52
72
|
|
73
|
+
# Search transcripts CSQ if not found in VCF INFO
|
74
|
+
if not frequencies:
|
75
|
+
update_frequency_from_transcript(frequencies, transcripts)
|
76
|
+
|
53
77
|
return frequencies
|
54
78
|
|
55
79
|
|
@@ -85,31 +109,14 @@ def parse_sv_frequencies(variant: cyvcf2.Variant) -> Dict:
|
|
85
109
|
"""
|
86
110
|
sv_frequencies = {}
|
87
111
|
|
88
|
-
|
89
|
-
"clingen_cgh_benignAF",
|
90
|
-
"clingen_cgh_benign",
|
91
|
-
"clingen_cgh_benignOCC",
|
92
|
-
]
|
93
|
-
|
94
|
-
clingen_pathogenic_keys = [
|
95
|
-
"clingen_cgh_pathogenicAF",
|
96
|
-
"clingen_cgh_pathogenic",
|
97
|
-
"clingen_cgh_pathogenicOCC",
|
98
|
-
]
|
99
|
-
|
100
|
-
clingen_ngi_keys = ["clingen_ngi", "clingen_ngiAF", "clingen_ngiOCC"]
|
101
|
-
|
102
|
-
decipher_keys = ["decipherAF", "decipher"]
|
103
|
-
cg_keys = ["clinical_genomics_mipAF", "clinical_genomics_mipOCC"]
|
104
|
-
|
105
|
-
update_sv_frequency_from_vcf(sv_frequencies, variant, clingen_benign_keys, "clingen_cgh_benign")
|
112
|
+
update_sv_frequency_from_vcf(sv_frequencies, variant, CLINGEN_BENIGN_KEYS, "clingen_cgh_benign")
|
106
113
|
update_sv_frequency_from_vcf(
|
107
|
-
sv_frequencies, variant,
|
114
|
+
sv_frequencies, variant, CLINGEN_PATHOGENIC_KEYS, "clingen_cgh_pathogenic"
|
108
115
|
)
|
109
|
-
update_sv_frequency_from_vcf(sv_frequencies, variant,
|
110
|
-
update_sv_frequency_from_vcf(sv_frequencies, variant,
|
111
|
-
update_sv_frequency_from_vcf(sv_frequencies, variant,
|
112
|
-
update_sv_frequency_from_vcf(sv_frequencies, variant,
|
116
|
+
update_sv_frequency_from_vcf(sv_frequencies, variant, CLINGEN_NGI_KEYS, "clingen_ngi")
|
117
|
+
update_sv_frequency_from_vcf(sv_frequencies, variant, SWEGEN_KEYS, "swegen")
|
118
|
+
update_sv_frequency_from_vcf(sv_frequencies, variant, DECIPHER_KEYS, "decipher")
|
119
|
+
update_sv_frequency_from_vcf(sv_frequencies, variant, CG_KEYS, "clingen_mip")
|
113
120
|
|
114
121
|
return sv_frequencies
|
115
122
|
|
@@ -117,17 +124,12 @@ def parse_sv_frequencies(variant: cyvcf2.Variant) -> Dict:
|
|
117
124
|
def parse_mei_frequencies(variant: cyvcf2.Variant) -> Dict:
|
118
125
|
"""Parsing of some custom mei frequencies."""
|
119
126
|
|
120
|
-
swegen_alu_keys = ["swegen_alu_FRQ", "swegen_alu_OCC"]
|
121
|
-
swegen_herv_keys = ["swegen_herv_FRQ", "swegen_herv_OCC"]
|
122
|
-
swegen_l1_keys = ["swegen_l1_FRQ", "swegen_l1_OCC"]
|
123
|
-
swegen_sva_keys = ["swegen_sva_FRQ", "swegen_sva_OCC"]
|
124
|
-
|
125
127
|
mei_frequencies = {}
|
126
128
|
|
127
|
-
update_sv_frequency_from_vcf(mei_frequencies, variant,
|
128
|
-
update_sv_frequency_from_vcf(mei_frequencies, variant,
|
129
|
-
update_sv_frequency_from_vcf(mei_frequencies, variant,
|
130
|
-
update_sv_frequency_from_vcf(mei_frequencies, variant,
|
129
|
+
update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_ALU_KEYS, "swegen_alu")
|
130
|
+
update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_HERV_KEYS, "swegen_herv")
|
131
|
+
update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_L1_KEYS, "swegen_l1")
|
132
|
+
update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_SVA_KEYS, "swegen_sva")
|
131
133
|
|
132
134
|
if any(mei_frequencies.values()):
|
133
135
|
max_mei_frequency = max(mei_frequencies.values())
|
scout/parse/variant/genotype.py
CHANGED
@@ -19,7 +19,7 @@ Uses 'DV' to describe number of paired ends that supports the event and
|
|
19
19
|
"""
|
20
20
|
|
21
21
|
import logging
|
22
|
-
from typing import Dict, List, Optional, Tuple
|
22
|
+
from typing import Dict, List, Optional, Tuple, Union
|
23
23
|
|
24
24
|
import cyvcf2
|
25
25
|
|
@@ -103,6 +103,14 @@ def parse_genotype(variant, ind, pos):
|
|
103
103
|
(flanking_ref, flanking_alt) = _parse_format_entry(variant, pos, "ADFL")
|
104
104
|
(inrepeat_ref, inrepeat_alt) = _parse_format_entry(variant, pos, "ADIR")
|
105
105
|
|
106
|
+
# TRGT long read STR specific
|
107
|
+
(_, mc_alt) = _parse_format_entry_trgt_mc(variant, pos)
|
108
|
+
gt_call["alt_mc"] = mc_alt
|
109
|
+
|
110
|
+
(sd_ref, sd_alt) = _parse_format_entry(variant, pos, "SD", float)
|
111
|
+
(ap_ref, ap_alt) = _parse_format_entry(variant, pos, "AP", float)
|
112
|
+
(am_ref, am_alt) = _parse_format_entry(variant, pos, "AM", float)
|
113
|
+
|
106
114
|
# MEI specific
|
107
115
|
(spanning_mei_ref, clip5_alt, clip3_alt) = get_mei_reads(
|
108
116
|
variant, pos
|
@@ -395,33 +403,48 @@ def get_str_so(variant, pos):
|
|
395
403
|
return str_so
|
396
404
|
|
397
405
|
|
398
|
-
def _parse_format_entry(variant, pos, format_entry_name):
|
399
|
-
"""
|
400
|
-
Expects that ref/alt values could be separated by
|
406
|
+
def split_values(values: Union[str, List[str]]) -> List[str]:
    """Split raw FORMAT entry values into their individual components.

    Expects that ref/alt values could be separated by "/" or ",".

    Args:
        values: a list of raw entry strings, or a single raw entry string.

    Returns:
        A flat list where every entry has been split on "/" and ",".
        Entries without any delimiter are kept unchanged, in order.

    Raises:
        TypeError: if an item is not a string. The FORMAT entry parser that
            calls this function catches TypeError and reports no value.
    """
    if isinstance(values, str):
        # A bare string would otherwise be walked character by character,
        # turning "10/20" into ["", ""].
        values = [values]

    new_values = []
    for value in values:
        if not isinstance(value, str):
            raise TypeError(
                "expected a string FORMAT value, got {}".format(type(value).__name__)
            )
        # Accumulate instead of overwriting, so earlier entries are not lost
        # when more than one entry contains a delimiter.
        new_values.extend(value.replace("/", ",").split(","))

    return new_values
|
421
|
+
|
422
|
+
|
423
|
+
def _parse_format_entry(
|
424
|
+
variant: cyvcf2.Variant,
|
425
|
+
pos: int,
|
426
|
+
format_entry_name: str,
|
427
|
+
number_format: Optional[Union[float, int]] = int,
|
428
|
+
) -> Tuple[Union[float, int], ...]:
|
429
|
+
"""Parse genotype format entry for named integer values.
|
430
|
+
Expects that ref/alt values could be separated by "/" or ",".
|
431
|
+
Give individual position in VCF as pos and name of format entry to parse as format_entry_name.
|
408
432
|
"""
|
409
433
|
|
410
434
|
ref = None
|
411
435
|
alt = None
|
412
436
|
if format_entry_name in variant.FORMAT:
|
413
437
|
try:
|
414
|
-
|
415
|
-
values = list(value.split("/"))
|
438
|
+
values = split_values(variant.format(format_entry_name)[pos])
|
416
439
|
|
417
440
|
ref_value = None
|
418
441
|
alt_value = None
|
419
442
|
|
420
443
|
if len(values) > 1:
|
421
|
-
ref_value =
|
422
|
-
alt_value =
|
444
|
+
ref_value = (number_format)(values[0])
|
445
|
+
alt_value = (number_format)(values[1])
|
423
446
|
if len(values) == 1:
|
424
|
-
alt_value =
|
447
|
+
alt_value = (number_format)(values[0])
|
425
448
|
if ref_value >= 0:
|
426
449
|
ref = ref_value
|
427
450
|
if alt_value >= 0:
|
@@ -429,3 +452,54 @@ def _parse_format_entry(variant, pos, format_entry_name):
|
|
429
452
|
except (ValueError, TypeError) as _ignore_error:
|
430
453
|
pass
|
431
454
|
return (ref, alt)
|
455
|
+
|
456
|
+
|
457
|
+
def _parse_format_entry_trgt_mc(variant: cyvcf2.Variant, pos: int):
    """Parse genotype entry for TRGT FORMAT MC

    The MC format contains the Motif Counts for each allele, separated with "," and each motif in an expansion,
    as a "_" separated list of the different available enumerated motifs. For some loci,
    only certain motifs count towards a pathologic size, and if so a PathologicStruc INFO key is passed.
    E.g. for non-reference motifs and more complex loci or alleles with different motifs.
    As usual, VCF lines are decomposed, so at most one alt is present per entry.
    The GT position gives us a ref index for any allele 0 in the call.

    Args:
        variant: the VCF record; FORMAT, format("MC"), genotypes and INFO are read.
        pos: index of the individual among the samples of the record.

    Returns:
        tuple (mc_ref, mc_alt): the motif count of the reference allele and of the
        alternative allele. Either is None when it could not be determined.
    """

    mc_ref = None
    mc_alt = None

    if "MC" not in variant.FORMAT:
        return (mc_ref, mc_alt)

    mc = variant.format("MC")[pos]
    if not mc:
        return (mc_ref, mc_alt)

    ref_idx = None
    gt = variant.genotypes[pos]
    if gt:
        for idx, allele in enumerate(gt):
            # NOTE(review): cyvcf2 genotype lists presumably end with a boolean
            # phased flag - confirm. False == 0 in Python, so a boolean must not
            # be mistaken for a reference allele.
            if isinstance(allele, bool):
                continue
            if allele == 0:
                ref_idx = idx

    pathologic_struc = variant.INFO.get("PathologicStruc", None)
    for idx, allele in enumerate(mc.split(",")):
        mcs = allele.split("_")

        if len(mcs) > 1:
            # Without a PathologicStruc every motif counts towards the size.
            pathologic_mcs = pathologic_struc or range(len(mcs))

            # Computed afresh for each allele, so counts from one allele never
            # leak into the next one.
            pathologic_counts = sum(
                int(count) for index, count in enumerate(mcs) if index in pathologic_mcs
            )
        else:
            pathologic_counts = int(allele)

        if ref_idx is not None and idx == ref_idx:
            mc_ref = pathologic_counts
            continue

        mc_alt = pathologic_counts

    return (mc_ref, mc_alt)
|
@@ -5,6 +5,10 @@ from scout.constants import SO_TERMS
|
|
5
5
|
|
6
6
|
LOG = logging.getLogger(__name__)
|
7
7
|
|
8
|
+
# gnomAD transcript CSQ keys. Use plain (older) AF if available. For a secondary choice, prefer genomes over exomes.
|
9
|
+
GNOMAD_CSQ_KEYS = ["GNOMAD_AF", "GNOMADG_AF", "GNOMAD_EXOMES_AF"]
|
10
|
+
THOUSAND_GENOMES_CSQ_KEYS = ["AF", "1000GAF", "1000GP3_AF"]
|
11
|
+
|
8
12
|
|
9
13
|
def parse_transcripts(raw_transcripts):
|
10
14
|
"""Parse transcript information from VCF variants
|
@@ -303,6 +307,10 @@ def set_variant_frequencies(transcript, entry):
|
|
303
307
|
* 'gnomAD_AF' - gnomAD exomes, all populations combined
|
304
308
|
* 'gnomAD_xxx_AF' - gnomAD exomes, individual populations
|
305
309
|
* 'MAX_AF' - Max of all populations (1000G, gnomAD exomes, ESP)
|
310
|
+
In VEP 107/111 keys are
|
311
|
+
* 'gnomADg_AF' - genomes
|
312
|
+
* 'gnomAD_exomes_AF' - exomes
|
313
|
+
* 1000GP3_AF - 1000G Phase 3
|
306
314
|
|
307
315
|
Reference: https://www.ensembl.org/info/docs/tools/vep/vep_formats.html
|
308
316
|
"""
|
@@ -312,30 +320,30 @@ def set_variant_frequencies(transcript, entry):
|
|
312
320
|
try:
|
313
321
|
for key in entry:
|
314
322
|
# All frequencies endswith AF
|
315
|
-
if not key.endswith("AF"):
|
323
|
+
if not (key.endswith("AF") or key.endswith("POPMAX")):
|
316
324
|
continue
|
317
325
|
|
318
326
|
value = entry[key]
|
319
|
-
if not value:
|
327
|
+
if not value or value == ".":
|
320
328
|
continue
|
321
329
|
|
322
|
-
|
323
|
-
if key == "AF" or key == "1000GAF":
|
330
|
+
if key in THOUSAND_GENOMES_CSQ_KEYS:
|
324
331
|
transcript["thousand_g_maf"] = float(value)
|
325
332
|
continue
|
326
333
|
|
327
|
-
|
328
|
-
|
329
|
-
|
334
|
+
for gnomad_ordered_key in GNOMAD_CSQ_KEYS:
|
335
|
+
if key == gnomad_ordered_key:
|
336
|
+
transcript["gnomad_maf"] = float(value)
|
337
|
+
break
|
330
338
|
|
331
339
|
if key == "EXAC_MAX_AF":
|
332
340
|
transcript["exac_max"] = float(value)
|
333
341
|
transcript["exac_maf"] = float(value)
|
334
342
|
continue
|
335
343
|
|
344
|
+
# remaining gnomAD or 1000G subpopulation frequencies and/or popmax values
|
336
345
|
if "GNOMAD" in key:
|
337
346
|
gnomad_freqs.append(float(value))
|
338
|
-
|
339
347
|
else:
|
340
348
|
thousandg_freqs.append(float(value))
|
341
349
|
|
@@ -354,4 +362,4 @@ def set_variant_frequencies(transcript, entry):
|
|
354
362
|
)
|
355
363
|
LOG.debug("Exception details", exc_info=True)
|
356
364
|
LOG.debug("Current entry: %s", entry)
|
357
|
-
LOG.warning("Only
|
365
|
+
LOG.warning("Only decomposed/split and normalised VEP v90+ frequencies are supported")
|
scout/parse/variant/variant.py
CHANGED
@@ -393,6 +393,18 @@ def set_str_info(variant: Variant, parsed_variant: Dict[str, Any]):
|
|
393
393
|
# repeat id generally corresponds to gene symbol
|
394
394
|
parsed_variant["str_repid"] = call_safe(str, variant.INFO.get("REPID"))
|
395
395
|
|
396
|
+
# repeat id from trgt - generally corresponds to gene symbol and/or disease
|
397
|
+
parsed_variant["str_trid"] = call_safe(str, variant.INFO.get("TRID"))
|
398
|
+
|
399
|
+
# repeat unit - used e g in PanelApp naming of STRs
|
400
|
+
parsed_variant["str_struc"] = call_safe(str, variant.INFO.get("STRUC"))
|
401
|
+
|
402
|
+
# repeat motif(s) - used e g in TRGT MC motif splits
|
403
|
+
parsed_variant["str_motifs"] = call_safe(str, variant.INFO.get("MOTIFS"))
|
404
|
+
|
405
|
+
# repeat pathologic motifs structure - list of indices of pathologic motifs counting towards MC
|
406
|
+
parsed_variant["str_pathologic_struc"] = call_safe(str, variant.INFO.get("PathologicStruc"))
|
407
|
+
|
396
408
|
# repeat unit - used e g in PanelApp naming of STRs
|
397
409
|
parsed_variant["str_ru"] = call_safe(str, variant.INFO.get("RU"))
|
398
410
|
|
scout/server/app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Code for flask app"""
|
2
|
+
|
2
3
|
import logging
|
3
4
|
import os
|
4
5
|
from datetime import timedelta
|
@@ -10,7 +11,7 @@ from flask import Flask, current_app, redirect, request, url_for
|
|
10
11
|
from flask_babel import Babel
|
11
12
|
from flask_cors import CORS
|
12
13
|
from flask_login import current_user
|
13
|
-
from
|
14
|
+
from markdown import markdown as python_markdown
|
14
15
|
from markupsafe import Markup
|
15
16
|
|
16
17
|
from . import extensions
|
@@ -109,8 +110,6 @@ def configure_extensions(app):
|
|
109
110
|
extensions.login_manager.init_app(app)
|
110
111
|
extensions.mail.init_app(app)
|
111
112
|
|
112
|
-
Markdown(app)
|
113
|
-
|
114
113
|
if app.config.get("SQLALCHEMY_DATABASE_URI"):
|
115
114
|
LOG.info("Chanjo extension enabled")
|
116
115
|
configure_coverage(app)
|
@@ -227,6 +226,10 @@ def register_filters(app):
|
|
227
226
|
# round all other numbers
|
228
227
|
return round(number, ndigits)
|
229
228
|
|
229
|
+
@app.template_filter()
|
230
|
+
def markdown(text: str) -> Markup:
|
231
|
+
return Markup(python_markdown(text))
|
232
|
+
|
230
233
|
@app.template_filter()
|
231
234
|
def tuple_list_to_dict(tuple_list, key_elem, value_elem):
|
232
235
|
"""Accepts a list of tuples and returns a dictionary with tuple element = key_elem as keys and tuple element = value_elem as values"""
|
@@ -1,5 +1,5 @@
|
|
1
1
|
{% macro igv_script() %}
|
2
2
|
<link rel="shortcut icon" href="//igv.org/web/img/favicon.ico">
|
3
3
|
<!-- IGV JS-->
|
4
|
-
<script src="https://cdn.jsdelivr.net/npm/igv@2.15.
|
4
|
+
<script src="https://cdn.jsdelivr.net/npm/igv@2.15.11/dist/igv.min.js"></script>
|
5
5
|
{% endmacro %}
|