scout-browser 4.92__py3-none-any.whl → 4.95.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/adapter/mongo/base.py +3 -0
- scout/adapter/mongo/case.py +27 -2
- scout/adapter/mongo/ccv.py +131 -0
- scout/adapter/mongo/hgnc.py +5 -1
- scout/adapter/mongo/managed_variant.py +4 -2
- scout/adapter/mongo/query.py +91 -54
- scout/adapter/mongo/variant.py +17 -11
- scout/adapter/mongo/variant_events.py +45 -1
- scout/build/ccv.py +59 -0
- scout/build/panel.py +1 -1
- scout/commands/export/export_command.py +0 -0
- scout/commands/load/base.py +0 -0
- scout/commands/load/user.py +0 -0
- scout/commands/serve.py +2 -1
- scout/commands/update/disease.py +0 -0
- scout/commands/update/genes.py +0 -0
- scout/commands/wipe_database.py +0 -0
- scout/constants/__init__.py +2 -0
- scout/constants/case_tags.py +2 -0
- scout/constants/ccv.py +244 -0
- scout/constants/gene_tags.py +22 -12
- scout/demo/643594.config.yaml +2 -2
- scout/demo/643594.research.mei.vcf.gz +0 -0
- scout/demo/643594.research.mei.vcf.gz.tbi +0 -0
- scout/demo/images/custom_images/1300x1000.jpg +0 -0
- scout/load/panelapp.py +8 -12
- scout/models/ccv_evaluation.py +26 -0
- scout/models/variant/variant.py +1 -0
- scout/parse/omim.py +5 -6
- scout/parse/panelapp.py +16 -42
- scout/parse/variant/compound.py +20 -21
- scout/parse/variant/gene.py +0 -0
- scout/parse/variant/genotype.py +0 -0
- scout/resources/custom_igv_tracks/mane.bb +0 -0
- scout/server/blueprints/cases/controllers.py +48 -0
- scout/server/blueprints/cases/templates/cases/case_report.html +61 -1
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +2 -2
- scout/server/blueprints/cases/templates/cases/index.html +0 -2
- scout/server/blueprints/cases/views.py +5 -5
- scout/server/blueprints/clinvar/controllers.py +4 -5
- scout/server/blueprints/institutes/controllers.py +129 -67
- scout/server/blueprints/institutes/forms.py +5 -2
- scout/server/blueprints/institutes/templates/overview/cases.html +6 -0
- scout/server/blueprints/institutes/templates/overview/causatives.html +1 -1
- scout/server/blueprints/institutes/templates/overview/utils.html +18 -6
- scout/server/blueprints/institutes/templates/overview/verified.html +1 -1
- scout/server/blueprints/institutes/views.py +4 -0
- scout/server/blueprints/managed_variants/forms.py +17 -2
- scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +2 -2
- scout/server/blueprints/panels/controllers.py +5 -6
- scout/server/blueprints/panels/templates/panels/panel.html +5 -5
- scout/server/blueprints/variant/controllers.py +148 -1
- scout/server/blueprints/variant/templates/variant/cancer-variant.html +1 -1
- scout/server/blueprints/variant/templates/variant/ccv.html +183 -0
- scout/server/blueprints/variant/templates/variant/components.html +86 -5
- scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
- scout/server/blueprints/variant/templates/variant/tx_overview.html +3 -3
- scout/server/blueprints/variant/templates/variant/variant.html +1 -1
- scout/server/blueprints/variant/templates/variant/variant_details.html +29 -11
- scout/server/blueprints/variant/utils.py +21 -1
- scout/server/blueprints/variant/views.py +115 -5
- scout/server/blueprints/variants/controllers.py +31 -0
- scout/server/blueprints/variants/forms.py +33 -5
- scout/server/blueprints/variants/templates/variants/cancer-sv-variants.html +4 -18
- scout/server/blueprints/variants/templates/variants/cancer-variants.html +4 -13
- scout/server/blueprints/variants/templates/variants/components.html +77 -73
- scout/server/blueprints/variants/templates/variants/indicators.html +11 -0
- scout/server/blueprints/variants/templates/variants/sv-variants.html +2 -2
- scout/server/links.py +1 -1
- scout/server/static/custom_images.js +19 -2
- scout/utils/acmg.py +0 -1
- scout/utils/ccv.py +193 -0
- scout/utils/link.py +4 -3
- scout/utils/md5.py +0 -0
- {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/METADATA +67 -45
- {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/RECORD +70 -65
- {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/WHEEL +1 -2
- scout/__version__.py +0 -1
- scout/demo/images/custom_images/640x480_two.jpg +0 -0
- scout_browser-4.92.dist-info/top_level.txt +0 -1
- {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info/licenses}/LICENSE +0 -0
scout/constants/__init__.py
CHANGED
@@ -23,6 +23,7 @@ from .case_tags import (
|
|
23
23
|
VERBS_ICONS_MAP,
|
24
24
|
VERBS_MAP,
|
25
25
|
)
|
26
|
+
from .ccv import CCV_COMPLETE_MAP, CCV_CRITERIA, CCV_MAP, CCV_OPTIONS, REV_CCV_MAP
|
26
27
|
from .clinvar import (
|
27
28
|
AFFECTED_STATUS,
|
28
29
|
ALLELE_OF_ORIGIN,
|
@@ -126,6 +127,7 @@ COLLECTIONS = [
|
|
126
127
|
"disease_term",
|
127
128
|
"variant",
|
128
129
|
"acmg",
|
130
|
+
"ccv",
|
129
131
|
]
|
130
132
|
|
131
133
|
BUILDS = ["37", "38", "GRCh38"]
|
scout/constants/case_tags.py
CHANGED
@@ -43,6 +43,7 @@ CASE_REPORT_VARIANT_TYPES = {
|
|
43
43
|
"partial_causatives_detailed": "partial_causatives",
|
44
44
|
"suspects_detailed": "suspects",
|
45
45
|
"classified_detailed": "acmg_classification",
|
46
|
+
"ccv_classified_detailed": "ccv_classification",
|
46
47
|
"tagged_detailed": "manual_rank",
|
47
48
|
"tier_detailed": "cancer_tier",
|
48
49
|
"dismissed_detailed": "dismiss_variant",
|
@@ -105,6 +106,7 @@ CASE_TAGS = {
|
|
105
106
|
|
106
107
|
VERBS_MAP = {
|
107
108
|
"acmg": "updated ACMG classification for",
|
109
|
+
"ccv": "updated ClinGen-CGC-VIGG classification for",
|
108
110
|
"add_case": "added case",
|
109
111
|
"add_cohort": "updated cohort for",
|
110
112
|
"add_phenotype": "added HPO term for",
|
scout/constants/ccv.py
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
from collections import OrderedDict
|
3
|
+
|
4
|
+
# Ordered from most oncogenic to most confidently benign
|
5
|
+
CCV_MAP = OrderedDict(
|
6
|
+
[
|
7
|
+
(4, "oncogenic"),
|
8
|
+
(3, "likely_oncogenic"),
|
9
|
+
(0, "uncertain_significance"),
|
10
|
+
(2, "likely_benign"),
|
11
|
+
(1, "benign"),
|
12
|
+
]
|
13
|
+
)
|
14
|
+
# <a href="https://cancerhotspots.org" target="_blank">cancerhotspots.org</a>
|
15
|
+
REV_CCV_MAP = OrderedDict([(value, key) for key, value in CCV_MAP.items()])
|
16
|
+
|
17
|
+
CCV_OPTIONS = [
|
18
|
+
{"code": "oncogenic", "short": "O", "label": "Oncogenic", "color": "danger"},
|
19
|
+
{
|
20
|
+
"code": "likely_oncogenic",
|
21
|
+
"short": "LO",
|
22
|
+
"label": "Likely Oncogenic",
|
23
|
+
"color": "warning",
|
24
|
+
},
|
25
|
+
{
|
26
|
+
"code": "uncertain_significance",
|
27
|
+
"short": "VUS",
|
28
|
+
"label": "Uncertain Significance",
|
29
|
+
"color": "primary",
|
30
|
+
},
|
31
|
+
{"code": "likely_benign", "short": "LB", "label": "Likely Benign", "color": "info"},
|
32
|
+
{"code": "benign", "short": "B", "label": "Benign", "color": "success"},
|
33
|
+
]
|
34
|
+
|
35
|
+
CCV_COMPLETE_MAP = OrderedDict([(option["code"], option) for option in CCV_OPTIONS])
|
36
|
+
|
37
|
+
CCV_CRITERIA = OrderedDict()
|
38
|
+
|
39
|
+
CCV_CRITERIA["oncogenicity"] = OrderedDict(
|
40
|
+
[
|
41
|
+
(
|
42
|
+
"Very Strong",
|
43
|
+
OrderedDict(
|
44
|
+
[
|
45
|
+
(
|
46
|
+
"OVS1",
|
47
|
+
{
|
48
|
+
"short": "Null variant in tumor supressor",
|
49
|
+
"description": "Null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single-exon or multiexon deletion) in a bona fide tumor suppressor gene.",
|
50
|
+
"documentation": 'Strength can be modified based on <a href="https://pubmed.ncbi.nlm.nih.gov/30192042/" target="blank">ClinGen’s recommendations for PVS1</a>',
|
51
|
+
},
|
52
|
+
)
|
53
|
+
]
|
54
|
+
),
|
55
|
+
),
|
56
|
+
(
|
57
|
+
"Strong",
|
58
|
+
OrderedDict(
|
59
|
+
[
|
60
|
+
(
|
61
|
+
"OS1",
|
62
|
+
{
|
63
|
+
"short": "Same aa change as known oncogenic variant",
|
64
|
+
"description": "Same amino acid change as a previously established oncogenic variant (using this standard) regardless of nucleotide change.",
|
65
|
+
},
|
66
|
+
),
|
67
|
+
(
|
68
|
+
"OS2",
|
69
|
+
{
|
70
|
+
"short": "Well-established functional studies",
|
71
|
+
"description": "Well-established in vitro or in vivo functional studies, supportive of an oncogenic effect of the variant.",
|
72
|
+
},
|
73
|
+
),
|
74
|
+
(
|
75
|
+
"OS3",
|
76
|
+
{
|
77
|
+
"short": "Cancer hotspot: high frequency",
|
78
|
+
"description": "Located in one of the hotspots in cancerhotspots.org with at least 50 samples with a somatic variant at the same amino acid position, and the same amino acid change count in cancerhotspots.org in at least 10 samples.",
|
79
|
+
},
|
80
|
+
),
|
81
|
+
]
|
82
|
+
),
|
83
|
+
),
|
84
|
+
(
|
85
|
+
"Moderate",
|
86
|
+
OrderedDict(
|
87
|
+
[
|
88
|
+
(
|
89
|
+
"OM1",
|
90
|
+
{
|
91
|
+
"short": "Functional domain",
|
92
|
+
"description": "Located in a critical and well-established part of a functional domain (eg, active site of an enzyme).",
|
93
|
+
},
|
94
|
+
),
|
95
|
+
(
|
96
|
+
"OM2",
|
97
|
+
{
|
98
|
+
"short": "Protein length change",
|
99
|
+
"description": "Protein length changes as a result of in-frame deletions/insertions in a known oncogene or tumor suppressor gene or stop-loss variants in a known tumor suppressor gene.",
|
100
|
+
},
|
101
|
+
),
|
102
|
+
(
|
103
|
+
"OM3",
|
104
|
+
{
|
105
|
+
"short": "Cancer hotspot: moderate frequency",
|
106
|
+
"description": "Located in one of the hotspots in cancerhotspots.org with <50 samples with a somatic variant at the same amino acid position, and the same amino acid change count in cancerhotspots.org is at least 10.",
|
107
|
+
},
|
108
|
+
),
|
109
|
+
(
|
110
|
+
"OM4",
|
111
|
+
{
|
112
|
+
"short": "Missense variant at aa with other oncogenic missense variant",
|
113
|
+
"description": "Missense variant at an amino acid residue where a different missense variant determined to be oncogenic (using this standard) has been documented. Amino acid difference from reference amino acid should be greater or at least approximately the same as for missense change determined to be oncogenic.",
|
114
|
+
},
|
115
|
+
),
|
116
|
+
]
|
117
|
+
),
|
118
|
+
),
|
119
|
+
(
|
120
|
+
"Supporting",
|
121
|
+
OrderedDict(
|
122
|
+
[
|
123
|
+
(
|
124
|
+
"OP1",
|
125
|
+
{
|
126
|
+
"short": "Computatinal evidence",
|
127
|
+
"description": "All used lines of computational evidence support an oncogenic effect of a variant (conservation/evolutionary, splicing effect, etc.).",
|
128
|
+
},
|
129
|
+
),
|
130
|
+
(
|
131
|
+
"OP2",
|
132
|
+
{
|
133
|
+
"short": "Gene in a malignancy with a single genetic etiology",
|
134
|
+
"description": "Somatic variant in a gene in a malignancy with a single genetic etiology. Example: retinoblastoma is caused by bi-allelic RB1 inactivation.",
|
135
|
+
},
|
136
|
+
),
|
137
|
+
(
|
138
|
+
"OP3",
|
139
|
+
{
|
140
|
+
"short": "Cancer hotspots: low frequency",
|
141
|
+
"description": "Located in one of the hotspots in cancerhotspots.org and the particular amino acid change count in cancerhotspots.org is below 10",
|
142
|
+
},
|
143
|
+
),
|
144
|
+
(
|
145
|
+
"OP4",
|
146
|
+
{
|
147
|
+
"short": "Absent in population databases",
|
148
|
+
"description": "Absent from controls (or at an extremely low frequency) in gnomAD.",
|
149
|
+
},
|
150
|
+
),
|
151
|
+
]
|
152
|
+
),
|
153
|
+
),
|
154
|
+
]
|
155
|
+
)
|
156
|
+
|
157
|
+
CCV_CRITERIA["benign impact"] = OrderedDict(
|
158
|
+
[
|
159
|
+
(
|
160
|
+
"Very Strong",
|
161
|
+
OrderedDict(
|
162
|
+
[
|
163
|
+
(
|
164
|
+
"SBVS1",
|
165
|
+
{
|
166
|
+
"short": "MAF is >0.05",
|
167
|
+
"description": "Minor allele frequency is >5%% in gnomAD in any 5 general continental populations: African, East Asian, European (non-Finnish), Latino, and South Asian.",
|
168
|
+
},
|
169
|
+
)
|
170
|
+
]
|
171
|
+
),
|
172
|
+
),
|
173
|
+
(
|
174
|
+
"Strong",
|
175
|
+
OrderedDict(
|
176
|
+
[
|
177
|
+
(
|
178
|
+
"SBS1",
|
179
|
+
{
|
180
|
+
"short": "MAF is >0.01",
|
181
|
+
"description": "Minor allele frequency is >1%% in gnomAD in any 5 general continental populations: African, East Asian, European (non-Finnish), Latino, and South Asian. ",
|
182
|
+
},
|
183
|
+
),
|
184
|
+
(
|
185
|
+
"SBS2",
|
186
|
+
{
|
187
|
+
"short": "Well-established functional studies",
|
188
|
+
"description": "Well-established in vitro or in vivo functional studies show no oncogenic effects.",
|
189
|
+
},
|
190
|
+
),
|
191
|
+
]
|
192
|
+
),
|
193
|
+
),
|
194
|
+
(
|
195
|
+
"Supporting",
|
196
|
+
OrderedDict(
|
197
|
+
[
|
198
|
+
(
|
199
|
+
"SBP1",
|
200
|
+
{
|
201
|
+
"short": "Computational evidence",
|
202
|
+
"description": "All used lines of computational evidence suggest no effect of a variant (conservation/evolutionary, splicing effect, etc.).",
|
203
|
+
},
|
204
|
+
),
|
205
|
+
(
|
206
|
+
"SBP2",
|
207
|
+
{
|
208
|
+
"short": "Silent mutation (no predicted impact on splicing)",
|
209
|
+
"description": "A synonymous (silent) variant for which splicing prediction algorithms predict no effect on the splice consensus sequence nor the creation of a new splice site and the nucleotide is not highly conserved.",
|
210
|
+
},
|
211
|
+
),
|
212
|
+
]
|
213
|
+
),
|
214
|
+
),
|
215
|
+
]
|
216
|
+
)
|
217
|
+
|
218
|
+
CCV_POTENTIAL_CONFLICTS = [
|
219
|
+
(
|
220
|
+
"OS2",
|
221
|
+
"OS1",
|
222
|
+
"If OS1 is applicable, OS2 can be used only if functional studies are based on the particular nucleotide change of the variant.",
|
223
|
+
),
|
224
|
+
(
|
225
|
+
"OS3",
|
226
|
+
"OS1",
|
227
|
+
"OS3 cannot be used if OS1 is applicable, unless it is possible to observe hotspots on the basis of the particular nucleotide change.",
|
228
|
+
),
|
229
|
+
(
|
230
|
+
"OM1",
|
231
|
+
"OVS1",
|
232
|
+
"OM1 cannot be used if OVS1 is applicable.",
|
233
|
+
),
|
234
|
+
(
|
235
|
+
"OM3",
|
236
|
+
"OM1",
|
237
|
+
"OM3 cannot be used if OM1 is applicable.",
|
238
|
+
),
|
239
|
+
(
|
240
|
+
"OM3",
|
241
|
+
"OM4",
|
242
|
+
"OM3 cannot be used if OM4 is applicable.",
|
243
|
+
),
|
244
|
+
]
|
scout/constants/gene_tags.py
CHANGED
@@ -33,20 +33,30 @@ INHERITANCE_PALETTE = {
|
|
33
33
|
"other": {"bgcolor": "bg-light", "text_color": "text-dark"},
|
34
34
|
}
|
35
35
|
|
36
|
-
INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "
|
36
|
+
INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "None": None, "Complete": False, "Incomplete": True}
|
37
37
|
|
38
38
|
MODELS_MAP = {
|
39
|
-
"
|
40
|
-
"
|
41
|
-
"
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
|
46
|
-
|
47
|
-
"
|
48
|
-
"
|
49
|
-
"
|
39
|
+
"MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted": ["AD"],
|
40
|
+
"MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown": ["AD"],
|
41
|
+
"MONOALLELIC, autosomal or pseudoautosomal, maternally imprinted (paternal allele expressed)": [
|
42
|
+
"AD"
|
43
|
+
],
|
44
|
+
"MONOALLELIC, autosomal or pseudoautosomal, paternally imprinted (maternal allele expressed)": [
|
45
|
+
"AD"
|
46
|
+
],
|
47
|
+
"BIALLELIC, autosomal or pseudoautosomal": ["AR"],
|
48
|
+
"BOTH monoallelic and biallelic, autosomal or pseudoautosomal": ["AD", "AR"],
|
49
|
+
"BOTH monoallelic and biallelic (but BIALLELIC mutations cause a more SEVERE disease form), autosomal or pseudoautosomal": [
|
50
|
+
"AD",
|
51
|
+
"AR",
|
52
|
+
],
|
53
|
+
"X-LINKED: hemizygous mutation in males, biallelic mutations in females": ["XR"],
|
54
|
+
"X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)": [
|
55
|
+
"XD"
|
56
|
+
],
|
57
|
+
"MITOCHONDRIAL": ["MT"],
|
58
|
+
"Other": [],
|
59
|
+
"Other - please specifiy in evaluation comments": [],
|
50
60
|
}
|
51
61
|
|
52
62
|
PANEL_GENE_INFO_TRANSCRIPTS = [
|
scout/demo/643594.config.yaml
CHANGED
@@ -113,8 +113,8 @@ custom_images:
|
|
113
113
|
path: scout/demo/images/custom_images/640x480_one.png
|
114
114
|
- title: A jpg image
|
115
115
|
description: A very good description
|
116
|
-
width:
|
117
|
-
path: scout/demo/images/custom_images/
|
116
|
+
width: 1300
|
117
|
+
path: scout/demo/images/custom_images/1300x1000.jpg
|
118
118
|
section_two:
|
119
119
|
- title: An SVG image
|
120
120
|
description: Another very good description
|
Binary file
|
Binary file
|
Binary file
|
scout/load/panelapp.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
import logging
|
2
2
|
import math
|
3
3
|
from datetime import datetime
|
4
|
-
from typing import
|
4
|
+
from typing import List, Set
|
5
5
|
|
6
|
-
from click import
|
6
|
+
from click import progressbar
|
7
7
|
|
8
8
|
from scout.adapter import MongoAdapter
|
9
9
|
from scout.constants.panels import PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
|
@@ -28,15 +28,11 @@ def load_panelapp_panel(
|
|
28
28
|
LOG.info("Fetching all panel app panels")
|
29
29
|
panel_ids: List[str] = panelapp.get_panel_ids(signed_off=False)
|
30
30
|
|
31
|
-
|
32
|
-
hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
|
33
|
-
|
34
|
-
for _ in panel_ids:
|
31
|
+
for panel_id in panel_ids:
|
35
32
|
panel_info: dict = panelapp.get_panel(panel_id)
|
36
33
|
parsed_panel = parse_panelapp_panel(
|
34
|
+
hgnc_gene_ids=adapter.hgnc_ids(),
|
37
35
|
panel_info=panel_info,
|
38
|
-
ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
|
39
|
-
hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
|
40
36
|
institute=institute,
|
41
37
|
confidence=confidence,
|
42
38
|
)
|
@@ -54,8 +50,6 @@ def get_panelapp_genes(
|
|
54
50
|
"""Parse and collect genes from one or more panelApp panels."""
|
55
51
|
|
56
52
|
genes = set()
|
57
|
-
ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
|
58
|
-
hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
|
59
53
|
|
60
54
|
with progressbar(panel_ids, label="Parsing panels", length=len(panel_ids)) as panel_ids:
|
61
55
|
for panel_id in panel_ids:
|
@@ -66,9 +60,8 @@ def get_panelapp_genes(
|
|
66
60
|
continue
|
67
61
|
|
68
62
|
parsed_panel = parse_panelapp_panel(
|
63
|
+
hgnc_gene_ids=adapter.hgnc_ids(),
|
69
64
|
panel_info=panel_dict,
|
70
|
-
ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
|
71
|
-
hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
|
72
65
|
institute=institute,
|
73
66
|
confidence="green",
|
74
67
|
)
|
@@ -86,6 +79,8 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
|
|
86
79
|
"""Translate panel type input from users to panel type slugs."""
|
87
80
|
if not types_filter:
|
88
81
|
return PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
|
82
|
+
if "all" in types_filter:
|
83
|
+
return available_types
|
89
84
|
index_list = [int(typeint) - 1 for typeint in types_filter.replace(" ", "").split(",")]
|
90
85
|
return [available_types[i] for i in index_list]
|
91
86
|
|
@@ -107,6 +102,7 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
|
|
107
102
|
available_types: List[str] = panelapp.get_panel_types()
|
108
103
|
for number, type in enumerate(available_types, 1):
|
109
104
|
LOG.info(f"{number}: {type}")
|
105
|
+
LOG.info("all: all types above")
|
110
106
|
preselected_options_idx: List[str] = [
|
111
107
|
str(available_types.index(presel) + 1)
|
112
108
|
for presel in PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""
|
3
|
+
scout.models.ccv_evaluation
|
4
|
+
~~~~~~~~~~~~~~~~~~
|
5
|
+
|
6
|
+
Define a document to describe a ClinGen-CGC-VIGG evaluation
|
7
|
+
|
8
|
+
Evaluations are stored in their own collection
|
9
|
+
|
10
|
+
"""
|
11
|
+
|
12
|
+
from datetime import datetime
|
13
|
+
|
14
|
+
ccv_evaluation = dict(
|
15
|
+
variant_specific=str, # md5 document id
|
16
|
+
variant_id=str, # md5 variant id
|
17
|
+
institute_id=str, # Institute _id, required
|
18
|
+
case_id=str, # case_id, required
|
19
|
+
classification=str, # What did the evaluation end up in?
|
20
|
+
# All evaluations will have an author
|
21
|
+
user_id=str, # user email, required
|
22
|
+
user_name=str, # user name
|
23
|
+
criteria=list, # List of dictionaries with criteria
|
24
|
+
# timestamps
|
25
|
+
created_at=datetime,
|
26
|
+
)
|
scout/models/variant/variant.py
CHANGED
scout/parse/omim.py
CHANGED
@@ -311,7 +311,6 @@ def get_mim_genes(genemap_lines, mim2gene_lines):
|
|
311
311
|
mim_number = entry["mim_number"]
|
312
312
|
inheritance = entry["inheritance"]
|
313
313
|
phenotype_info = entry["phenotypes"]
|
314
|
-
hgnc_symbol = entry["hgnc_symbol"]
|
315
314
|
hgnc_symbols = entry["hgnc_symbols"]
|
316
315
|
if mim_number in genes:
|
317
316
|
genes[mim_number]["inheritance"] = inheritance
|
@@ -354,11 +353,11 @@ def get_mim_disease(genemap_lines: Iterable[str]) -> Dict[str, Any]:
|
|
354
353
|
"""
|
355
354
|
diseases_found = {}
|
356
355
|
|
357
|
-
#
|
358
|
-
# Each line hold a lot of information and in
|
359
|
-
# has information about the phenotypes that a gene is associated with
|
360
|
-
# From this source we collect
|
361
|
-
# a disease is associated with
|
356
|
+
# Genemap2 is a file with one entry per gene.
|
357
|
+
# Each line holds a lot of information and in particular it
|
358
|
+
# has information about the phenotypes that a gene is associated with.
|
359
|
+
# From this source we collect inheritance patterns and what hgnc symbols
|
360
|
+
# a disease is associated with.
|
362
361
|
for entry in parse_genemap2(genemap_lines):
|
363
362
|
hgnc_symbol = entry["hgnc_symbol"]
|
364
363
|
for disease in entry["phenotypes"]:
|
scout/parse/panelapp.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""Code to parse panel information"""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
from typing import
|
4
|
+
from typing import Optional, Set
|
5
5
|
|
6
6
|
from scout.constants import INCOMPLETE_PENETRANCE_MAP, MODELS_MAP, PANELAPP_CONFIDENCE_EXCLUDE
|
7
7
|
from scout.utils.date import get_date
|
@@ -11,9 +11,8 @@ PANELAPP_PANELS_URL = "https://panelapp.genomicsengland.co.uk/panels/"
|
|
11
11
|
|
12
12
|
|
13
13
|
def parse_panel_app_gene(
|
14
|
+
hgnc_gene_ids: Set[int],
|
14
15
|
panelapp_gene: dict,
|
15
|
-
ensembl_gene_hgnc_id_map: Dict[str, int],
|
16
|
-
hgnc_symbol_ensembl_gene_map: Dict[str, str],
|
17
16
|
confidence: str,
|
18
17
|
) -> dict:
|
19
18
|
"""Parse a panel app-formatted gene."""
|
@@ -23,55 +22,30 @@ def parse_panel_app_gene(
|
|
23
22
|
if confidence_level in PANELAPP_CONFIDENCE_EXCLUDE[confidence]:
|
24
23
|
return gene_info
|
25
24
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
for version in genome.values()
|
31
|
-
]
|
32
|
-
|
33
|
-
if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
|
34
|
-
if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
|
35
|
-
LOG.warning(
|
36
|
-
f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
|
37
|
-
)
|
38
|
-
ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
|
39
|
-
else:
|
40
|
-
LOG.warning(
|
41
|
-
f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
|
42
|
-
)
|
43
|
-
|
44
|
-
hgnc_ids = set(
|
45
|
-
ensembl_gene_hgnc_id_map.get(ensembl_id)
|
46
|
-
for ensembl_id in ensembl_ids
|
47
|
-
if ensembl_gene_hgnc_id_map.get(ensembl_id)
|
48
|
-
)
|
49
|
-
if not hgnc_ids:
|
50
|
-
LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
|
25
|
+
gene_symbol = panelapp_gene["gene_data"]["gene_symbol"]
|
26
|
+
hgnc_id = int(panelapp_gene["gene_data"]["hgnc_id"].split(":")[1])
|
27
|
+
if hgnc_id not in hgnc_gene_ids:
|
28
|
+
LOG.warning("Gene %s does not exist in database. Skipping gene...", gene_symbol)
|
51
29
|
return gene_info
|
52
30
|
|
53
|
-
|
54
|
-
|
31
|
+
gene_info["hgnc_id"] = hgnc_id
|
32
|
+
gene_info["hgnc_symbol"] = gene_symbol
|
55
33
|
|
56
|
-
|
57
|
-
|
58
|
-
gene_info["hgnc_id"] = hgnc_id
|
34
|
+
if panelapp_gene["penetrance"] in ["Complete", "Incomplete"]:
|
35
|
+
gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
|
59
36
|
|
60
|
-
|
37
|
+
mode_of_inheritance = panelapp_gene.get("mode_of_inheritance")
|
38
|
+
if mode_of_inheritance not in MODELS_MAP:
|
39
|
+
LOG.warning(f"Mode of inheritance '{mode_of_inheritance}' not found in MODELS_MAP.")
|
61
40
|
|
62
|
-
inheritance_models = []
|
63
|
-
for model in MODELS_MAP.get(panelapp_gene["mode_of_inheritance"], []):
|
64
|
-
inheritance_models.append(model)
|
65
|
-
|
66
|
-
gene_info["inheritance_models"] = inheritance_models
|
41
|
+
gene_info["inheritance_models"] = MODELS_MAP.get(mode_of_inheritance, [])
|
67
42
|
|
68
43
|
return gene_info
|
69
44
|
|
70
45
|
|
71
46
|
def parse_panelapp_panel(
|
47
|
+
hgnc_gene_ids: Set[int],
|
72
48
|
panel_info: dict,
|
73
|
-
ensembl_id_to_hgnc_id_map: Dict[str, int],
|
74
|
-
hgnc_symbol_to_ensembl_id_map: Dict[str, str],
|
75
49
|
institute: Optional[str] = "cust000",
|
76
50
|
confidence: Optional[str] = "green",
|
77
51
|
) -> dict:
|
@@ -101,7 +75,7 @@ def parse_panelapp_panel(
|
|
101
75
|
nr_genes = 0
|
102
76
|
for nr_genes, gene in enumerate(panel_info["genes"], 1):
|
103
77
|
gene_info = parse_panel_app_gene(
|
104
|
-
|
78
|
+
hgnc_gene_ids=hgnc_gene_ids, panelapp_gene=gene, confidence=confidence
|
105
79
|
)
|
106
80
|
if not gene_info:
|
107
81
|
nr_excluded += 1
|
scout/parse/variant/compound.py
CHANGED
@@ -1,43 +1,42 @@
|
|
1
1
|
import logging
|
2
|
+
from typing import List
|
2
3
|
|
3
4
|
from scout.utils.md5 import generate_md5_key
|
4
5
|
|
5
6
|
LOG = logging.getLogger(__name__)
|
6
7
|
|
7
8
|
|
8
|
-
def parse_compounds(compound_info, case_id, variant_type):
|
9
|
-
"""Get a list with compounds objects for this variant.
|
9
|
+
def parse_compounds(compound_info: str, case_id: str, variant_type: str) -> List[dict]:
|
10
|
+
"""Get a list of compound objects (dicts) for this variant.
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
Scout IDs do not have "chr" prefixed chromosome names, hence we lstrip that from
|
13
|
+
any compound names.
|
14
|
+
|
15
|
+
We need the case id to construct the correct id, as well as the variant type (clinical or research).
|
15
16
|
|
16
|
-
Returns:
|
17
|
-
compounds(list(dict)): A list of compounds
|
18
17
|
"""
|
19
|
-
|
18
|
+
|
20
19
|
compounds = []
|
21
20
|
if compound_info:
|
22
21
|
for family_info in compound_info.split(","):
|
23
|
-
|
22
|
+
split_entry = family_info.split(":")
|
24
23
|
# This is the family id
|
25
|
-
if
|
26
|
-
for compound in
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
24
|
+
if split_entry[0] == case_id:
|
25
|
+
for compound in split_entry[1].split("|"):
|
26
|
+
split_compound = compound.split(">")
|
27
|
+
compound_name = split_compound[0].lstrip("chr")
|
28
|
+
compound_obj = {
|
29
|
+
"display_name": compound_name,
|
30
|
+
"variant": generate_md5_key(
|
31
|
+
compound_name.split("_") + [variant_type, case_id]
|
32
|
+
),
|
33
|
+
}
|
33
34
|
|
34
35
|
try:
|
35
|
-
compound_score = float(
|
36
|
+
compound_score = float(split_compound[1])
|
36
37
|
except (TypeError, IndexError):
|
37
38
|
compound_score = 0.0
|
38
|
-
|
39
39
|
compound_obj["score"] = compound_score
|
40
|
-
compound_obj["display_name"] = compound_name
|
41
40
|
|
42
41
|
compounds.append(compound_obj)
|
43
42
|
|
scout/parse/variant/gene.py
CHANGED
File without changes
|
scout/parse/variant/genotype.py
CHANGED
File without changes
|
Binary file
|