scout-browser 4.82.2__py3-none-any.whl → 4.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/__version__.py +1 -1
- scout/adapter/client.py +1 -0
- scout/adapter/mongo/base.py +0 -1
- scout/adapter/mongo/case.py +19 -37
- scout/adapter/mongo/case_events.py +98 -2
- scout/adapter/mongo/hgnc.py +39 -22
- scout/adapter/mongo/institute.py +3 -9
- scout/adapter/mongo/panel.py +2 -1
- scout/adapter/mongo/variant.py +12 -2
- scout/adapter/mongo/variant_loader.py +156 -141
- scout/build/genes/hgnc_gene.py +5 -134
- scout/commands/base.py +1 -0
- scout/commands/download/ensembl.py +1 -0
- scout/commands/download/everything.py +1 -0
- scout/commands/download/exac.py +1 -0
- scout/commands/download/hgnc.py +1 -0
- scout/commands/download/hpo.py +1 -0
- scout/commands/download/omim.py +1 -0
- scout/commands/export/database.py +1 -0
- scout/commands/load/panel.py +1 -0
- scout/commands/load/report.py +1 -0
- scout/commands/update/case.py +10 -10
- scout/commands/update/individual.py +6 -1
- scout/commands/update/omim.py +1 -0
- scout/commands/update/panelapp.py +1 -0
- scout/constants/file_types.py +86 -13
- scout/export/exon.py +1 -0
- scout/load/__init__.py +0 -1
- scout/load/all.py +8 -5
- scout/load/hgnc_gene.py +1 -1
- scout/load/panel.py +8 -4
- scout/load/setup.py +1 -0
- scout/models/case/case_loading_models.py +6 -16
- scout/models/hgnc_map.py +50 -87
- scout/models/phenotype_term.py +3 -3
- scout/parse/case.py +0 -1
- scout/parse/disease_terms.py +1 -0
- scout/parse/omim.py +1 -0
- scout/parse/orpha.py +1 -0
- scout/parse/panel.py +40 -15
- scout/parse/variant/conservation.py +1 -0
- scout/resources/__init__.py +3 -0
- scout/server/app.py +4 -50
- scout/server/blueprints/alignviewers/controllers.py +15 -17
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +13 -3
- scout/server/blueprints/alignviewers/views.py +10 -15
- scout/server/blueprints/cases/controllers.py +70 -73
- scout/server/blueprints/cases/templates/cases/case.html +94 -71
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
- scout/server/blueprints/cases/templates/cases/phenotype.html +8 -6
- scout/server/blueprints/cases/templates/cases/utils.html +3 -3
- scout/server/blueprints/cases/views.py +8 -6
- scout/server/blueprints/panels/forms.py +1 -0
- scout/server/blueprints/variant/controllers.py +14 -19
- scout/server/blueprints/variant/templates/variant/acmg.html +25 -16
- scout/server/blueprints/variant/templates/variant/components.html +11 -6
- scout/server/blueprints/variant/views.py +5 -2
- scout/server/blueprints/variants/controllers.py +12 -28
- scout/server/blueprints/variants/views.py +1 -1
- scout/server/config.py +16 -4
- scout/server/extensions/__init__.py +4 -2
- scout/server/extensions/beacon_extension.py +1 -0
- scout/server/extensions/bionano_extension.py +1 -0
- scout/server/extensions/chanjo_extension.py +59 -0
- scout/server/extensions/gens_extension.py +1 -0
- scout/server/extensions/ldap_extension.py +5 -3
- scout/server/extensions/loqus_extension.py +16 -14
- scout/server/extensions/matchmaker_extension.py +1 -0
- scout/server/extensions/mongo_extension.py +1 -0
- scout/server/extensions/phenopacket_extension.py +1 -0
- scout/server/extensions/rerunner_extension.py +1 -0
- scout/server/links.py +4 -4
- scout/server/static/bs_styles.css +20 -2
- scout/server/utils.py +16 -2
- scout/utils/acmg.py +33 -20
- scout/utils/ensembl_rest_clients.py +1 -0
- scout/utils/scout_requests.py +1 -0
- scout/utils/sort.py +21 -0
- scout/utils/track_resources.py +70 -0
- {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/METADATA +2 -5
- {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/RECORD +85 -84
- {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/WHEEL +1 -1
- {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/entry_points.txt +0 -1
- scout/load/case.py +0 -36
- scout/utils/cloud_resources.py +0 -61
- {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/LICENSE +0 -0
- {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Code for updating information on individuals
|
2
2
|
"""
|
3
|
+
|
3
4
|
from pathlib import Path
|
4
5
|
|
5
6
|
import click
|
@@ -95,4 +96,8 @@ def individual(case_id, ind, key, value):
|
|
95
96
|
|
96
97
|
ind_obj[key] = value
|
97
98
|
|
98
|
-
|
99
|
+
link = f"/{case_obj['owner']}/{case_obj['display_name']}"
|
100
|
+
institute_obj = store.institute(case_obj["owner"])
|
101
|
+
store.update_case_individual(
|
102
|
+
case_obj, user_obj=None, institute_obj=institute_obj, link=link, keep_date=False
|
103
|
+
)
|
scout/commands/update/omim.py
CHANGED
scout/constants/file_types.py
CHANGED
@@ -1,17 +1,90 @@
|
|
1
1
|
# Collect general information about the file types used in Scout
|
2
|
+
# Load priority determines load order, with lowest value loaded first.
|
2
3
|
|
3
4
|
FILE_TYPE_MAP = {
|
4
|
-
"vcf_cancer": {
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
"
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
"
|
15
|
-
|
16
|
-
|
5
|
+
"vcf_cancer": {
|
6
|
+
"category": "cancer",
|
7
|
+
"variant_type": "clinical",
|
8
|
+
"load_priority": 10,
|
9
|
+
},
|
10
|
+
"vcf_cancer_research": {
|
11
|
+
"category": "cancer",
|
12
|
+
"variant_type": "research",
|
13
|
+
"load_priority": 110,
|
14
|
+
},
|
15
|
+
"vcf_cancer_sv": {
|
16
|
+
"category": "cancer_sv",
|
17
|
+
"variant_type": "clinical",
|
18
|
+
"load_priority": 20,
|
19
|
+
},
|
20
|
+
"vcf_cancer_sv_research": {
|
21
|
+
"category": "cancer_sv",
|
22
|
+
"variant_type": "research",
|
23
|
+
"load_priority": 120,
|
24
|
+
},
|
25
|
+
"vcf_fusion": {
|
26
|
+
"category": "fusion",
|
27
|
+
"variant_type": "clinical",
|
28
|
+
"load_priority": 70,
|
29
|
+
},
|
30
|
+
"vcf_fusion_research": {
|
31
|
+
"category": "fusion",
|
32
|
+
"variant_type": "research",
|
33
|
+
"load_priority": 170,
|
34
|
+
},
|
35
|
+
"vcf_mei": {
|
36
|
+
"category": "mei",
|
37
|
+
"variant_type": "clinical",
|
38
|
+
"load_priority": 60,
|
39
|
+
},
|
40
|
+
"vcf_mei_research": {
|
41
|
+
"category": "mei",
|
42
|
+
"variant_type": "research",
|
43
|
+
"load_priority": 160,
|
44
|
+
},
|
45
|
+
"vcf_snv": {
|
46
|
+
"category": "snv",
|
47
|
+
"variant_type": "clinical",
|
48
|
+
"load_priority": 35,
|
49
|
+
},
|
50
|
+
"vcf_snv_mt": {
|
51
|
+
"category": "snv",
|
52
|
+
"variant_type": "clinical",
|
53
|
+
"load_priority": 30,
|
54
|
+
},
|
55
|
+
"vcf_snv_research": {
|
56
|
+
"category": "snv",
|
57
|
+
"variant_type": "research",
|
58
|
+
"load_priority": 135,
|
59
|
+
},
|
60
|
+
"vcf_snv_research_mt": {
|
61
|
+
"category": "snv",
|
62
|
+
"variant_type": "research",
|
63
|
+
"load_priority": 130,
|
64
|
+
},
|
65
|
+
"vcf_sv": {
|
66
|
+
"category": "sv",
|
67
|
+
"variant_type": "clinical",
|
68
|
+
"load_priority": 45,
|
69
|
+
},
|
70
|
+
"vcf_sv_mt": {
|
71
|
+
"category": "sv",
|
72
|
+
"variant_type": "clinical",
|
73
|
+
"load_priority": 40,
|
74
|
+
},
|
75
|
+
"vcf_sv_research": {
|
76
|
+
"category": "sv",
|
77
|
+
"variant_type": "research",
|
78
|
+
"load_priority": 145,
|
79
|
+
},
|
80
|
+
"vcf_sv_research_mt": {
|
81
|
+
"category": "sv",
|
82
|
+
"variant_type": "research",
|
83
|
+
"load_priority": 140,
|
84
|
+
},
|
85
|
+
"vcf_str": {
|
86
|
+
"category": "str",
|
87
|
+
"variant_type": "clinical",
|
88
|
+
"load_priority": 50,
|
89
|
+
},
|
17
90
|
}
|
scout/export/exon.py
CHANGED
@@ -11,6 +11,7 @@ head develop/mip_references/grch37_scout_exons_-2017-01-.bed
|
|
11
11
|
7 65413656 65413769 7-65413658-65413767 NM_173517 21492 VKORC1L1
|
12
12
|
5 159776172 159776790 5-159776174-159776788 NM_031908 14325 C1QTNF2
|
13
13
|
"""
|
14
|
+
|
14
15
|
import logging
|
15
16
|
|
16
17
|
LOG = logging.getLogger(__name__)
|
scout/load/__init__.py
CHANGED
scout/load/all.py
CHANGED
@@ -3,6 +3,7 @@ import logging
|
|
3
3
|
|
4
4
|
from scout.constants import FILE_TYPE_MAP
|
5
5
|
from scout.exceptions.config import ConfigError
|
6
|
+
from scout.utils.sort import get_load_priority
|
6
7
|
|
7
8
|
LOG = logging.getLogger(__name__)
|
8
9
|
|
@@ -54,15 +55,18 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
|
|
54
55
|
start = gene_caption["start"]
|
55
56
|
end = gene_caption["end"]
|
56
57
|
|
57
|
-
case_file_types =
|
58
|
+
case_file_types = set()
|
58
59
|
|
59
60
|
for file_type in FILE_TYPE_MAP:
|
60
61
|
if case_obj.get("vcf_files", {}).get(file_type):
|
61
|
-
case_file_types.
|
62
|
+
case_file_types.add(
|
62
63
|
(FILE_TYPE_MAP[file_type]["variant_type"], FILE_TYPE_MAP[file_type]["category"])
|
63
64
|
)
|
64
65
|
|
65
|
-
for variant_type, category in
|
66
|
+
for variant_type, category in sorted(
|
67
|
+
case_file_types,
|
68
|
+
key=lambda tup: get_load_priority(variant_type=tup[0], category=tup[1]),
|
69
|
+
):
|
66
70
|
if variant_type == "research" and not case_obj["is_research"]:
|
67
71
|
continue
|
68
72
|
|
@@ -84,13 +88,12 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
|
|
84
88
|
adapter.case_variants_count(case_obj["_id"], case_obj["owner"], force_update_case=True)
|
85
89
|
|
86
90
|
|
87
|
-
def load_scout(adapter, config,
|
91
|
+
def load_scout(adapter, config, update=False):
|
88
92
|
"""Load a new case from a Scout config.
|
89
93
|
|
90
94
|
Args:
|
91
95
|
adapter(MongoAdapter)
|
92
96
|
config(dict): loading info
|
93
|
-
ped(Iterable(str)): Pedigree ingformation
|
94
97
|
update(bool): If existing case should be updated
|
95
98
|
|
96
99
|
DEPRECATED method, historically used by the CG monolith, which has since switched to call the Scout CLI instead.
|
scout/load/hgnc_gene.py
CHANGED
@@ -91,7 +91,7 @@ def load_hgnc_genes(
|
|
91
91
|
gene_objects.append(gene_obj)
|
92
92
|
|
93
93
|
LOG.info("Nr of genes without coordinates in build %s: %s", build, non_existing)
|
94
|
-
LOG.info(f"Loading {len(gene_objects)}
|
94
|
+
LOG.info(f"Loading {len(gene_objects)} genes into the database")
|
95
95
|
adapter.load_hgnc_bulk(gene_objects)
|
96
96
|
|
97
97
|
LOG.info("Loading done. %s genes loaded", len(gene_objects))
|
scout/load/panel.py
CHANGED
@@ -7,6 +7,7 @@ functions to load panels into the database
|
|
7
7
|
import logging
|
8
8
|
import math
|
9
9
|
from datetime import datetime
|
10
|
+
from typing import Dict, List
|
10
11
|
|
11
12
|
from click import Abort
|
12
13
|
from flask.cli import current_app
|
@@ -110,7 +111,7 @@ def load_panel(panel_path, adapter, **kwargs):
|
|
110
111
|
raise err
|
111
112
|
|
112
113
|
|
113
|
-
def _panelapp_panel_ids():
|
114
|
+
def _panelapp_panel_ids() -> List[str]:
|
114
115
|
"""Fetch all PanelApp panel IDs"""
|
115
116
|
json_lines = fetch_resource(PANELAPP_BASE_URL.format("list_panels"), json=True)
|
116
117
|
return [panel_info["Panel_Id"] for panel_info in json_lines.get("result", [])]
|
@@ -129,11 +130,14 @@ def _parse_panelapp_panel(adapter, panel_id, institute, confidence):
|
|
129
130
|
{'version': 3.3, 'date': datetime.datetime(2023, 1, 31, 16, 43, 37, 521719), 'display_name': 'Diabetes - neonatal onset - [GREEN]', 'institute': 'cust000', 'panel_type': 'clinical', 'genes': [list of genes], 'panel_id': '55a9041e22c1fc6711b0c6c0'}
|
130
131
|
|
131
132
|
"""
|
132
|
-
|
133
|
+
ensembl_gene_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
|
134
|
+
hgnc_symbol_ensembl_gene_map: Dict[str, str] = adapter.hgnc_symbol_ensembl_id_mapping()
|
135
|
+
|
133
136
|
json_lines = fetch_resource(PANELAPP_BASE_URL.format("get_panel") + panel_id, json=True)
|
134
137
|
parsed_panel = parse_panel_app_panel(
|
135
138
|
panel_info=json_lines["result"],
|
136
|
-
|
139
|
+
ensembl_gene_hgnc_id_map=ensembl_gene_hgnc_id_map,
|
140
|
+
hgnc_symbol_ensembl_gene_map=hgnc_symbol_ensembl_gene_map,
|
137
141
|
institute=institute,
|
138
142
|
confidence=confidence,
|
139
143
|
)
|
@@ -160,7 +164,7 @@ def load_panelapp_panel(adapter, panel_id=None, institute="cust000", confidence=
|
|
160
164
|
|
161
165
|
if not panel_id:
|
162
166
|
LOG.info("Fetching all panel app panels")
|
163
|
-
panel_ids = _panelapp_panel_ids()
|
167
|
+
panel_ids: List[str] = _panelapp_panel_ids()
|
164
168
|
|
165
169
|
for _ in panel_ids:
|
166
170
|
parsed_panel = _parse_panelapp_panel(adapter, _, institute, confidence)
|
scout/load/setup.py
CHANGED
@@ -15,7 +15,7 @@ except ImportError:
|
|
15
15
|
|
16
16
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
17
17
|
|
18
|
-
from scout.constants import ANALYSIS_TYPES
|
18
|
+
from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP
|
19
19
|
from scout.exceptions import PedigreeError
|
20
20
|
from scout.utils.date import get_date
|
21
21
|
|
@@ -58,21 +58,7 @@ CASE_FILE_PATH_CHECKS = [
|
|
58
58
|
"RNAfusion_report_research",
|
59
59
|
]
|
60
60
|
|
61
|
-
VCF_FILE_PATH_CHECKS =
|
62
|
-
"vcf_cancer",
|
63
|
-
"vcf_cancer_research",
|
64
|
-
"vcf_cancer_sv",
|
65
|
-
"vcf_cancer_sv_research",
|
66
|
-
"vcf_fusion",
|
67
|
-
"vcf_fusion_research",
|
68
|
-
"vcf_snv",
|
69
|
-
"vcf_snv_research",
|
70
|
-
"vcf_mei",
|
71
|
-
"vcf_mei_research",
|
72
|
-
"vcf_str",
|
73
|
-
"vcf_sv",
|
74
|
-
"vcf_sv_research",
|
75
|
-
]
|
61
|
+
VCF_FILE_PATH_CHECKS = FILE_TYPE_MAP.keys()
|
76
62
|
|
77
63
|
GENOME_BUILDS = ["37", "38"]
|
78
64
|
TRACKS = ["rare", "cancer"]
|
@@ -110,12 +96,16 @@ class VcfFiles(BaseModel):
|
|
110
96
|
vcf_cancer_sv: Optional[str] = None
|
111
97
|
vcf_cancer_sv_research: Optional[str] = None
|
112
98
|
vcf_snv: Optional[str] = None
|
99
|
+
vcf_snv_mt: Optional[str] = None
|
113
100
|
vcf_snv_research: Optional[str] = None
|
101
|
+
vcf_snv_research_mt: Optional[str] = None
|
114
102
|
vcf_mei: Optional[str] = None
|
115
103
|
vcf_mei_research: Optional[str] = None
|
116
104
|
vcf_str: Optional[str] = None
|
117
105
|
vcf_sv: Optional[str] = None
|
106
|
+
vcf_sv_mt: Optional[str] = None
|
118
107
|
vcf_sv_research: Optional[str] = None
|
108
|
+
vcf_sv_research_mt: Optional[str] = None
|
119
109
|
vcf_fusion: Optional[str] = None
|
120
110
|
vcf_fusion_research: Optional[str] = None
|
121
111
|
|
scout/models/hgnc_map.py
CHANGED
@@ -2,6 +2,8 @@ from __future__ import unicode_literals
|
|
2
2
|
|
3
3
|
from typing import List, Optional
|
4
4
|
|
5
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
6
|
+
|
5
7
|
|
6
8
|
class Exon(dict):
|
7
9
|
"""Exon dictionary
|
@@ -66,90 +68,51 @@ class HgncTranscript(dict):
|
|
66
68
|
self["mane_plus_clinical"] = mane_plus_clinical
|
67
69
|
|
68
70
|
|
69
|
-
class HgncGene(
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
incomplete_penetrance=False,
|
118
|
-
phenotypes=None,
|
119
|
-
build="37",
|
120
|
-
):
|
121
|
-
super(HgncGene, self).__init__()
|
122
|
-
self["hgnc_id"] = int(hgnc_id)
|
123
|
-
self["hgnc_symbol"] = hgnc_symbol
|
124
|
-
self["ensembl_id"] = ensembl_id
|
125
|
-
|
126
|
-
self["chromosome"] = chrom
|
127
|
-
self["start"] = int(start)
|
128
|
-
self["end"] = int(end)
|
129
|
-
self["length"] = self["end"] - self["start"]
|
130
|
-
|
131
|
-
self["description"] = description
|
132
|
-
self["aliases"] = aliases
|
133
|
-
self["primary_transcripts"] = primary_transcripts
|
134
|
-
self["inheritance_models"] = inheritance_models
|
135
|
-
self["phenotypes"] = phenotypes
|
136
|
-
|
137
|
-
self["entrez_id"] = entrez_id
|
138
|
-
if entrez_id:
|
139
|
-
self["entrez_id"] = int(entrez_id)
|
140
|
-
|
141
|
-
self["omim_id"] = omim_id
|
142
|
-
if omim_id:
|
143
|
-
self["omim_id"] = int(omim_id)
|
144
|
-
|
145
|
-
self["ucsc_id"] = ucsc_id
|
146
|
-
self["uniprot_ids"] = uniprot_ids
|
147
|
-
self["vega_id"] = vega_id
|
148
|
-
|
149
|
-
self["pli_score"] = pli_score
|
150
|
-
if pli_score:
|
151
|
-
self["pli_score"] = float(pli_score)
|
152
|
-
|
153
|
-
self["incomplete_penetrance"] = incomplete_penetrance
|
154
|
-
|
155
|
-
self["build"] = build
|
71
|
+
class HgncGene(BaseModel):
|
72
|
+
hgnc_id: int
|
73
|
+
hgnc_symbol: str
|
74
|
+
build: str
|
75
|
+
chromosome: str
|
76
|
+
start: int
|
77
|
+
end: int
|
78
|
+
length: int
|
79
|
+
description: Optional[str] = None
|
80
|
+
ensembl_id: Optional[str] = Field(None, alias="ensembl_gene_id")
|
81
|
+
aliases: Optional[List[str]] = Field(None, alias="previous_symbols")
|
82
|
+
entrez_id: Optional[int] = None
|
83
|
+
omim_id: Optional[int] = None
|
84
|
+
primary_transcripts: Optional[List[str]] = Field(None, alias="ref_seq")
|
85
|
+
ucsc_id: Optional[str] = None
|
86
|
+
uniprot_ids: Optional[List[str]] = None
|
87
|
+
vega_id: Optional[str] = None
|
88
|
+
inheritance_models: Optional[List[str]] = None
|
89
|
+
incomplete_penetrance: Optional[bool] = False
|
90
|
+
phenotypes: Optional[List[dict]] = None
|
91
|
+
pli_score: Optional[float] = None
|
92
|
+
constraint_lof_oe: Optional[float] = None
|
93
|
+
constraint_lof_oe_ci_lower: Optional[float] = None
|
94
|
+
constraint_lof_oe_ci_upper: Optional[float] = None
|
95
|
+
constraint_lof_z: Optional[float] = None
|
96
|
+
constraint_mis_oe: Optional[float] = None
|
97
|
+
constraint_mis_oe_ci_lower: Optional[float] = None
|
98
|
+
constraint_mis_oe_ci_upper: Optional[float] = None
|
99
|
+
constraint_mis_z: Optional[float] = None
|
100
|
+
|
101
|
+
@model_validator(mode="before")
|
102
|
+
def set_gene_length(cls, values) -> "HgncGene":
|
103
|
+
"""Set gene length."""
|
104
|
+
if None in [values.get("end"), values.get("start")]:
|
105
|
+
values.update({"length": None})
|
106
|
+
else:
|
107
|
+
values.update({"length": values.get("end") - values.get("start")})
|
108
|
+
return values
|
109
|
+
|
110
|
+
@field_validator("phenotypes", mode="before")
|
111
|
+
@classmethod
|
112
|
+
def set_phenotypes_inheritance(cls, phenotypes) -> Optional[List[dict]]:
|
113
|
+
"""Convert field 'inheritance' of each phenotype in phenotypes from set to list."""
|
114
|
+
for phenotype in phenotypes:
|
115
|
+
phenotype["inheritance_models"] = list(phenotype.get("inheritance", {}))
|
116
|
+
phenotype.pop("inheritance", None)
|
117
|
+
|
118
|
+
return phenotypes
|
scout/models/phenotype_term.py
CHANGED
@@ -14,9 +14,9 @@ class HpoTerm(BaseModel):
|
|
14
14
|
"""
|
15
15
|
|
16
16
|
hpo_id: str # id field in the hpo.obo file
|
17
|
-
hpo_number: Optional[
|
18
|
-
|
19
|
-
|
17
|
+
hpo_number: Optional[int] = (
|
18
|
+
None # id field in the hpo.obo file, stripped of the 'HP:' part and the zeroes
|
19
|
+
)
|
20
20
|
description: str # name field in the hpo.obo file
|
21
21
|
ancestors: List = []
|
22
22
|
all_ancestors: List = []
|
scout/parse/case.py
CHANGED
@@ -86,7 +86,6 @@ def parse_case_data(**kwargs):
|
|
86
86
|
config_dict["case_id"] = config_dict["family"]
|
87
87
|
|
88
88
|
if config_dict.get("smn_tsv"):
|
89
|
-
LOG.info("Adding SMN info from {}.".format(config_dict["smn_tsv"]))
|
90
89
|
add_smn_info_case(config_dict)
|
91
90
|
|
92
91
|
return remove_none_recursive(config_dict)
|
scout/parse/disease_terms.py
CHANGED
scout/parse/omim.py
CHANGED
scout/parse/orpha.py
CHANGED
scout/parse/panel.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
"""Code to parse panel information"""
|
2
|
+
|
2
3
|
import logging
|
3
4
|
from datetime import datetime
|
5
|
+
from typing import Dict, List, Optional
|
4
6
|
|
5
7
|
from scout.constants import (
|
6
8
|
INCOMPLETE_PENETRANCE_MAP,
|
@@ -233,7 +235,12 @@ def parse_genes(gene_lines):
|
|
233
235
|
|
234
236
|
|
235
237
|
def parse_gene_panel(
|
236
|
-
path,
|
238
|
+
path,
|
239
|
+
institute="cust000",
|
240
|
+
panel_id="test",
|
241
|
+
panel_type="clinical",
|
242
|
+
genes=None,
|
243
|
+
**kwargs,
|
237
244
|
):
|
238
245
|
"""Parse the panel info and return a gene panel
|
239
246
|
|
@@ -268,17 +275,14 @@ def parse_gene_panel(
|
|
268
275
|
return gene_panel
|
269
276
|
|
270
277
|
|
271
|
-
def parse_panel_app_gene(
|
272
|
-
|
278
|
+
def parse_panel_app_gene(
|
279
|
+
app_gene: dict,
|
280
|
+
ensembl_gene_hgnc_id_map: Dict[str, int],
|
281
|
+
hgnc_symbol_ensembl_gene_map: Dict[str, str],
|
282
|
+
confidence: str,
|
283
|
+
) -> dict:
|
284
|
+
"""Parse a panel app-formatted gene."""
|
273
285
|
|
274
|
-
Args:
|
275
|
-
app_gene(dict): dict with panel app info, where Ensembl ids are present as a loist with key "EnsembleGeneIds"
|
276
|
-
hgnc_map(dict): a dictionary with Ensembl IDs as keys and HGNC ids as values
|
277
|
-
confidence(str): enum green|amber|red
|
278
|
-
|
279
|
-
Returns:
|
280
|
-
gene_info(dict): Scout infromation
|
281
|
-
"""
|
282
286
|
gene_info = {}
|
283
287
|
confidence_level = app_gene["LevelOfConfidence"]
|
284
288
|
# Return empty gene if not confident gene
|
@@ -288,8 +292,22 @@ def parse_panel_app_gene(app_gene, hgnc_map, confidence):
|
|
288
292
|
hgnc_symbol = app_gene["GeneSymbol"]
|
289
293
|
|
290
294
|
ensembl_ids = app_gene["EnsembleGeneIds"]
|
295
|
+
|
296
|
+
if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
|
297
|
+
if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
|
298
|
+
LOG.warning(
|
299
|
+
f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
|
300
|
+
)
|
301
|
+
ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
|
302
|
+
else:
|
303
|
+
LOG.warning(
|
304
|
+
f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
|
305
|
+
)
|
306
|
+
|
291
307
|
hgnc_ids = set(
|
292
|
-
|
308
|
+
ensembl_gene_hgnc_id_map.get(ensembl_id)
|
309
|
+
for ensembl_id in ensembl_ids
|
310
|
+
if ensembl_gene_hgnc_id_map.get(ensembl_id)
|
293
311
|
)
|
294
312
|
if not hgnc_ids:
|
295
313
|
LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
|
@@ -314,8 +332,13 @@ def parse_panel_app_gene(app_gene, hgnc_map, confidence):
|
|
314
332
|
|
315
333
|
|
316
334
|
def parse_panel_app_panel(
|
317
|
-
panel_info
|
318
|
-
|
335
|
+
panel_info: dict,
|
336
|
+
ensembl_gene_hgnc_id_map: Dict[str, int],
|
337
|
+
hgnc_symbol_ensembl_gene_map: Dict[str, str],
|
338
|
+
institute: Optional[str] = "cust000",
|
339
|
+
panel_type: Optional[str] = "clinical",
|
340
|
+
confidence: Optional[str] = "green",
|
341
|
+
) -> dict:
|
319
342
|
"""Parse a PanelApp panel
|
320
343
|
|
321
344
|
Args:
|
@@ -346,7 +369,9 @@ def parse_panel_app_panel(
|
|
346
369
|
nr_excluded = 0
|
347
370
|
nr_genes = 0
|
348
371
|
for nr_genes, gene in enumerate(panel_info["Genes"], 1):
|
349
|
-
gene_info = parse_panel_app_gene(
|
372
|
+
gene_info = parse_panel_app_gene(
|
373
|
+
gene, ensembl_gene_hgnc_id_map, hgnc_symbol_ensembl_gene_map, confidence
|
374
|
+
)
|
350
375
|
if not gene_info:
|
351
376
|
nr_excluded += 1
|
352
377
|
continue
|