scout-browser 4.93.1__py3-none-any.whl → 4.95.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/adapter/mongo/base.py +0 -0
- scout/adapter/mongo/hgnc.py +5 -1
- scout/adapter/mongo/managed_variant.py +4 -2
- scout/adapter/mongo/query.py +91 -54
- scout/adapter/mongo/variant.py +13 -8
- scout/build/panel.py +1 -1
- scout/commands/export/export_command.py +0 -0
- scout/commands/load/base.py +0 -0
- scout/commands/load/user.py +0 -0
- scout/commands/update/disease.py +0 -0
- scout/commands/update/genes.py +0 -0
- scout/commands/wipe_database.py +0 -0
- scout/constants/gene_tags.py +22 -12
- scout/demo/643594.research.mei.vcf.gz +0 -0
- scout/demo/643594.research.mei.vcf.gz.tbi +0 -0
- scout/load/panelapp.py +8 -12
- scout/parse/omim.py +5 -6
- scout/parse/panelapp.py +16 -42
- scout/parse/variant/compound.py +20 -21
- scout/parse/variant/gene.py +0 -0
- scout/parse/variant/genotype.py +0 -0
- scout/resources/custom_igv_tracks/mane.bb +0 -0
- scout/server/blueprints/cases/controllers.py +48 -0
- scout/server/blueprints/cases/templates/cases/case_report.html +17 -2
- scout/server/blueprints/cases/views.py +5 -5
- scout/server/blueprints/clinvar/controllers.py +4 -5
- scout/server/blueprints/institutes/controllers.py +129 -67
- scout/server/blueprints/institutes/forms.py +5 -2
- scout/server/blueprints/institutes/templates/overview/cases.html +6 -0
- scout/server/blueprints/institutes/templates/overview/utils.html +6 -5
- scout/server/blueprints/managed_variants/forms.py +17 -2
- scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +2 -2
- scout/server/blueprints/variant/templates/variant/components.html +27 -4
- scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
- scout/server/blueprints/variant/templates/variant/tx_overview.html +3 -3
- scout/server/blueprints/variant/views.py +1 -2
- scout/server/blueprints/variants/forms.py +33 -5
- scout/server/blueprints/variants/templates/variants/cancer-sv-variants.html +4 -18
- scout/server/blueprints/variants/templates/variants/cancer-variants.html +2 -12
- scout/server/blueprints/variants/templates/variants/components.html +15 -1
- scout/server/blueprints/variants/templates/variants/sv-variants.html +2 -2
- scout/server/links.py +1 -1
- scout/utils/acmg.py +0 -1
- scout/utils/ccv.py +1 -9
- scout/utils/link.py +4 -3
- scout/utils/md5.py +0 -0
- {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/METADATA +66 -45
- {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/RECORD +41 -42
- {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/WHEEL +1 -2
- scout/__version__.py +0 -1
- scout_browser-4.93.1.dist-info/top_level.txt +0 -1
- {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info/licenses}/LICENSE +0 -0
scout/adapter/mongo/base.py
CHANGED
File without changes
|
scout/adapter/mongo/hgnc.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Dict
|
2
|
+
from typing import Dict, Set
|
3
3
|
|
4
4
|
import intervaltree
|
5
5
|
from pymongo.errors import BulkWriteError, DuplicateKeyError
|
@@ -126,6 +126,10 @@ class GeneHandler(object):
|
|
126
126
|
|
127
127
|
return None
|
128
128
|
|
129
|
+
def hgnc_ids(self) -> Set[int]:
|
130
|
+
"""Returns all HGNC IDs present in the hgnc_gene collection."""
|
131
|
+
return set(self.hgnc_collection.distinct("hgnc_id"))
|
132
|
+
|
129
133
|
def hgnc_genes(self, hgnc_symbol, build="37", search=False):
|
130
134
|
"""Fetch all hgnc genes that match a hgnc symbol
|
131
135
|
|
@@ -180,10 +180,12 @@ class ManagedVariantHandler(object):
|
|
180
180
|
}
|
181
181
|
|
182
182
|
if "position" in query_options:
|
183
|
-
|
183
|
+
position = max(int(query_options["position"]), 1)
|
184
|
+
query["end"] = {"$gte": position}
|
184
185
|
|
185
186
|
if "end" in query_options:
|
186
|
-
|
187
|
+
end = max(int(query_options["end"]), 1)
|
188
|
+
query["position"] = {"$lte": end}
|
187
189
|
|
188
190
|
if "sub_category" in query_options:
|
189
191
|
query["sub_category"] = {"$in": query_options["sub_category"]}
|
scout/adapter/mongo/query.py
CHANGED
@@ -469,68 +469,105 @@ class QueryHandler(object):
|
|
469
469
|
mongo_query(dict): returned object contains coordinate filters
|
470
470
|
|
471
471
|
"""
|
472
|
-
|
473
|
-
|
472
|
+
start_pos = max(int(query["start"]), 1)
|
473
|
+
end_pos = max(int(query["end"]), 1)
|
474
|
+
mongo_query["position"] = {"$lte": end_pos}
|
475
|
+
mongo_query["end"] = {"$gte": start_pos}
|
474
476
|
|
475
477
|
return mongo_query
|
476
478
|
|
477
|
-
def
|
478
|
-
"""
|
479
|
-
This method is called to buid coordinate query for sv variants
|
479
|
+
def get_position_query(self, chrom: str, start: int, end: int) -> dict:
|
480
|
+
"""Helper function that returns a dictionary containing start and stop coordinates.
|
480
481
|
|
481
|
-
|
482
|
-
|
483
|
-
|
482
|
+
The position query consists of 3 parts, each of them an element of the $or
|
483
|
+
First part applies to searches when chromosome and end_chrom are the same.
|
484
|
+
Here are the possible overlapping search scenarios:
|
485
|
+
# Case 1
|
486
|
+
# filter xxxxxxxxx
|
487
|
+
# Variant xxxxxxxx
|
484
488
|
|
485
|
-
|
486
|
-
|
489
|
+
# Case 2
|
490
|
+
# filter xxxxxxxxx
|
491
|
+
# Variant xxxxxxxx
|
492
|
+
|
493
|
+
# Case 3
|
494
|
+
# filter xxxxxxxxx
|
495
|
+
# Variant xx
|
487
496
|
|
497
|
+
# Case 4
|
498
|
+
# filter xxxxxxxxx
|
499
|
+
# Variant xxxxxxxxxxxxxx
|
500
|
+
|
501
|
+
Second and third elements of the $or cover queries for variants where chromosome != end_chrom.
|
502
|
+
In this situation there are the following scenarios:
|
503
|
+
- Case chromosome != end_chrom, position matching 'chromosome'
|
504
|
+
- Case chromosome != end_chrom, position matching 'end_chrom'
|
488
505
|
"""
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
}
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
"$
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
506
|
+
|
507
|
+
return {
|
508
|
+
"$or": [
|
509
|
+
# Case chromosome == end_chrom
|
510
|
+
{
|
511
|
+
"$and": [
|
512
|
+
{"chromosome": chrom},
|
513
|
+
{"end_chrom": chrom},
|
514
|
+
{
|
515
|
+
"$or": [
|
516
|
+
# Overlapping cases 1-4 (chromosome == end_chrom)
|
517
|
+
{"end": {"$gte": start, "$lte": end}}, # Case 1
|
518
|
+
{"position": {"$gte": start, "$lte": end}}, # Case 2
|
519
|
+
{
|
520
|
+
"$and": [
|
521
|
+
{"position": {"$lte": start}},
|
522
|
+
{"end": {"$gte": end}},
|
523
|
+
]
|
524
|
+
}, # Case 3
|
525
|
+
{
|
526
|
+
"$and": [
|
527
|
+
{"position": {"$gte": start}},
|
528
|
+
{"end": {"$lte": end}},
|
529
|
+
]
|
530
|
+
}, # Case 4
|
531
|
+
]
|
532
|
+
},
|
533
|
+
]
|
534
|
+
},
|
535
|
+
# Case chromosome != end_chrom, position matching 'chromosome'
|
536
|
+
{
|
537
|
+
"$and": [
|
538
|
+
{"chromosome": chrom},
|
539
|
+
{"end_chrom": {"$ne": chrom}},
|
540
|
+
{"position": {"$gte": start}},
|
541
|
+
{"position": {"$lte": end}},
|
542
|
+
]
|
543
|
+
},
|
544
|
+
# Case chromosome != end_chrom, position matching 'end_chrom'
|
545
|
+
{
|
546
|
+
"$and": [
|
547
|
+
{"chromosome": {"$ne": chrom}},
|
548
|
+
{"end_chrom": chrom},
|
549
|
+
{"end": {"$gte": start}},
|
550
|
+
{"end": {"$lte": end}},
|
551
|
+
]
|
552
|
+
},
|
553
|
+
]
|
554
|
+
}
|
555
|
+
|
556
|
+
def sv_coordinate_query(self, query: dict) -> dict:
|
557
|
+
"""Adds genomic coordinate-related filters to the query object
|
558
|
+
This method is called to build coordinate query for sv variants
|
559
|
+
"""
|
560
|
+
if (
|
561
|
+
query.get("start") is not None and query.get("end") is not None
|
562
|
+
): # query contains full coordinates
|
563
|
+
chrom = query["chrom"]
|
564
|
+
start = max(int(query["start"]), 1)
|
565
|
+
end = max(int(query["end"]), 1)
|
566
|
+
coordinate_query = self.get_position_query(chrom=chrom, start=start, end=end)
|
567
|
+
else: # query contains only chromosome info
|
568
|
+
coordinate_query = {
|
569
|
+
"$or": [{"chromosome": query["chrom"]}, {"end_chrom": query["chrom"]}]
|
530
570
|
}
|
531
|
-
coordinate_query = {"$and": [chromosome_query, position_query]}
|
532
|
-
else:
|
533
|
-
coordinate_query = chromosome_query
|
534
571
|
return coordinate_query
|
535
572
|
|
536
573
|
def gene_filter(self, query, build="37"):
|
scout/adapter/mongo/variant.py
CHANGED
@@ -311,14 +311,14 @@ class VariantHandler(VariantLoader):
|
|
311
311
|
"category": variant_obj["category"], # sv
|
312
312
|
"variant_type": variant_obj["variant_type"], # clinical or research
|
313
313
|
"sub_category": variant_obj["sub_category"], # example -> "del"
|
314
|
-
"$
|
314
|
+
"$or": coordinate_query["$or"], # query for overlapping SV variants
|
315
315
|
}
|
316
|
-
|
317
316
|
overlapping_svs = list(
|
318
317
|
self.variant_collection.find(
|
319
318
|
query,
|
320
319
|
)
|
321
320
|
)
|
321
|
+
|
322
322
|
if not overlapping_svs:
|
323
323
|
return None
|
324
324
|
if len(overlapping_svs) == 1:
|
@@ -491,12 +491,17 @@ class VariantHandler(VariantLoader):
|
|
491
491
|
if len(affected_ids) == 0:
|
492
492
|
return []
|
493
493
|
filters["case_id"] = case_obj["_id"]
|
494
|
-
filters["
|
495
|
-
"$
|
496
|
-
|
497
|
-
"
|
498
|
-
|
499
|
-
|
494
|
+
filters["$or"] = [
|
495
|
+
{"samples": {"$size": 1}}, # Condition for samples with exactly one element
|
496
|
+
{
|
497
|
+
"samples": {
|
498
|
+
"$elemMatch": { # Condition for samples with more than one element: individual/sample should be carrier
|
499
|
+
"sample_id": {"$in": affected_ids},
|
500
|
+
"genotype_call": {"$regex": CARRIER},
|
501
|
+
}
|
502
|
+
}
|
503
|
+
},
|
504
|
+
]
|
500
505
|
|
501
506
|
if limit_genes:
|
502
507
|
filters["genes.hgnc_id"] = {"$in": limit_genes}
|
scout/build/panel.py
CHANGED
@@ -42,7 +42,7 @@ def build_gene(gene_info: dict, adapter) -> dict:
|
|
42
42
|
|
43
43
|
# Add boolean flags
|
44
44
|
gene_obj.update(
|
45
|
-
{key:
|
45
|
+
{key: gene_info.get(key) for key in ["reduced_penetrance", "mosaicism"] if key in gene_info}
|
46
46
|
)
|
47
47
|
|
48
48
|
# Handle inheritance models
|
File without changes
|
scout/commands/load/base.py
CHANGED
File without changes
|
scout/commands/load/user.py
CHANGED
File without changes
|
scout/commands/update/disease.py
CHANGED
File without changes
|
scout/commands/update/genes.py
CHANGED
File without changes
|
scout/commands/wipe_database.py
CHANGED
File without changes
|
scout/constants/gene_tags.py
CHANGED
@@ -33,20 +33,30 @@ INHERITANCE_PALETTE = {
|
|
33
33
|
"other": {"bgcolor": "bg-light", "text_color": "text-dark"},
|
34
34
|
}
|
35
35
|
|
36
|
-
INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "
|
36
|
+
INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "None": None, "Complete": False, "Incomplete": True}
|
37
37
|
|
38
38
|
MODELS_MAP = {
|
39
|
-
"
|
40
|
-
"
|
41
|
-
"
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
|
46
|
-
|
47
|
-
"
|
48
|
-
"
|
49
|
-
"
|
39
|
+
"MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted": ["AD"],
|
40
|
+
"MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown": ["AD"],
|
41
|
+
"MONOALLELIC, autosomal or pseudoautosomal, maternally imprinted (paternal allele expressed)": [
|
42
|
+
"AD"
|
43
|
+
],
|
44
|
+
"MONOALLELIC, autosomal or pseudoautosomal, paternally imprinted (maternal allele expressed)": [
|
45
|
+
"AD"
|
46
|
+
],
|
47
|
+
"BIALLELIC, autosomal or pseudoautosomal": ["AR"],
|
48
|
+
"BOTH monoallelic and biallelic, autosomal or pseudoautosomal": ["AD", "AR"],
|
49
|
+
"BOTH monoallelic and biallelic (but BIALLELIC mutations cause a more SEVERE disease form), autosomal or pseudoautosomal": [
|
50
|
+
"AD",
|
51
|
+
"AR",
|
52
|
+
],
|
53
|
+
"X-LINKED: hemizygous mutation in males, biallelic mutations in females": ["XR"],
|
54
|
+
"X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)": [
|
55
|
+
"XD"
|
56
|
+
],
|
57
|
+
"MITOCHONDRIAL": ["MT"],
|
58
|
+
"Other": [],
|
59
|
+
"Other - please specifiy in evaluation comments": [],
|
50
60
|
}
|
51
61
|
|
52
62
|
PANEL_GENE_INFO_TRANSCRIPTS = [
|
Binary file
|
Binary file
|
scout/load/panelapp.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
import logging
|
2
2
|
import math
|
3
3
|
from datetime import datetime
|
4
|
-
from typing import
|
4
|
+
from typing import List, Set
|
5
5
|
|
6
|
-
from click import
|
6
|
+
from click import progressbar
|
7
7
|
|
8
8
|
from scout.adapter import MongoAdapter
|
9
9
|
from scout.constants.panels import PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
|
@@ -28,15 +28,11 @@ def load_panelapp_panel(
|
|
28
28
|
LOG.info("Fetching all panel app panels")
|
29
29
|
panel_ids: List[str] = panelapp.get_panel_ids(signed_off=False)
|
30
30
|
|
31
|
-
|
32
|
-
hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
|
33
|
-
|
34
|
-
for _ in panel_ids:
|
31
|
+
for panel_id in panel_ids:
|
35
32
|
panel_info: dict = panelapp.get_panel(panel_id)
|
36
33
|
parsed_panel = parse_panelapp_panel(
|
34
|
+
hgnc_gene_ids=adapter.hgnc_ids(),
|
37
35
|
panel_info=panel_info,
|
38
|
-
ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
|
39
|
-
hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
|
40
36
|
institute=institute,
|
41
37
|
confidence=confidence,
|
42
38
|
)
|
@@ -54,8 +50,6 @@ def get_panelapp_genes(
|
|
54
50
|
"""Parse and collect genes from one or more panelApp panels."""
|
55
51
|
|
56
52
|
genes = set()
|
57
|
-
ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
|
58
|
-
hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
|
59
53
|
|
60
54
|
with progressbar(panel_ids, label="Parsing panels", length=len(panel_ids)) as panel_ids:
|
61
55
|
for panel_id in panel_ids:
|
@@ -66,9 +60,8 @@ def get_panelapp_genes(
|
|
66
60
|
continue
|
67
61
|
|
68
62
|
parsed_panel = parse_panelapp_panel(
|
63
|
+
hgnc_gene_ids=adapter.hgnc_ids(),
|
69
64
|
panel_info=panel_dict,
|
70
|
-
ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
|
71
|
-
hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
|
72
65
|
institute=institute,
|
73
66
|
confidence="green",
|
74
67
|
)
|
@@ -86,6 +79,8 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
|
|
86
79
|
"""Translate panel type input from users to panel type slugs."""
|
87
80
|
if not types_filter:
|
88
81
|
return PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
|
82
|
+
if "all" in types_filter:
|
83
|
+
return available_types
|
89
84
|
index_list = [int(typeint) - 1 for typeint in types_filter.replace(" ", "").split(",")]
|
90
85
|
return [available_types[i] for i in index_list]
|
91
86
|
|
@@ -107,6 +102,7 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
|
|
107
102
|
available_types: List[str] = panelapp.get_panel_types()
|
108
103
|
for number, type in enumerate(available_types, 1):
|
109
104
|
LOG.info(f"{number}: {type}")
|
105
|
+
LOG.info("all: all types above")
|
110
106
|
preselected_options_idx: List[str] = [
|
111
107
|
str(available_types.index(presel) + 1)
|
112
108
|
for presel in PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
|
scout/parse/omim.py
CHANGED
@@ -311,7 +311,6 @@ def get_mim_genes(genemap_lines, mim2gene_lines):
|
|
311
311
|
mim_number = entry["mim_number"]
|
312
312
|
inheritance = entry["inheritance"]
|
313
313
|
phenotype_info = entry["phenotypes"]
|
314
|
-
hgnc_symbol = entry["hgnc_symbol"]
|
315
314
|
hgnc_symbols = entry["hgnc_symbols"]
|
316
315
|
if mim_number in genes:
|
317
316
|
genes[mim_number]["inheritance"] = inheritance
|
@@ -354,11 +353,11 @@ def get_mim_disease(genemap_lines: Iterable[str]) -> Dict[str, Any]:
|
|
354
353
|
"""
|
355
354
|
diseases_found = {}
|
356
355
|
|
357
|
-
#
|
358
|
-
# Each line hold a lot of information and in
|
359
|
-
# has information about the phenotypes that a gene is associated with
|
360
|
-
# From this source we collect
|
361
|
-
# a disease is associated with
|
356
|
+
# Genemap2 is a file with one entry per gene.
|
357
|
+
# Each line holds a lot of information and in particular it
|
358
|
+
# has information about the phenotypes that a gene is associated with.
|
359
|
+
# From this source we collect inheritance patterns and what hgnc symbols
|
360
|
+
# a disease is associated with.
|
362
361
|
for entry in parse_genemap2(genemap_lines):
|
363
362
|
hgnc_symbol = entry["hgnc_symbol"]
|
364
363
|
for disease in entry["phenotypes"]:
|
scout/parse/panelapp.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""Code to parse panel information"""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
from typing import
|
4
|
+
from typing import Optional, Set
|
5
5
|
|
6
6
|
from scout.constants import INCOMPLETE_PENETRANCE_MAP, MODELS_MAP, PANELAPP_CONFIDENCE_EXCLUDE
|
7
7
|
from scout.utils.date import get_date
|
@@ -11,9 +11,8 @@ PANELAPP_PANELS_URL = "https://panelapp.genomicsengland.co.uk/panels/"
|
|
11
11
|
|
12
12
|
|
13
13
|
def parse_panel_app_gene(
|
14
|
+
hgnc_gene_ids: Set[int],
|
14
15
|
panelapp_gene: dict,
|
15
|
-
ensembl_gene_hgnc_id_map: Dict[str, int],
|
16
|
-
hgnc_symbol_ensembl_gene_map: Dict[str, str],
|
17
16
|
confidence: str,
|
18
17
|
) -> dict:
|
19
18
|
"""Parse a panel app-formatted gene."""
|
@@ -23,55 +22,30 @@ def parse_panel_app_gene(
|
|
23
22
|
if confidence_level in PANELAPP_CONFIDENCE_EXCLUDE[confidence]:
|
24
23
|
return gene_info
|
25
24
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
for version in genome.values()
|
31
|
-
]
|
32
|
-
|
33
|
-
if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
|
34
|
-
if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
|
35
|
-
LOG.warning(
|
36
|
-
f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
|
37
|
-
)
|
38
|
-
ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
|
39
|
-
else:
|
40
|
-
LOG.warning(
|
41
|
-
f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
|
42
|
-
)
|
43
|
-
|
44
|
-
hgnc_ids = set(
|
45
|
-
ensembl_gene_hgnc_id_map.get(ensembl_id)
|
46
|
-
for ensembl_id in ensembl_ids
|
47
|
-
if ensembl_gene_hgnc_id_map.get(ensembl_id)
|
48
|
-
)
|
49
|
-
if not hgnc_ids:
|
50
|
-
LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
|
25
|
+
gene_symbol = panelapp_gene["gene_data"]["gene_symbol"]
|
26
|
+
hgnc_id = int(panelapp_gene["gene_data"]["hgnc_id"].split(":")[1])
|
27
|
+
if hgnc_id not in hgnc_gene_ids:
|
28
|
+
LOG.warning("Gene %s does not exist in database. Skipping gene...", gene_symbol)
|
51
29
|
return gene_info
|
52
30
|
|
53
|
-
|
54
|
-
|
31
|
+
gene_info["hgnc_id"] = hgnc_id
|
32
|
+
gene_info["hgnc_symbol"] = gene_symbol
|
55
33
|
|
56
|
-
|
57
|
-
|
58
|
-
gene_info["hgnc_id"] = hgnc_id
|
34
|
+
if panelapp_gene["penetrance"] in ["Complete", "Incomplete"]:
|
35
|
+
gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
|
59
36
|
|
60
|
-
|
37
|
+
mode_of_inheritance = panelapp_gene.get("mode_of_inheritance")
|
38
|
+
if mode_of_inheritance not in MODELS_MAP:
|
39
|
+
LOG.warning(f"Mode of inheritance '{mode_of_inheritance}' not found in MODELS_MAP.")
|
61
40
|
|
62
|
-
inheritance_models = []
|
63
|
-
for model in MODELS_MAP.get(panelapp_gene["mode_of_inheritance"], []):
|
64
|
-
inheritance_models.append(model)
|
65
|
-
|
66
|
-
gene_info["inheritance_models"] = inheritance_models
|
41
|
+
gene_info["inheritance_models"] = MODELS_MAP.get(mode_of_inheritance, [])
|
67
42
|
|
68
43
|
return gene_info
|
69
44
|
|
70
45
|
|
71
46
|
def parse_panelapp_panel(
|
47
|
+
hgnc_gene_ids: Set[int],
|
72
48
|
panel_info: dict,
|
73
|
-
ensembl_id_to_hgnc_id_map: Dict[str, int],
|
74
|
-
hgnc_symbol_to_ensembl_id_map: Dict[str, str],
|
75
49
|
institute: Optional[str] = "cust000",
|
76
50
|
confidence: Optional[str] = "green",
|
77
51
|
) -> dict:
|
@@ -101,7 +75,7 @@ def parse_panelapp_panel(
|
|
101
75
|
nr_genes = 0
|
102
76
|
for nr_genes, gene in enumerate(panel_info["genes"], 1):
|
103
77
|
gene_info = parse_panel_app_gene(
|
104
|
-
|
78
|
+
hgnc_gene_ids=hgnc_gene_ids, panelapp_gene=gene, confidence=confidence
|
105
79
|
)
|
106
80
|
if not gene_info:
|
107
81
|
nr_excluded += 1
|
scout/parse/variant/compound.py
CHANGED
@@ -1,43 +1,42 @@
|
|
1
1
|
import logging
|
2
|
+
from typing import List
|
2
3
|
|
3
4
|
from scout.utils.md5 import generate_md5_key
|
4
5
|
|
5
6
|
LOG = logging.getLogger(__name__)
|
6
7
|
|
7
8
|
|
8
|
-
def parse_compounds(compound_info, case_id, variant_type):
|
9
|
-
"""Get a list with compounds objects for this variant.
|
9
|
+
def parse_compounds(compound_info: str, case_id: str, variant_type: str) -> List[dict]:
|
10
|
+
"""Get a list with compound objects (dicts) for this variant.
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
Scout IDs do not have "chr" prefixed chromosome names, hence we lstrip that from
|
13
|
+
any compound names.
|
14
|
+
|
15
|
+
We need the case id to construct the correct id, as well as the variant type (clinical or research).
|
15
16
|
|
16
|
-
Returns:
|
17
|
-
compounds(list(dict)): A list of compounds
|
18
17
|
"""
|
19
|
-
|
18
|
+
|
20
19
|
compounds = []
|
21
20
|
if compound_info:
|
22
21
|
for family_info in compound_info.split(","):
|
23
|
-
|
22
|
+
split_entry = family_info.split(":")
|
24
23
|
# This is the family id
|
25
|
-
if
|
26
|
-
for compound in
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
24
|
+
if split_entry[0] == case_id:
|
25
|
+
for compound in split_entry[1].split("|"):
|
26
|
+
split_compound = compound.split(">")
|
27
|
+
compound_name = split_compound[0].lstrip("chr")
|
28
|
+
compound_obj = {
|
29
|
+
"display_name": compound_name,
|
30
|
+
"variant": generate_md5_key(
|
31
|
+
compound_name.split("_") + [variant_type, case_id]
|
32
|
+
),
|
33
|
+
}
|
33
34
|
|
34
35
|
try:
|
35
|
-
compound_score = float(
|
36
|
+
compound_score = float(split_compound[1])
|
36
37
|
except (TypeError, IndexError):
|
37
38
|
compound_score = 0.0
|
38
|
-
|
39
39
|
compound_obj["score"] = compound_score
|
40
|
-
compound_obj["display_name"] = compound_name
|
41
40
|
|
42
41
|
compounds.append(compound_obj)
|
43
42
|
|
scout/parse/variant/gene.py
CHANGED
File without changes
|
scout/parse/variant/genotype.py
CHANGED
File without changes
|
Binary file
|
@@ -62,6 +62,8 @@ from scout.server.utils import (
|
|
62
62
|
case_has_rna_tracks,
|
63
63
|
institute_and_case,
|
64
64
|
)
|
65
|
+
from scout.utils.acmg import get_acmg_temperature
|
66
|
+
from scout.utils.ccv import get_ccv_temperature
|
65
67
|
|
66
68
|
LOG = logging.getLogger(__name__)
|
67
69
|
|
@@ -611,6 +613,46 @@ def check_outdated_gene_panel(panel_obj, latest_panel):
|
|
611
613
|
return extra_genes, missing_genes
|
612
614
|
|
613
615
|
|
616
|
+
def add_bayesian_acmg_classification(variant_obj: dict):
|
617
|
+
"""Append info to display the ACMG VUS Bayesian score / temperature.
|
618
|
+
Criteria have a term and a modifier field on the db document
|
619
|
+
that are joined together in a string to conform to a regular
|
620
|
+
ACMG term format. A set of such terms are passed on for evaluation
|
621
|
+
to the same function as the ACMG classification form uses.
|
622
|
+
"""
|
623
|
+
variant_acmg_classifications = list(
|
624
|
+
store.get_evaluations_case_specific(document_id=variant_obj["_id"])
|
625
|
+
)
|
626
|
+
if variant_acmg_classifications:
|
627
|
+
terms = set()
|
628
|
+
for criterium in variant_acmg_classifications[0].get("criteria", []):
|
629
|
+
term = criterium.get("term")
|
630
|
+
if criterium.get("modifier"):
|
631
|
+
term += f"_{criterium.get('modifier')}"
|
632
|
+
terms.add(term)
|
633
|
+
variant_obj["bayesian_acmg"] = get_acmg_temperature(terms)
|
634
|
+
|
635
|
+
|
636
|
+
def add_bayesian_ccv_classification(variant_obj: dict):
|
637
|
+
"""Append info to display the CCV VUS Bayesian score / temperature.
|
638
|
+
Criteria have a term and a modifier field on the db document
|
639
|
+
that are joined together in a string to conform to a regular
|
640
|
+
CCV term format. A set of such terms are passed on for evaluation
|
641
|
+
to the same function as the CCV classification form uses.
|
642
|
+
"""
|
643
|
+
variant_ccv_classifications = list(
|
644
|
+
store.get_ccv_evaluations_case_specific(document_id=variant_obj["_id"])
|
645
|
+
)
|
646
|
+
if variant_ccv_classifications:
|
647
|
+
terms = set()
|
648
|
+
for criterium in variant_ccv_classifications[0].get("ccv_criteria", []):
|
649
|
+
term = criterium.get("term")
|
650
|
+
if criterium.get("modifier"):
|
651
|
+
term += f"_{criterium.get('modifier')}"
|
652
|
+
terms.add(term)
|
653
|
+
variant_obj["bayesian_ccv"] = get_ccv_temperature(terms)
|
654
|
+
|
655
|
+
|
614
656
|
def case_report_variants(store: MongoAdapter, case_obj: dict, institute_obj: dict, data: dict):
|
615
657
|
"""Gather evaluated variants info to include in case report."""
|
616
658
|
|
@@ -624,6 +666,8 @@ def case_report_variants(store: MongoAdapter, case_obj: dict, institute_obj: dic
|
|
624
666
|
continue
|
625
667
|
if case_key == "partial_causatives":
|
626
668
|
var_obj["phenotypes"] = case_obj["partial_causatives"][var_id]
|
669
|
+
add_bayesian_acmg_classification(var_obj)
|
670
|
+
add_bayesian_ccv_classification(var_obj)
|
627
671
|
evaluated_variants_by_type[eval_category].append(
|
628
672
|
_get_decorated_var(var_obj=var_obj, institute_obj=institute_obj, case_obj=case_obj)
|
629
673
|
)
|
@@ -663,6 +707,10 @@ def _append_evaluated_variant_by_type(
|
|
663
707
|
"""
|
664
708
|
for eval_category, variant_key in CASE_REPORT_VARIANT_TYPES.items():
|
665
709
|
if variant_key in var_obj and var_obj[variant_key] is not None:
|
710
|
+
|
711
|
+
add_bayesian_acmg_classification(var_obj)
|
712
|
+
add_bayesian_ccv_classification(var_obj)
|
713
|
+
|
666
714
|
evaluated_variants_by_type[eval_category].append(
|
667
715
|
_get_decorated_var(var_obj=var_obj, institute_obj=institute_obj, case_obj=case_obj)
|
668
716
|
)
|