scout-browser 4.82.2__py3-none-any.whl → 4.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/__version__.py +1 -1
- scout/adapter/client.py +1 -0
- scout/adapter/mongo/base.py +0 -1
- scout/adapter/mongo/case.py +15 -37
- scout/adapter/mongo/case_events.py +98 -2
- scout/adapter/mongo/hgnc.py +39 -22
- scout/adapter/mongo/institute.py +3 -9
- scout/adapter/mongo/panel.py +2 -1
- scout/adapter/mongo/variant.py +3 -2
- scout/adapter/mongo/variant_loader.py +92 -79
- scout/commands/base.py +1 -0
- scout/commands/update/case.py +10 -10
- scout/commands/update/individual.py +6 -1
- scout/constants/file_types.py +4 -0
- scout/load/__init__.py +0 -1
- scout/load/all.py +3 -4
- scout/load/panel.py +8 -4
- scout/load/setup.py +1 -0
- scout/models/case/case_loading_models.py +6 -16
- scout/parse/case.py +0 -1
- scout/parse/disease_terms.py +1 -0
- scout/parse/omim.py +1 -0
- scout/parse/panel.py +40 -15
- scout/resources/__init__.py +3 -0
- scout/server/app.py +4 -50
- scout/server/blueprints/alignviewers/controllers.py +15 -17
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +13 -3
- scout/server/blueprints/alignviewers/views.py +10 -15
- scout/server/blueprints/cases/controllers.py +70 -73
- scout/server/blueprints/cases/templates/cases/case.html +37 -21
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
- scout/server/blueprints/cases/templates/cases/phenotype.html +8 -6
- scout/server/blueprints/cases/templates/cases/utils.html +3 -3
- scout/server/blueprints/cases/views.py +8 -6
- scout/server/blueprints/variant/controllers.py +5 -5
- scout/server/blueprints/variant/templates/variant/acmg.html +25 -16
- scout/server/blueprints/variant/templates/variant/components.html +11 -6
- scout/server/blueprints/variant/views.py +5 -2
- scout/server/blueprints/variants/controllers.py +1 -1
- scout/server/blueprints/variants/views.py +1 -1
- scout/server/config.py +16 -4
- scout/server/extensions/__init__.py +4 -2
- scout/server/extensions/beacon_extension.py +1 -0
- scout/server/extensions/chanjo_extension.py +58 -0
- scout/server/extensions/phenopacket_extension.py +1 -0
- scout/server/static/bs_styles.css +18 -0
- scout/server/utils.py +16 -2
- scout/utils/acmg.py +33 -20
- scout/utils/track_resources.py +70 -0
- {scout_browser-4.82.2.dist-info → scout_browser-4.83.dist-info}/METADATA +1 -1
- {scout_browser-4.82.2.dist-info → scout_browser-4.83.dist-info}/RECORD +55 -55
- scout/load/case.py +0 -36
- scout/utils/cloud_resources.py +0 -61
- {scout_browser-4.82.2.dist-info → scout_browser-4.83.dist-info}/LICENSE +0 -0
- {scout_browser-4.82.2.dist-info → scout_browser-4.83.dist-info}/WHEEL +0 -0
- {scout_browser-4.82.2.dist-info → scout_browser-4.83.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.82.2.dist-info → scout_browser-4.83.dist-info}/top_level.txt +0 -0
scout/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "4.
|
1
|
+
__version__ = "4.83"
|
scout/adapter/client.py
CHANGED
scout/adapter/mongo/base.py
CHANGED
scout/adapter/mongo/case.py
CHANGED
@@ -10,7 +10,7 @@ import pymongo
|
|
10
10
|
from bson import ObjectId
|
11
11
|
|
12
12
|
from scout.build.case import build_case
|
13
|
-
from scout.constants import ACMG_MAP, ID_PROJECTION
|
13
|
+
from scout.constants import ACMG_MAP, FILE_TYPE_MAP, ID_PROJECTION
|
14
14
|
from scout.exceptions import ConfigError, IntegrityError
|
15
15
|
from scout.parse.variant.ids import parse_document_id
|
16
16
|
from scout.utils.algorithms import ui_score
|
@@ -883,44 +883,34 @@ class CaseHandler(object):
|
|
883
883
|
self.evaluated_variants(case_obj["_id"], case_obj["owner"])
|
884
884
|
)
|
885
885
|
|
886
|
+
# load from files
|
886
887
|
files = [
|
887
|
-
{"file_name": "vcf_snv", "variant_type": "clinical", "category": "snv"},
|
888
|
-
{"file_name": "vcf_sv", "variant_type": "clinical", "category": "sv"},
|
889
888
|
{
|
890
|
-
"file_name":
|
891
|
-
"variant_type": "
|
892
|
-
"category": "
|
893
|
-
}
|
894
|
-
|
895
|
-
|
896
|
-
"variant_type": "clinical",
|
897
|
-
"category": "cancer_sv",
|
898
|
-
},
|
899
|
-
{"file_name": "vcf_str", "variant_type": "clinical", "category": "str"},
|
900
|
-
{"file_name": "vcf_mei", "variant_type": "clinical", "category": "mei"},
|
901
|
-
{
|
902
|
-
"file_name": "vcf_fusion",
|
903
|
-
"variant_type": "clinical",
|
904
|
-
"category": "fusion",
|
905
|
-
},
|
889
|
+
"file_name": file_type,
|
890
|
+
"variant_type": FILE_TYPE_MAP[file_type]["variant_type"],
|
891
|
+
"category": FILE_TYPE_MAP[file_type]["category"],
|
892
|
+
}
|
893
|
+
for file_type in FILE_TYPE_MAP.keys()
|
894
|
+
if FILE_TYPE_MAP[file_type]["variant_type"] != "research"
|
906
895
|
]
|
907
896
|
|
897
|
+
# (type, category) tuples are not unique - eg SNV, SNV_MT
|
898
|
+
load_variants = set()
|
908
899
|
try:
|
909
900
|
for vcf_file in files:
|
910
|
-
# Check if file
|
901
|
+
# Check if any file of this kind is configured for case
|
911
902
|
if not case_obj["vcf_files"].get(vcf_file["file_name"]):
|
912
903
|
LOG.debug("didn't find {}, skipping".format(vcf_file["file_name"]))
|
913
904
|
continue
|
905
|
+
load_variants.add((vcf_file["variant_type"], vcf_file["category"]))
|
914
906
|
|
915
|
-
|
916
|
-
category = vcf_file["category"]
|
907
|
+
for variant_type, category in load_variants:
|
917
908
|
if update:
|
918
909
|
self.delete_variants(
|
919
910
|
case_id=case_obj["_id"],
|
920
911
|
variant_type=variant_type,
|
921
912
|
category=category,
|
922
913
|
)
|
923
|
-
|
924
914
|
# add variants
|
925
915
|
self.load_variants(
|
926
916
|
case_obj=case_obj,
|
@@ -948,7 +938,7 @@ class CaseHandler(object):
|
|
948
938
|
force_update_case=True,
|
949
939
|
)
|
950
940
|
|
951
|
-
self.
|
941
|
+
self.update_case_cli(case_obj, institute_obj)
|
952
942
|
# update Sanger status for the new inserted variants
|
953
943
|
self.update_case_sanger_variants(institute_obj, case_obj, old_sanger_variants)
|
954
944
|
|
@@ -957,7 +947,7 @@ class CaseHandler(object):
|
|
957
947
|
|
958
948
|
else:
|
959
949
|
LOG.info("Loading case %s into database", case_obj["display_name"])
|
960
|
-
self.
|
950
|
+
self.add_case(case_obj, institute_obj)
|
961
951
|
|
962
952
|
return case_obj
|
963
953
|
|
@@ -972,18 +962,6 @@ class CaseHandler(object):
|
|
972
962
|
"custom_images"
|
973
963
|
].get(variant_category)
|
974
964
|
|
975
|
-
def _add_case(self, case_obj):
|
976
|
-
"""Add a case to the database
|
977
|
-
If the case already exists exception is raised
|
978
|
-
|
979
|
-
Args:
|
980
|
-
case_obj(Case)
|
981
|
-
"""
|
982
|
-
if self.case(case_obj["_id"], projection=ID_PROJECTION):
|
983
|
-
raise IntegrityError("Case %s already exists in database" % case_obj["_id"])
|
984
|
-
|
985
|
-
return self.case_collection.insert_one(case_obj)
|
986
|
-
|
987
965
|
def update_case(self, case_obj, keep_date=False):
|
988
966
|
"""Update a case in the database.
|
989
967
|
While updating the case, it compares the date of the latest analysis (case_obj["analysis_date"]) against
|
@@ -1,10 +1,12 @@
|
|
1
1
|
import logging
|
2
2
|
from collections import Counter
|
3
|
+
from os import getlogin
|
3
4
|
from typing import Dict, List, Optional
|
4
5
|
|
5
6
|
import pymongo
|
6
7
|
|
7
|
-
from scout.constants import CASE_STATUSES, CASE_TAGS
|
8
|
+
from scout.constants import CASE_STATUSES, CASE_TAGS, ID_PROJECTION
|
9
|
+
from scout.exceptions import IntegrityError
|
8
10
|
|
9
11
|
LOG = logging.getLogger(__name__)
|
10
12
|
|
@@ -12,6 +14,100 @@ LOG = logging.getLogger(__name__)
|
|
12
14
|
class CaseEventHandler(object):
|
13
15
|
"""Class to handle case events for the mongo adapter"""
|
14
16
|
|
17
|
+
def get_cli_user(self) -> dict:
|
18
|
+
"""
|
19
|
+
Return a faux CLI user with a login username from OS CLI if it is available.
|
20
|
+
"""
|
21
|
+
try:
|
22
|
+
cli_user_name = getlogin()
|
23
|
+
except OSError:
|
24
|
+
# no controlling terminal
|
25
|
+
cli_user_name = "CLI user"
|
26
|
+
|
27
|
+
return {"_id": "CLI", "name": cli_user_name}
|
28
|
+
|
29
|
+
def add_case(self, case_obj: dict, institute_obj: dict):
|
30
|
+
"""Add a case to the database
|
31
|
+
If the case already exists exception is raised.
|
32
|
+
Add case will only be called from CLI, or tests, so the user will be the faux CLI user with
|
33
|
+
a login username from OS CLI if available.
|
34
|
+
"""
|
35
|
+
if self.case(case_obj["_id"], projection=ID_PROJECTION):
|
36
|
+
raise IntegrityError("Case %s already exists in database" % case_obj["_id"])
|
37
|
+
link = f"/{case_obj['owner']}/{case_obj['display_name']}"
|
38
|
+
|
39
|
+
self.create_event(
|
40
|
+
institute=institute_obj,
|
41
|
+
case=case_obj,
|
42
|
+
user=self.get_cli_user(),
|
43
|
+
link=link,
|
44
|
+
category="case",
|
45
|
+
verb="add_case",
|
46
|
+
subject=case_obj["display_name"],
|
47
|
+
)
|
48
|
+
|
49
|
+
return self.case_collection.insert_one(case_obj)
|
50
|
+
|
51
|
+
def update_case_individual(
|
52
|
+
self, case_obj: dict, user_obj: dict, institute_obj: dict, link: str, keep_date: bool = True
|
53
|
+
):
|
54
|
+
"""Update case with new individual data (age and/or Tissue type) for a case
|
55
|
+
and create an associated event"""
|
56
|
+
self._update_case_component(
|
57
|
+
case_obj, user_obj, institute_obj, link, verb="update_individual", keep_date=keep_date
|
58
|
+
)
|
59
|
+
|
60
|
+
def update_case_sample(
|
61
|
+
self, case_obj: dict, user_obj: dict, institute_obj: dict, link: str, keep_date=True
|
62
|
+
):
|
63
|
+
"""Handle update of sample data data (tissue, tumor_type, tumor_purity) for a cancer case
|
64
|
+
and create an associated event"""
|
65
|
+
self._update_case_component(
|
66
|
+
case_obj, user_obj, institute_obj, link, verb="update_sample", keep_date=keep_date
|
67
|
+
)
|
68
|
+
|
69
|
+
def _update_case_component(
|
70
|
+
self,
|
71
|
+
case_obj: dict,
|
72
|
+
user_obj: Optional[dict],
|
73
|
+
institute_obj: dict,
|
74
|
+
link: str,
|
75
|
+
verb: str,
|
76
|
+
keep_date: bool = True,
|
77
|
+
):
|
78
|
+
"""Update case with new sample data, and create an associated event"""
|
79
|
+
self.update_case(case_obj, keep_date)
|
80
|
+
|
81
|
+
if not user_obj:
|
82
|
+
user_obj = self.get_cli_user()
|
83
|
+
|
84
|
+
self.create_event(
|
85
|
+
institute=institute_obj,
|
86
|
+
case=case_obj,
|
87
|
+
user=user_obj,
|
88
|
+
link=link,
|
89
|
+
category="case",
|
90
|
+
verb=verb,
|
91
|
+
subject=case_obj["display_name"],
|
92
|
+
)
|
93
|
+
|
94
|
+
def update_case_cli(self, case_obj: dict, institute_obj: dict):
|
95
|
+
"""Update case with new case obj, and create an associated CLI user event."""
|
96
|
+
|
97
|
+
link = f"/{case_obj['owner']}/{case_obj['display_name']}"
|
98
|
+
|
99
|
+
self.create_event(
|
100
|
+
institute=institute_obj,
|
101
|
+
case=case_obj,
|
102
|
+
user=self.get_cli_user(),
|
103
|
+
link=link,
|
104
|
+
category="case",
|
105
|
+
verb="update_case",
|
106
|
+
subject=case_obj["display_name"],
|
107
|
+
)
|
108
|
+
|
109
|
+
self.update_case(case_obj)
|
110
|
+
|
15
111
|
def assign(self, institute, case, user, link):
|
16
112
|
"""Assign a user to a case.
|
17
113
|
|
@@ -550,7 +646,7 @@ class CaseEventHandler(object):
|
|
550
646
|
updated_diagnoses = []
|
551
647
|
case_diagnoses = case.get("diagnosis_phenotypes") or []
|
552
648
|
|
553
|
-
if remove
|
649
|
+
if remove: # Remove term from case diagnoses list
|
554
650
|
for case_dia in case_diagnoses:
|
555
651
|
if case_dia.get("disease_id") == disease_id:
|
556
652
|
continue
|
scout/adapter/mongo/hgnc.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import logging
|
2
|
+
from typing import Dict
|
2
3
|
|
3
4
|
import intervaltree
|
4
5
|
from pymongo.errors import BulkWriteError, DuplicateKeyError
|
@@ -186,11 +187,11 @@ class GeneHandler(object):
|
|
186
187
|
if build == "GRCh38":
|
187
188
|
build = "38"
|
188
189
|
|
189
|
-
LOG.
|
190
|
+
LOG.debug("Fetching all genes")
|
190
191
|
|
191
192
|
hgnc_tx = {}
|
192
193
|
if add_transcripts:
|
193
|
-
LOG.
|
194
|
+
LOG.debug("Adding transcripts")
|
194
195
|
for tx in self.transcripts(build=str(build)):
|
195
196
|
hgnc_id = tx["hgnc_id"]
|
196
197
|
if not hgnc_id in hgnc_tx:
|
@@ -318,7 +319,7 @@ class GeneHandler(object):
|
|
318
319
|
res = self.hgnc_collection.find({"aliases": symbol, "build": str(build)})
|
319
320
|
return res
|
320
321
|
|
321
|
-
def genes_by_alias(self, build=
|
322
|
+
def genes_by_alias(self, build=None, genes=None):
|
322
323
|
"""Return a dictionary with hgnc symbols as keys and a list of hgnc ids
|
323
324
|
as value.
|
324
325
|
|
@@ -333,45 +334,61 @@ class GeneHandler(object):
|
|
333
334
|
Returns:
|
334
335
|
alias_genes(dict): {<hgnc_alias>: {'true': <hgnc_id>, 'ids': {<hgnc_id_1>, <hgnc_id_2>, ...}}}
|
335
336
|
"""
|
336
|
-
LOG.
|
337
|
+
LOG.debug("Fetching all genes by alias")
|
337
338
|
# Collect one entry for each alias symbol that exists
|
338
339
|
alias_genes = {}
|
339
|
-
|
340
|
-
if
|
341
|
-
|
340
|
+
|
341
|
+
if genes is None:
|
342
|
+
genes_query = {"build": str(build)} if build else {}
|
343
|
+
genes = self.hgnc_collection.find(
|
344
|
+
genes_query, projection={"hgnc_id": 1, "hgnc_symbol": 1, "aliases": 1}
|
345
|
+
)
|
342
346
|
|
343
347
|
for gene in genes:
|
344
|
-
# Collect the hgnc_id
|
345
348
|
hgnc_id = gene["hgnc_id"]
|
346
|
-
# Collect the true symbol given by hgnc
|
347
349
|
hgnc_symbol = gene["hgnc_symbol"]
|
348
|
-
|
350
|
+
|
349
351
|
for alias in gene["aliases"]:
|
350
|
-
|
351
|
-
|
352
|
+
if alias not in alias_genes:
|
353
|
+
alias_genes[alias] = {"true": None, "ids": set()}
|
354
|
+
|
355
|
+
alias_genes[alias]["ids"].add(hgnc_id)
|
352
356
|
if alias == hgnc_symbol:
|
353
|
-
|
354
|
-
# If the alias is already in the list we add the id
|
355
|
-
if alias in alias_genes:
|
356
|
-
alias_genes[alias]["ids"].add(hgnc_id)
|
357
|
-
if true_id:
|
358
|
-
alias_genes[alias]["true"] = hgnc_id
|
359
|
-
else:
|
360
|
-
alias_genes[alias] = {"true": hgnc_id, "ids": set([hgnc_id])}
|
357
|
+
alias_genes[alias]["true"] = hgnc_id
|
361
358
|
|
362
359
|
return alias_genes
|
363
360
|
|
364
|
-
def
|
361
|
+
def ensembl_to_hgnc_id_mapping(self) -> Dict[str, int]:
|
365
362
|
"""Return a dictionary with Ensembl ids as keys and hgnc_ids as values
|
366
363
|
|
367
364
|
Returns:
|
368
|
-
mapping(dict): {"ENSG00000121410":
|
365
|
+
mapping(dict): {"ENSG00000121410": 5, ...}
|
369
366
|
"""
|
370
367
|
pipeline = [{"$group": {"_id": {"ensembl_id": "$ensembl_id", "hgnc_id": "$hgnc_id"}}}]
|
371
368
|
result = self.hgnc_collection.aggregate(pipeline)
|
372
369
|
mapping = {res["_id"]["ensembl_id"]: res["_id"]["hgnc_id"] for res in result}
|
373
370
|
return mapping
|
374
371
|
|
372
|
+
def hgnc_symbol_ensembl_id_mapping(self) -> Dict[str, str]:
|
373
|
+
"""Return a dictionary with HGNC symbols as keys and Ensembl ids as values.
|
374
|
+
|
375
|
+
Returns:
|
376
|
+
mapping(dict): {"A1BG": "ENSG00000121410".}
|
377
|
+
"""
|
378
|
+
pipeline = [
|
379
|
+
{
|
380
|
+
"$group": {
|
381
|
+
"_id": {
|
382
|
+
"hgnc_symbol": "$hgnc_symbol",
|
383
|
+
"ensembl_id": "$ensembl_id",
|
384
|
+
}
|
385
|
+
}
|
386
|
+
}
|
387
|
+
]
|
388
|
+
result = self.hgnc_collection.aggregate(pipeline)
|
389
|
+
mapping = {res["_id"]["hgnc_symbol"]: res["_id"]["ensembl_id"] for res in result}
|
390
|
+
return mapping
|
391
|
+
|
375
392
|
def ensembl_genes(self, build=None, add_transcripts=False, id_transcripts=False):
|
376
393
|
"""Return a dictionary with ensembl ids as keys and gene objects as value.
|
377
394
|
|
scout/adapter/mongo/institute.py
CHANGED
@@ -158,15 +158,9 @@ class InstituteHandler(object):
|
|
158
158
|
|
159
159
|
return institute_obj
|
160
160
|
|
161
|
-
def safe_genes_filter(self, institute_id):
|
161
|
+
def safe_genes_filter(self, institute_id: str) -> List[int]:
|
162
162
|
"""Returns a list of "safe" HGNC IDs to filter variants with. These genes are retrieved from the institute.gene_panels_matching
|
163
|
-
Can be used to limit secondary findings when retrieving other causatives or matching managed variants
|
164
|
-
|
165
|
-
Args:
|
166
|
-
institute_id(str): _id of an institute
|
167
|
-
|
168
|
-
Returns:
|
169
|
-
safe_genes(list of HGNC ids)
|
163
|
+
Can be used to limit secondary findings when retrieving other causatives or matching managed variants.
|
170
164
|
"""
|
171
165
|
safe_genes = []
|
172
166
|
institute_obj = self.institute(institute_id)
|
@@ -174,7 +168,7 @@ class InstituteHandler(object):
|
|
174
168
|
return safe_genes # return an empty list
|
175
169
|
for panel_name in institute_obj.get("gene_panels_matching", {}).keys():
|
176
170
|
safe_genes += self.panel_to_genes(panel_name=panel_name, gene_format="hgnc_id")
|
177
|
-
return safe_genes
|
171
|
+
return list(set(safe_genes))
|
178
172
|
|
179
173
|
def institutes(self, institute_ids=None):
|
180
174
|
"""Fetch all institutes.
|
scout/adapter/mongo/panel.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Code to handle panels in the mongo database"""
|
2
|
+
|
2
3
|
import datetime as dt
|
3
4
|
import logging
|
4
5
|
import math
|
@@ -244,7 +245,7 @@ class PanelHandler:
|
|
244
245
|
for panel in res:
|
245
246
|
return panel
|
246
247
|
|
247
|
-
LOG.
|
248
|
+
LOG.warning("Gene panel not found")
|
248
249
|
|
249
250
|
return None
|
250
251
|
|
scout/adapter/mongo/variant.py
CHANGED
@@ -542,7 +542,8 @@ class VariantHandler(VariantLoader):
|
|
542
542
|
"institute": case_obj["owner"],
|
543
543
|
"verb": {"$in": ["mark_causative", "mark_partial_causative"]},
|
544
544
|
"category": "variant",
|
545
|
-
}
|
545
|
+
},
|
546
|
+
{"case": 1, "link": 1, "subject": 1},
|
546
547
|
)
|
547
548
|
|
548
549
|
positional_variant_ids = set()
|
@@ -553,7 +554,7 @@ class VariantHandler(VariantLoader):
|
|
553
554
|
|
554
555
|
other_case = self.case(var_event["case"], CASE_CAUSATIVES_PROJECTION)
|
555
556
|
if other_case is None:
|
556
|
-
# Other variant belongs to a case that
|
557
|
+
# Other variant belongs to a case that doesn't exist anymore
|
557
558
|
continue
|
558
559
|
other_link = var_event["link"]
|
559
560
|
# link contains other variant ID
|
@@ -32,7 +32,6 @@ LOG = logging.getLogger(__name__)
|
|
32
32
|
|
33
33
|
|
34
34
|
class VariantLoader(object):
|
35
|
-
|
36
35
|
"""Methods to handle variant loading in the mongo adapter"""
|
37
36
|
|
38
37
|
def update_variant(self, variant_obj):
|
@@ -363,6 +362,9 @@ class VariantLoader(object):
|
|
363
362
|
sample_info=None,
|
364
363
|
custom_images=None,
|
365
364
|
local_archive_info=None,
|
365
|
+
gene_to_panels=None,
|
366
|
+
hgncid_to_gene=None,
|
367
|
+
genomic_intervals=None,
|
366
368
|
):
|
367
369
|
"""Perform the loading of variants
|
368
370
|
|
@@ -389,10 +391,6 @@ class VariantLoader(object):
|
|
389
391
|
nr_inserted(int)
|
390
392
|
"""
|
391
393
|
build = build or "37"
|
392
|
-
genes = [gene_obj for gene_obj in self.all_genes(build=build)]
|
393
|
-
gene_to_panels = self.gene_to_panels(case_obj)
|
394
|
-
hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
|
395
|
-
genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
|
396
394
|
|
397
395
|
LOG.info("Start inserting {0} {1} variants into database".format(variant_type, category))
|
398
396
|
start_insertion = datetime.now()
|
@@ -611,6 +609,16 @@ class VariantLoader(object):
|
|
611
609
|
is not None
|
612
610
|
)
|
613
611
|
|
612
|
+
def _has_variants_in_file(self, variant_file: str) -> bool:
|
613
|
+
"""Check if variant file has any variants."""
|
614
|
+
try:
|
615
|
+
vcf_obj = VCF(variant_file)
|
616
|
+
var = next(vcf_obj)
|
617
|
+
return True
|
618
|
+
except StopIteration as err:
|
619
|
+
LOG.warning("Variant file %s does not include any variants", variant_file)
|
620
|
+
return False
|
621
|
+
|
614
622
|
def load_variants(
|
615
623
|
self,
|
616
624
|
case_obj,
|
@@ -649,7 +657,7 @@ class VariantLoader(object):
|
|
649
657
|
|
650
658
|
nr_inserted = 0
|
651
659
|
|
652
|
-
|
660
|
+
variant_files = []
|
653
661
|
for vcf_file_key in FILE_TYPE_MAP.keys():
|
654
662
|
if FILE_TYPE_MAP[vcf_file_key]["variant_type"] != variant_type:
|
655
663
|
continue
|
@@ -658,85 +666,90 @@ class VariantLoader(object):
|
|
658
666
|
|
659
667
|
LOG.debug("Attempt to load %s %s VCF.", variant_type, category.upper())
|
660
668
|
variant_file = case_obj["vcf_files"].get(vcf_file_key)
|
669
|
+
if variant_file:
|
670
|
+
variant_files.append(variant_file)
|
661
671
|
|
662
|
-
if not
|
672
|
+
if not variant_files:
|
663
673
|
raise SyntaxError(
|
664
|
-
"VCF
|
674
|
+
"VCF files for {} {} does not seem to exist".format(category, variant_type)
|
665
675
|
)
|
666
676
|
|
667
|
-
|
668
|
-
|
677
|
+
gene_to_panels = self.gene_to_panels(case_obj)
|
678
|
+
genes = [gene_obj for gene_obj in self.all_genes(build=build)]
|
679
|
+
hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
|
680
|
+
genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
|
681
|
+
|
682
|
+
for variant_file in variant_files:
|
683
|
+
if not self._has_variants_in_file(variant_file):
|
684
|
+
continue
|
685
|
+
|
669
686
|
vcf_obj = VCF(variant_file)
|
670
|
-
var = next(vcf_obj)
|
671
|
-
except StopIteration as err:
|
672
|
-
LOG.warning("Variant file %s does not include any variants", variant_file)
|
673
|
-
return nr_inserted
|
674
|
-
# We need to reload the file
|
675
|
-
vcf_obj = VCF(variant_file)
|
676
|
-
|
677
|
-
# Parse the neccessary headers from vcf file
|
678
|
-
rank_results_header = parse_rank_results_header(vcf_obj)
|
679
|
-
|
680
|
-
local_archive_info = parse_local_archive_header(vcf_obj)
|
681
|
-
|
682
|
-
vep_header = parse_vep_header(vcf_obj)
|
683
|
-
if vep_header:
|
684
|
-
LOG.info("Found VEP header %s", "|".join(vep_header))
|
685
|
-
|
686
|
-
# This is a dictionary to tell where ind are in vcf
|
687
|
-
individual_positions = {}
|
688
|
-
for i, ind in enumerate(vcf_obj.samples):
|
689
|
-
individual_positions[ind] = i
|
690
|
-
|
691
|
-
# Dictionary for cancer analysis
|
692
|
-
sample_info = {}
|
693
|
-
if category in ("cancer", "cancer_sv"):
|
694
|
-
for ind in case_obj["individuals"]:
|
695
|
-
if ind["phenotype"] == 2:
|
696
|
-
sample_info[ind["individual_id"]] = "case"
|
697
|
-
else:
|
698
|
-
sample_info[ind["individual_id"]] = "control"
|
699
|
-
|
700
|
-
# Check if a region scould be uploaded
|
701
|
-
region = ""
|
702
|
-
if gene_obj:
|
703
|
-
chrom = gene_obj["chromosome"]
|
704
|
-
# Add same padding as VEP
|
705
|
-
start = max(gene_obj["start"] - 5000, 0)
|
706
|
-
end = gene_obj["end"] + 5000
|
707
|
-
if chrom:
|
708
|
-
# We want to load all variants in the region regardless of rank score
|
709
|
-
rank_threshold = rank_threshold or -1000
|
710
|
-
if not (start and end):
|
711
|
-
raise SyntaxError("Specify chrom start and end")
|
712
|
-
region = "{0}:{1}-{2}".format(chrom, start, end)
|
713
|
-
else:
|
714
|
-
rank_threshold = rank_threshold or 0
|
715
|
-
|
716
|
-
variants = vcf_obj(region)
|
717
687
|
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
688
|
+
# Parse the necessary headers from vcf file
|
689
|
+
rank_results_header = parse_rank_results_header(vcf_obj)
|
690
|
+
|
691
|
+
local_archive_info = parse_local_archive_header(vcf_obj)
|
692
|
+
|
693
|
+
vep_header = parse_vep_header(vcf_obj)
|
694
|
+
if vep_header:
|
695
|
+
LOG.debug("Found VEP header %s", "|".join(vep_header))
|
696
|
+
|
697
|
+
# This is a dictionary to tell where ind are in vcf
|
698
|
+
individual_positions = {ind: i for i, ind in enumerate(vcf_obj.samples)}
|
699
|
+
|
700
|
+
# Dictionary for cancer analysis
|
701
|
+
sample_info = {}
|
702
|
+
if category in ("cancer", "cancer_sv"):
|
703
|
+
for ind in case_obj["individuals"]:
|
704
|
+
if ind["phenotype"] == 2:
|
705
|
+
sample_info[ind["individual_id"]] = "case"
|
706
|
+
else:
|
707
|
+
sample_info[ind["individual_id"]] = "control"
|
708
|
+
|
709
|
+
# Check if a region should be uploaded
|
710
|
+
region = ""
|
711
|
+
if gene_obj:
|
712
|
+
chrom = gene_obj["chromosome"]
|
713
|
+
# Add same padding as VEP
|
714
|
+
start = max(gene_obj["start"] - 5000, 0)
|
715
|
+
end = gene_obj["end"] + 5000
|
716
|
+
if chrom:
|
717
|
+
# We want to load all variants in the region regardless of rank score
|
718
|
+
rank_threshold = rank_threshold or -1000
|
719
|
+
if not (start and end):
|
720
|
+
raise SyntaxError("Specify chrom start and end")
|
721
|
+
region = "{0}:{1}-{2}".format(chrom, start, end)
|
722
|
+
else:
|
723
|
+
rank_threshold = rank_threshold or 0
|
724
|
+
|
725
|
+
variants = vcf_obj(region)
|
739
726
|
|
740
|
-
|
727
|
+
try:
|
728
|
+
nr_inserted = self._load_variants(
|
729
|
+
variants=variants,
|
730
|
+
variant_type=variant_type,
|
731
|
+
case_obj=case_obj,
|
732
|
+
individual_positions=individual_positions,
|
733
|
+
rank_threshold=rank_threshold,
|
734
|
+
institute_id=institute_id,
|
735
|
+
build=build,
|
736
|
+
rank_results_header=rank_results_header,
|
737
|
+
vep_header=vep_header,
|
738
|
+
category=category,
|
739
|
+
sample_info=sample_info,
|
740
|
+
custom_images=custom_images,
|
741
|
+
local_archive_info=local_archive_info,
|
742
|
+
gene_to_panels=gene_to_panels,
|
743
|
+
hgncid_to_gene=hgncid_to_gene,
|
744
|
+
genomic_intervals=genomic_intervals,
|
745
|
+
)
|
746
|
+
except Exception as error:
|
747
|
+
LOG.exception("unexpected error")
|
748
|
+
LOG.warning("Deleting inserted variants")
|
749
|
+
self.delete_variants(case_obj["_id"], variant_type)
|
750
|
+
raise error
|
751
|
+
|
752
|
+
if nr_inserted:
|
753
|
+
self.update_variant_rank(case_obj, variant_type, category=category)
|
741
754
|
|
742
755
|
return nr_inserted
|