scout-browser 4.95.0__py3-none-any.whl → 4.97.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/adapter/mongo/case.py +75 -70
- scout/adapter/mongo/filter.py +28 -11
- scout/adapter/mongo/institute.py +2 -0
- scout/adapter/mongo/omics_variant.py +20 -5
- scout/adapter/mongo/query.py +104 -95
- scout/adapter/mongo/variant.py +0 -5
- scout/adapter/mongo/variant_loader.py +10 -12
- scout/build/case.py +3 -1
- scout/build/individual.py +3 -11
- scout/commands/delete/delete_command.py +87 -49
- scout/commands/load/research.py +4 -4
- scout/commands/load/variants.py +25 -8
- scout/commands/setup/setup_scout.py +1 -1
- scout/commands/update/case.py +12 -0
- scout/commands/update/individual.py +1 -2
- scout/constants/__init__.py +7 -2
- scout/constants/acmg.py +25 -18
- scout/constants/file_types.py +68 -119
- scout/constants/filters.py +2 -1
- scout/constants/gene_tags.py +3 -3
- scout/constants/igv_tracks.py +7 -11
- scout/constants/query_terms.py +2 -2
- scout/demo/643594.config.yaml +6 -0
- scout/demo/643594.peddy.ped +1 -1
- scout/demo/643594.somalier.ancestry.tsv +4 -0
- scout/demo/643594.somalier.pairs.tsv +4 -0
- scout/demo/643594.somalier.samples.tsv +4 -0
- scout/demo/cancer.load_config.yaml +2 -3
- scout/demo/resources/__init__.py +1 -1
- scout/demo/resources/gnomad.v4.1.constraint_metrics_reduced.tsv +3755 -0
- scout/demo/rnafusion.load_config.yaml +1 -0
- scout/exceptions/database.py +1 -1
- scout/load/all.py +8 -16
- scout/models/case/case.py +1 -0
- scout/models/case/case_loading_models.py +15 -5
- scout/models/managed_variant.py +3 -3
- scout/models/omics_variant.py +3 -3
- scout/parse/case.py +113 -5
- scout/parse/pedqc.py +127 -0
- scout/parse/variant/frequency.py +9 -6
- scout/parse/variant/variant.py +71 -39
- scout/server/app.py +14 -0
- scout/server/blueprints/alignviewers/controllers.py +2 -0
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +3 -0
- scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
- scout/server/blueprints/cases/controllers.py +25 -3
- scout/server/blueprints/cases/templates/cases/case.html +3 -0
- scout/server/blueprints/cases/templates/cases/case_report.html +28 -2
- scout/server/blueprints/cases/templates/cases/chanjo2_form.html +2 -2
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +12 -0
- scout/server/blueprints/cases/templates/cases/gene_panel.html +9 -3
- scout/server/blueprints/cases/templates/cases/individuals_table.html +4 -1
- scout/server/blueprints/cases/templates/cases/utils.html +23 -19
- scout/server/blueprints/cases/views.py +5 -9
- scout/server/blueprints/clinvar/controllers.py +12 -11
- scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +10 -14
- scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +15 -7
- scout/server/blueprints/clinvar/views.py +18 -31
- scout/server/blueprints/institutes/controllers.py +20 -1
- scout/server/blueprints/institutes/forms.py +5 -1
- scout/server/blueprints/institutes/templates/overview/institute_settings.html +7 -0
- scout/server/blueprints/institutes/templates/overview/utils.html +20 -1
- scout/server/blueprints/omics_variants/templates/omics_variants/outliers.html +9 -2
- scout/server/blueprints/omics_variants/views.py +8 -10
- scout/server/blueprints/variant/controllers.py +30 -1
- scout/server/blueprints/variant/templates/variant/cancer-variant.html +21 -5
- scout/server/blueprints/variant/templates/variant/components.html +26 -9
- scout/server/blueprints/variant/templates/variant/variant.html +4 -2
- scout/server/blueprints/variant/templates/variant/variant_details.html +1 -1
- scout/server/blueprints/variant/utils.py +2 -0
- scout/server/blueprints/variant/views.py +10 -3
- scout/server/blueprints/variants/controllers.py +29 -3
- scout/server/blueprints/variants/forms.py +37 -10
- scout/server/blueprints/variants/templates/variants/cancer-variants.html +5 -4
- scout/server/blueprints/variants/templates/variants/components.html +12 -10
- scout/server/blueprints/variants/templates/variants/str-variants.html +13 -9
- scout/server/blueprints/variants/templates/variants/utils.html +59 -36
- scout/server/blueprints/variants/views.py +45 -60
- scout/server/extensions/beacon_extension.py +1 -1
- scout/server/extensions/bionano_extension.py +5 -5
- scout/server/extensions/chanjo2_extension.py +40 -1
- scout/server/extensions/chanjo_extension.py +1 -1
- scout/server/extensions/clinvar_extension.py +56 -2
- scout/server/extensions/matchmaker_extension.py +1 -1
- scout/server/links.py +0 -14
- scout/server/static/bs_styles.css +2 -0
- scout/server/templates/layout.html +1 -0
- scout/server/utils.py +5 -0
- scout/utils/acmg.py +5 -5
- scout/utils/ensembl_biomart_clients.py +2 -11
- scout/utils/scout_requests.py +1 -1
- {scout_browser-4.95.0.dist-info → scout_browser-4.97.0.dist-info}/METADATA +1 -1
- {scout_browser-4.95.0.dist-info → scout_browser-4.97.0.dist-info}/RECORD +96 -94
- scout/demo/resources/gnomad.v4.0.constraint_metrics_reduced.tsv +0 -3755
- scout/parse/peddy.py +0 -149
- scout/utils/sort.py +0 -21
- {scout_browser-4.95.0.dist-info → scout_browser-4.97.0.dist-info}/WHEEL +0 -0
- {scout_browser-4.95.0.dist-info → scout_browser-4.97.0.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.95.0.dist-info → scout_browser-4.97.0.dist-info}/licenses/LICENSE +0 -0
scout/exceptions/database.py
CHANGED
scout/load/all.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
import logging
|
3
3
|
|
4
|
-
from scout.constants import
|
4
|
+
from scout.constants import ORDERED_FILE_TYPE_MAP
|
5
5
|
from scout.exceptions.config import ConfigError
|
6
|
-
from scout.utils.sort import get_load_priority
|
7
6
|
|
8
7
|
LOG = logging.getLogger(__name__)
|
9
8
|
|
@@ -55,30 +54,23 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
|
|
55
54
|
start = gene_caption["start"]
|
56
55
|
end = gene_caption["end"]
|
57
56
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
(FILE_TYPE_MAP[file_type]["variant_type"], FILE_TYPE_MAP[file_type]["category"])
|
64
|
-
)
|
65
|
-
|
66
|
-
for variant_type, category in sorted(
|
67
|
-
case_file_types,
|
68
|
-
key=lambda tup: get_load_priority(variant_type=tup[0], category=tup[1]),
|
69
|
-
):
|
57
|
+
for file_type, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
|
58
|
+
if not case_obj.get("vcf_files", {}).get(file_type):
|
59
|
+
continue
|
60
|
+
variant_type = vcf_dict["variant_type"]
|
61
|
+
variant_category = vcf_dict["category"]
|
70
62
|
if variant_type == "research" and not case_obj["is_research"]:
|
71
63
|
continue
|
72
64
|
|
73
65
|
LOG.info(
|
74
66
|
"Load {} {} variants for case: {} region: chr {}, start {}, end {}".format(
|
75
|
-
|
67
|
+
variant_category, variant_type.upper(), case_obj["_id"], chrom, start, end
|
76
68
|
)
|
77
69
|
)
|
78
70
|
adapter.load_variants(
|
79
71
|
case_obj=case_obj,
|
80
72
|
variant_type=variant_type,
|
81
|
-
category=
|
73
|
+
category=variant_category,
|
82
74
|
chrom=chrom,
|
83
75
|
start=start,
|
84
76
|
end=end,
|
scout/models/case/case.py
CHANGED
@@ -8,6 +8,8 @@ from os.path import abspath, dirname, exists, isabs
|
|
8
8
|
from pathlib import Path
|
9
9
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
10
10
|
|
11
|
+
from scout.constants import CASE_STATUSES
|
12
|
+
|
11
13
|
try:
|
12
14
|
from typing import Literal
|
13
15
|
except ImportError:
|
@@ -15,7 +17,7 @@ except ImportError:
|
|
15
17
|
|
16
18
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
17
19
|
|
18
|
-
from scout.constants import ANALYSIS_TYPES,
|
20
|
+
from scout.constants import ANALYSIS_TYPES, ORDERED_FILE_TYPE_MAP, ORDERED_OMICS_FILE_TYPE_MAP
|
19
21
|
from scout.exceptions import PedigreeError
|
20
22
|
from scout.utils.date import get_date
|
21
23
|
|
@@ -52,6 +54,9 @@ CASE_FILE_PATH_CHECKS = [
|
|
52
54
|
"peddy_ped",
|
53
55
|
"peddy_ped_check",
|
54
56
|
"peddy_sex_check",
|
57
|
+
"somalier_ancestry",
|
58
|
+
"somalier_pairs",
|
59
|
+
"somalier_samples",
|
55
60
|
"smn_tsv",
|
56
61
|
"reference_info",
|
57
62
|
"RNAfusion_inspector",
|
@@ -61,8 +66,8 @@ CASE_FILE_PATH_CHECKS = [
|
|
61
66
|
"rna_delivery_report",
|
62
67
|
]
|
63
68
|
|
64
|
-
VCF_FILE_PATH_CHECKS =
|
65
|
-
OMICS_FILE_PATH_CHECKS =
|
69
|
+
VCF_FILE_PATH_CHECKS = ORDERED_FILE_TYPE_MAP.keys()
|
70
|
+
OMICS_FILE_PATH_CHECKS = ORDERED_OMICS_FILE_TYPE_MAP.keys()
|
66
71
|
|
67
72
|
GENOME_BUILDS = ["37", "38"]
|
68
73
|
TRACKS = ["rare", "cancer"]
|
@@ -207,6 +212,7 @@ class SampleLoader(BaseModel):
|
|
207
212
|
d4_file: Optional[str] = None
|
208
213
|
display_name: Optional[str] = None
|
209
214
|
father: Optional[str] = None
|
215
|
+
hrd: Optional[str] = None
|
210
216
|
individual_id: str = Field(alias="sample_id")
|
211
217
|
is_sma: Optional[str] = None
|
212
218
|
is_sma_carrier: Optional[str] = None
|
@@ -245,8 +251,8 @@ class SampleLoader(BaseModel):
|
|
245
251
|
|
246
252
|
@model_validator(mode="before")
|
247
253
|
def convert_cancer_int_values_to_str(cls, values) -> "SampleLoader":
|
248
|
-
"""Sets 'msi' and '
|
249
|
-
for item in ["msi", "tmb"]:
|
254
|
+
"""Sets 'msi' 'tmb' and 'hrd' values for cancer cases to string. This is a required step in Pydantic2, in Pydantic1 values were just coerced from int to str."""
|
255
|
+
for item in ["msi", "tmb", "hrd"]:
|
250
256
|
if values.get(item):
|
251
257
|
values[item] = str(values[item])
|
252
258
|
return values
|
@@ -424,6 +430,9 @@ class CaseLoader(BaseModel):
|
|
424
430
|
peddy_sex_check: Optional[str] = Field(None, alias="peddy_sex") # Soon to be deprecated
|
425
431
|
phenotype_groups: Optional[List[str]] = None
|
426
432
|
phenotype_terms: Optional[List[str]] = None
|
433
|
+
somalier_ancestry: Optional[str] = None
|
434
|
+
somalier_pairs: Optional[str] = None
|
435
|
+
somalier_samples: Optional[str] = None
|
427
436
|
exe_ver: Optional[str] = None
|
428
437
|
rank_model_version: Optional[str] = None
|
429
438
|
rank_score_threshold: Optional[int] = 0
|
@@ -436,6 +445,7 @@ class CaseLoader(BaseModel):
|
|
436
445
|
smn_tsv: Optional[str] = None
|
437
446
|
sv_rank_model_version: Optional[str] = None
|
438
447
|
synopsis: Optional[Union[List[str], str]] = None
|
448
|
+
status: Optional[Literal[tuple(CASE_STATUSES)]] = None
|
439
449
|
track: Literal["rare", "cancer"] = "rare"
|
440
450
|
vcf_files: Optional[VcfFiles]
|
441
451
|
|
scout/models/managed_variant.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
"""
|
1
|
+
"""Managed variant
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
For potentially causative variants that are not yet in ClinVar
|
4
|
+
and have yet not been marked causative in any existing case.
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
scout/models/omics_variant.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
"""
|
1
|
+
"""OMICS variant
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
For potentially causative variants that are not yet in ClinVar
|
4
|
+
and have yet not been marked causative in any existing case.
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
scout/parse/case.py
CHANGED
@@ -1,12 +1,20 @@
|
|
1
1
|
import logging
|
2
|
+
from typing import Any, Dict, Tuple
|
2
3
|
|
3
4
|
from ped_parser import FamilyParser
|
4
5
|
|
5
|
-
from scout.constants import PHENOTYPE_MAP, SEX_MAP
|
6
|
+
from scout.constants import PHENOTYPE_MAP, REV_SEX_MAP, SEX_MAP
|
6
7
|
from scout.exceptions import PedigreeError
|
7
8
|
from scout.models.case.case_loading_models import CaseLoader
|
8
9
|
from scout.parse.mitodel import parse_mitodel_file
|
9
|
-
from scout.parse.
|
10
|
+
from scout.parse.pedqc import (
|
11
|
+
parse_peddy_ped,
|
12
|
+
parse_peddy_ped_check,
|
13
|
+
parse_peddy_sex_check,
|
14
|
+
parse_somalier_ancestry,
|
15
|
+
parse_somalier_pairs,
|
16
|
+
parse_somalier_samples,
|
17
|
+
)
|
10
18
|
from scout.parse.smn import parse_smn_file
|
11
19
|
|
12
20
|
LOG = logging.getLogger(__name__)
|
@@ -19,7 +27,7 @@ def parse_case_data(**kwargs):
|
|
19
27
|
on the command line. Or all the information can be specified in a config file.
|
20
28
|
Please see Scout documentation for further instructions.
|
21
29
|
|
22
|
-
Possible keyword args:
|
30
|
+
Possible keyword args are formally available in the CaseLoader class, but here is a common list with explanations:
|
23
31
|
cnv_report: Path to pdf file with CNV report
|
24
32
|
config(dict): A yaml formatted config file
|
25
33
|
coverage_qc_report: Path to html file with coverage and qc report
|
@@ -34,6 +42,7 @@ def parse_case_data(**kwargs):
|
|
34
42
|
RNAfusion_report: Path to the RNA fusion report
|
35
43
|
RNAfusion_report_research: Path to the research RNA fusion report
|
36
44
|
smn_tsv(str): Path to an SMN tsv file
|
45
|
+
status(str): Optional case status ("prioritized", "inactive", "ignored", "active", "solved", "archived")
|
37
46
|
vcf_cancer(str): Path to a vcf file
|
38
47
|
vcf_cancer_sv(str): Path to a vcf file
|
39
48
|
vcf_fusion(str): Path to a vcf file
|
@@ -73,8 +82,10 @@ def parse_case_data(**kwargs):
|
|
73
82
|
except KeyError:
|
74
83
|
config_dict[key] = None
|
75
84
|
|
76
|
-
# This will add information from
|
85
|
+
# This will add pedigree qc information from Peddy and Somalier to the individuals.
|
86
|
+
# Let the newer Somalier have the last word if there is any disagreement
|
77
87
|
add_peddy_information(config_dict)
|
88
|
+
add_somalier_information(config_dict)
|
78
89
|
|
79
90
|
if config_dict.get("smn_tsv"):
|
80
91
|
add_smn_info(config_dict)
|
@@ -180,7 +191,104 @@ def add_smn_info_case(case_data):
|
|
180
191
|
]:
|
181
192
|
ind[key] = smn_info[ind_id][key]
|
182
193
|
except KeyError as err:
|
183
|
-
LOG.warning("Individual {} has no SMN info to update: {}."
|
194
|
+
LOG.warning(f"Individual {ind_id} has no SMN info to update: {err}.")
|
195
|
+
|
196
|
+
|
197
|
+
def set_somalier_sex_check_ind(ind: Dict[str, str], sex_check: Dict[str, Dict[str, str]]):
    """Record on the individual whether Somalier's sex call matches the pedigree sex.

    Nothing is written unless Somalier reported both a "sex" and an
    "original_pedigree_sex" value for this individual.

    Args:
        ind: individual dict from the case config; "confirmed_sex" is set in place.
        sex_check: parsed Somalier samples rows, keyed by sample id.
    """
    sample_id = ind["individual_id"]
    if sample_id not in sex_check:
        return

    somalier_sample = sex_check[sample_id]
    if "sex" not in somalier_sample or "original_pedigree_sex" not in somalier_sample:
        return

    # NOTE(review): REV_SEX_MAP presumably translates the pedigree sex into the
    # encoding Somalier uses in its "sex" column - confirm against scout.constants.
    expected_sex = REV_SEX_MAP[somalier_sample["original_pedigree_sex"]]
    ind["confirmed_sex"] = somalier_sample["sex"] == expected_sex
|
207
|
+
|
208
|
+
|
209
|
+
def set_somalier_confirmed_parent(
    analysis_inds: Dict[str, Any], ind: Dict[str, Any], ped_check: Dict[Tuple, Any]
):
    """Flag the parents of an individual as confirmed or not, using Somalier pair statistics.

    For each parent named on the individual, every Somalier pair that contains both
    the individual and that parent is inspected. The parent is marked confirmed when
    the pair relatedness lies strictly between 0.32 and 0.67 and ibs0 / ibs2 is below
    0.014; otherwise it is marked not confirmed. If several pairs match, the last one
    seen decides. Parents without any matching pair are left untouched.

    If we demand Somalier be run with "relate --infer" we can skip this.

    Args:
        analysis_inds: individuals of the case keyed by individual id; the parent
            entries get "confirmed_parent" set in place.
        ind: the (child) individual whose "mother" and "father" are checked.
        ped_check: parsed Somalier pairs keyed by (sample_a, sample_b).
    """
    ind_id = ind["individual_id"]
    for parent in ("mother", "father"):
        parent_id = ind.get(parent)
        # "0" is the pedigree marker for "no parent"; a missing or empty value means the same.
        if not parent_id or parent_id == "0":
            continue

        parent_ind = analysis_inds.get(parent_id)
        if parent_ind is None:
            # Parent is named in the pedigree but is not among the analysed individuals.
            continue

        for pair, pair_info in ped_check.items():
            if ind_id not in pair or parent_id not in pair:
                continue

            confirmed = False
            if 0.32 < pair_info["relatedness"] < 0.67:
                ibs2 = pair_info["ibs2"]
                # Without any ibs2 sites the ratio is undefined: treat as not confirmed
                # instead of dividing by zero.
                confirmed = bool(ibs2) and pair_info["ibs0"] / ibs2 < 0.014
            parent_ind["confirmed_parent"] = confirmed
|
236
|
+
|
237
|
+
|
238
|
+
def set_somalier_sex_and_relatedness_checks(
    case_config: dict,
    ped_check: Dict[Tuple, Any],
    sex_check: Dict[str, Dict],
    ancestry_info: Dict[str, Dict],
):
    """Apply parsed Somalier results to every individual in the case config.

    Each individual gets, in place: its predicted ancestry (when Somalier reported
    the sample), the sex check result, and the parent confirmation result.

    Args:
        case_config: case config dict; its "individuals" entries are updated.
        ped_check: parsed Somalier pairs keyed by (sample_a, sample_b).
        sex_check: parsed Somalier samples rows keyed by sample id.
        ancestry_info: parsed Somalier ancestry rows keyed by sample id.
    """
    # Index individuals by id so parents can be looked up while iterating children.
    inds_by_id = {sample["individual_id"]: sample for sample in case_config["individuals"]}

    for sample_id, sample in inds_by_id.items():
        if sample_id in ancestry_info:
            sample["predicted_ancestry"] = ancestry_info[sample_id].get(
                "predicted_ancestry", "UNKNOWN"
            )
        set_somalier_sex_check_ind(sample, sex_check)
        set_somalier_confirmed_parent(inds_by_id, sample, ped_check)
|
261
|
+
|
262
|
+
|
263
|
+
def add_somalier_information(case_config: dict):
    """Read the Somalier files named in the case config and apply them to the individuals.

    The optional config keys "somalier_pairs", "somalier_samples" and
    "somalier_ancestry" each point to a file. Whatever could be parsed is used to
    update ancestry, sex and relatedness checks on the case individuals. When none
    of the files yields any rows, the config is left untouched.
    """
    ped_check = {}
    sex_check = {}
    ancestry_info = {}

    pairs_path = case_config.get("somalier_pairs")
    if pairs_path:
        with open(pairs_path, "r") as pairs_handle:
            ped_check = {
                (row["sample_a"], row["sample_b"]): row
                for row in parse_somalier_pairs(pairs_handle)
            }

    samples_path = case_config.get("somalier_samples")
    if samples_path:
        with open(samples_path, "r") as samples_handle:
            sex_check = {row["sample_id"]: row for row in parse_somalier_samples(samples_handle)}

    ancestry_path = case_config.get("somalier_ancestry")
    if ancestry_path:
        with open(ancestry_path, "r") as ancestry_handle:
            ancestry_info = {
                row["sample_id"]: row for row in parse_somalier_ancestry(ancestry_handle)
            }

    if ped_check or sex_check or ancestry_info:
        LOG.info("Adding Somalier info")
        set_somalier_sex_and_relatedness_checks(case_config, ped_check, sex_check, ancestry_info)
|
184
292
|
|
185
293
|
|
186
294
|
def add_peddy_information(config_data):
|
scout/parse/pedqc.py
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from scout.utils.convert import convert_number, make_bool
|
4
|
+
|
5
|
+
|
6
|
+
def tsv_to_info_dicts(
    lines: List[str],
    separator: str = "\t",
    number_keys: List[str] = None,
    bool_keys: List[str] = None,
) -> List[dict]:
    """Parse a tsv (or csv with "," as separator) file to a list of dicts.

    The first line is the header (a leading "#" is dropped); its fields become the
    dict keys. Every following non-blank line becomes one dict with the column
    values as dict values. Blank lines are skipped.

    Args:
        lines: the lines of the file, header first (an open file handle works).
        separator: the column separator.
        number_keys: keys whose values are passed through convert_number.
        bool_keys: keys whose values are passed through make_bool.

    Returns:
        One dict per data row, in file order. Empty input gives an empty list.
    """
    # None instead of a shared mutable list as default argument.
    number_keys = number_keys or ()
    bool_keys = bool_keys or ()

    info_dicts = []
    header = None
    for line in lines:
        line = line.rstrip()
        if header is None:
            header = line.lstrip("#").split(separator)
            continue
        if not line:
            # A blank (e.g. trailing) line would otherwise produce a bogus
            # one-key row lacking every other column.
            continue

        info_dict = dict(zip(header, line.split(separator)))
        for number_key in number_keys:
            if number_key in info_dict:
                info_dict[number_key] = convert_number(info_dict[number_key])
        for bool_key in bool_keys:
            if bool_key in info_dict:
                info_dict[bool_key] = make_bool(info_dict.get(bool_key))
        info_dicts.append(info_dict)

    return info_dicts
|
29
|
+
|
30
|
+
|
31
|
+
def parse_peddy_ped(lines: List[str]) -> List[dict]:
    """Parse a peddy.ped file (tab separated) into one dict per sample row.

    Columns of interest:
        ancestry-prediction: one of AFR AMR EAS EUR SAS UNKNOWN
        PC1/PC2/PC3/PC4: the first 4 values after this sample was
            projected onto the thousand genomes principal components.
        idr_baf: inter-decile range (90th percentile - 10th percentile)
            of b-allele frequency. We make a distribution of all sites of
            alts / (ref + alts) and then report the difference between the
            90th and the 10th percentile.
            Large values indicated likely sample contamination.

    Only the keys listed below are coerced to numbers; everything else stays a string.
    """
    numeric_columns = ["PC1", "PC2", "PC3", "het_call_rate", "het_idr_baf", "het_mean_depth"]
    return tsv_to_info_dicts(lines, separator="\t", number_keys=numeric_columns)
|
49
|
+
|
50
|
+
|
51
|
+
def parse_peddy_ped_check(lines: List[str]) -> List[dict]:
    """Parse a peddy .ped_check.csv file (comma separated) into one dict per sample pair.

    The following keys are explicitly coerced to numbers in the returned dicts:
        hets_a - the number of sites at which sample_a was heterozygous
        hets_b - the number of sites at which sample_b was heterozygous
        ibs0 - the number of sites at which the 2 samples shared no alleles
            (should approach 0 for parent-child pairs).
        ibs2 - the number of sites at which the 2 samples were both
            hom-ref, both het, or both hom-alt.
        n - the number of sites that was used to predict the relatedness.
        rel - the relatedness calculated from the genotypes.
        pedigree_relatedness - the relatedness reported in the ped file.
        rel_difference - difference between the preceding 2 columns.
        shared_hets - the number of sites at which both samples were hets.

    The following keys are explicitly coerced to booleans:
        pedigree_parents - boolean indicating that this pair is a parent-child pair
            according to the ped file.
        predicted_parents - boolean indicating that this pair is expected to be a parent-child
            pair according to the ibs0 (< 0.012) calculated from the genotypes.
        parent_error - boolean indicating that the preceding 2 columns do not match
        sample_duplication_error - boolean indicating that rel > 0.75 and ibs0 < 0.012
    """
    return tsv_to_info_dicts(
        lines,
        ",",
        number_keys=[
            "hets_a",
            "hets_b",
            "ibs0",
            "ibs2",
            "n",
            "rel",
            "pedigree_relatedness",
            "rel_difference",
            "shared_hets",
        ],
        bool_keys=[
            "pedigree_parents",
            "predicted_parents",
            "parent_error",
            "sample_duplication_error",
        ],
    )
|
95
|
+
|
96
|
+
|
97
|
+
def parse_peddy_sex_check(lines: List[str]) -> List[dict]:
    """Parse a peddy .sex_check.csv file (comma separated) into one dict per sample.

    The following keys are type coerced in each returned dict:
        error: boolean indicating whether there is a mismatch between chr genotypes and ped sex
        hom_alt_count: number of homozygous-alternate calls
        hom_ref_count: number of homozygous-reference calls
        het_count: number of heterozygote calls
        het_ratio: ratio of het_count / hom_alt_count. Low for males, high for females
    """
    return tsv_to_info_dicts(
        lines,
        ",",
        number_keys=["hom_alt_count", "hom_ref_count", "het_count", "het_ratio"],
        bool_keys=["error"],
    )
|
113
|
+
|
114
|
+
|
115
|
+
def parse_somalier_pairs(lines: List[str]) -> List[dict]:
    """Parse a Somalier pairs tsv file into one dict per sample pair.

    The "relatedness", "ibs0" and "ibs2" columns are coerced to numbers;
    all other columns are kept as strings.
    """
    numeric_columns = ["relatedness", "ibs0", "ibs2"]
    return tsv_to_info_dicts(lines, separator="\t", number_keys=numeric_columns)
|
118
|
+
|
119
|
+
|
120
|
+
def parse_somalier_samples(lines: List[str]) -> List[dict]:
    """Parse a Somalier samples tsv file into one dict per sample.

    No type coercion is applied: every value is returned as a string.
    """
    sample_rows = tsv_to_info_dicts(lines, separator="\t")
    return sample_rows
|
123
|
+
|
124
|
+
|
125
|
+
def parse_somalier_ancestry(lines: List[str]) -> List[dict]:
    """Parse a Somalier ancestry tsv file into one dict per sample.

    No type coercion is applied: every value is returned as a string.
    """
    ancestry_rows = tsv_to_info_dicts(lines, separator="\t")
    return ancestry_rows
|
scout/parse/variant/frequency.py
CHANGED
@@ -92,10 +92,10 @@ def parse_frequency(variant, info_key):
|
|
92
92
|
info_key(str)
|
93
93
|
|
94
94
|
Returns:
|
95
|
-
frequency(float): or None if frequency does not exist
|
95
|
+
frequency(float): or None if frequency does not exist (or is ".", "0", or "-1")
|
96
96
|
"""
|
97
97
|
raw_annotation = variant.INFO.get(info_key)
|
98
|
-
raw_annotation = None if raw_annotation
|
98
|
+
raw_annotation = None if raw_annotation in [".", "-1", -1, 0, "0"] else raw_annotation
|
99
99
|
frequency = float(raw_annotation) if raw_annotation else None
|
100
100
|
return frequency
|
101
101
|
|
@@ -152,11 +152,14 @@ def parse_sv_frequency(variant, info_key):
|
|
152
152
|
These have to be treated separately since some of them are not actually frequencies(float) but
|
153
153
|
occurences(int)
|
154
154
|
"""
|
155
|
-
|
156
|
-
if
|
157
|
-
|
155
|
+
raw_value = variant.INFO.get(info_key, 0)
|
156
|
+
if raw_value in [".", "-1", -1, 0, "0"]:
|
157
|
+
return None
|
158
|
+
|
159
|
+
if any(float_str in info_key.upper() for float_str in ["AF", "FRQ"]):
|
160
|
+
value = float(raw_value)
|
158
161
|
else:
|
159
|
-
value = int(
|
162
|
+
value = int(raw_value)
|
160
163
|
if value > 0:
|
161
164
|
return value
|
162
165
|
return None
|
scout/parse/variant/variant.py
CHANGED
@@ -27,14 +27,14 @@ LOG = logging.getLogger(__name__)
|
|
27
27
|
|
28
28
|
def parse_variant(
|
29
29
|
variant: Variant,
|
30
|
-
case,
|
31
|
-
variant_type="clinical",
|
32
|
-
rank_results_header=None,
|
33
|
-
vep_header=None,
|
34
|
-
individual_positions=None,
|
35
|
-
category=None,
|
36
|
-
local_archive_info=None,
|
37
|
-
):
|
30
|
+
case: dict,
|
31
|
+
variant_type: str = "clinical",
|
32
|
+
rank_results_header: list = None,
|
33
|
+
vep_header: list = None,
|
34
|
+
individual_positions: dict = None,
|
35
|
+
category: str = None,
|
36
|
+
local_archive_info: dict = None,
|
37
|
+
) -> dict:
|
38
38
|
"""Return a parsed variant
|
39
39
|
|
40
40
|
Get all the necessary information to build a variant object
|
@@ -183,10 +183,63 @@ def parse_variant(
|
|
183
183
|
|
184
184
|
parsed_variant["frequencies"] = frequencies
|
185
185
|
|
186
|
-
|
186
|
+
set_loqus_archive_frequencies(parsed_variant, variant, local_archive_info)
|
187
|
+
|
188
|
+
set_severity_predictions(parsed_variant, variant, parsed_transcripts)
|
189
|
+
|
190
|
+
###################### Add conservation ######################
|
191
|
+
parsed_variant["conservation"] = parse_conservations(variant, parsed_transcripts)
|
192
|
+
|
193
|
+
parsed_variant["callers"] = parse_callers(variant, category=category)
|
194
|
+
set_rank_result(parsed_variant, variant, rank_results_header)
|
195
|
+
|
196
|
+
##################### Add type specific #####################
|
197
|
+
set_sv_specific_annotations(parsed_variant, variant)
|
198
|
+
|
199
|
+
set_mei_specific_annotations(parsed_variant, variant)
|
200
|
+
|
201
|
+
set_cancer_specific_annotations(parsed_variant, variant)
|
202
|
+
|
203
|
+
remove_nonetype(parsed_variant)
|
204
|
+
return parsed_variant
|
205
|
+
|
206
|
+
|
207
|
+
def set_mei_specific_annotations(parsed_variant: dict, variant: "Variant"):
    """Add MEI specific frequency annotations to variants of category "mei".

    Args:
        parsed_variant: the variant dict being built; its "frequencies" dict is
            updated in place. Other categories are left untouched.
        variant: the VCF record, handed on to parse_mei_frequencies.
    """
    if parsed_variant.get("category") != "mei":
        return
    parsed_variant["frequencies"].update(parse_mei_frequencies(variant))
|
213
|
+
|
214
|
+
|
215
|
+
def set_cancer_specific_annotations(parsed_variant: dict, variant: "Variant"):
    """Add cancer specific annotations to the parsed variant.

    The MSK_MVL INFO field indicates if variants are in the MSK managed variant list
    (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5437632/). When it is present and
    truthy, "mvl_tag" is set to True; otherwise the parsed variant is left untouched.

    Args:
        parsed_variant: the variant dict being built, updated in place.
        variant: the VCF record; only its INFO mapping is read.
    """
    if variant.INFO.get("MSK_MVL"):
        parsed_variant["mvl_tag"] = True
|
223
|
+
|
224
|
+
|
225
|
+
def set_sv_specific_annotations(parsed_variant: dict, variant: "Variant"):
    """Add SV specific frequency annotations to variants of category "sv" or "cancer_sv".

    Args:
        parsed_variant: the variant dict being built; its "frequencies" dict is
            updated in place. Other categories are left untouched.
        variant: the VCF record, handed on to parse_sv_frequencies.
    """
    if parsed_variant.get("category") not in ("sv", "cancer_sv"):
        return
    parsed_variant["frequencies"].update(parse_sv_frequencies(variant))
|
233
|
+
|
234
|
+
|
235
|
+
def set_loqus_archive_frequencies(parsed_variant: dict, variant: dict, local_archive_info: dict):
|
236
|
+
"""
|
237
|
+
loqusdb archive frequencies
|
238
|
+
Fist, RD germline, for MIP and Balsamic
|
239
|
+
Then, Cancer (Balsamic) Germline and Somatic loqus archives
|
240
|
+
SNVs contain INFO field Obs, SVs contain clinical_genomics_loqusObs
|
241
|
+
"""
|
187
242
|
|
188
|
-
# RD germline, for MIP and Balsamic
|
189
|
-
# SNVs contain INFO field Obs, SVs contain clinical_genomics_loqusObs
|
190
243
|
local_obs_old = (
|
191
244
|
variant.INFO.get("Obs")
|
192
245
|
or variant.INFO.get("clinical_genomics_loqusObs")
|
@@ -203,7 +256,6 @@ def parse_variant(
|
|
203
256
|
parsed_variant["local_obs_old_freq"] = call_safe(float, local_frq_old)
|
204
257
|
set_local_archive_info(parsed_variant, local_archive_info)
|
205
258
|
|
206
|
-
# Cancer (Balsamic) Germline and Somatic loqus archives
|
207
259
|
parsed_variant["local_obs_cancer_germline_old"] = call_safe(
|
208
260
|
int, variant.INFO.get("Cancer_Germline_Obs")
|
209
261
|
)
|
@@ -224,7 +276,12 @@ def parse_variant(
|
|
224
276
|
float, variant.INFO.get("Cancer_Somatic_Frq")
|
225
277
|
)
|
226
278
|
|
227
|
-
|
279
|
+
|
280
|
+
def set_severity_predictions(parsed_variant: dict, variant: dict, parsed_transcripts: dict):
|
281
|
+
"""
|
282
|
+
Set severity predictions on parsed variant.
|
283
|
+
"""
|
284
|
+
|
228
285
|
parsed_variant["cadd_score"] = parse_cadd(variant, parsed_transcripts)
|
229
286
|
parsed_variant["spidex"] = call_safe(float, variant.INFO.get("SPIDEX"))
|
230
287
|
|
@@ -234,31 +291,6 @@ def parse_variant(
|
|
234
291
|
) # This is actually the value of REVEL_rankscore
|
235
292
|
parsed_variant["revel"] = get_highest_revel_score(parsed_transcripts)
|
236
293
|
|
237
|
-
###################### Add conservation ######################
|
238
|
-
parsed_variant["conservation"] = parse_conservations(variant, parsed_transcripts)
|
239
|
-
|
240
|
-
parsed_variant["callers"] = parse_callers(variant, category=category)
|
241
|
-
set_rank_result(parsed_variant, variant, rank_results_header)
|
242
|
-
|
243
|
-
###################### Add SV specific annotations ######################
|
244
|
-
sv_frequencies = parse_sv_frequencies(variant)
|
245
|
-
for key in sv_frequencies:
|
246
|
-
parsed_variant["frequencies"][key] = sv_frequencies[key]
|
247
|
-
|
248
|
-
###################### Add MEI specific annotations #####################
|
249
|
-
mei_frequencies = parse_mei_frequencies(variant)
|
250
|
-
for key in mei_frequencies:
|
251
|
-
parsed_variant["frequencies"][key] = mei_frequencies[key]
|
252
|
-
|
253
|
-
###################### Add Cancer specific annotations ######################
|
254
|
-
# MSK_MVL indicates if variants are in the MSK managed variant list
|
255
|
-
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5437632/
|
256
|
-
if variant.INFO.get("MSK_MVL"):
|
257
|
-
parsed_variant["mvl_tag"] = True
|
258
|
-
|
259
|
-
remove_nonetype(parsed_variant)
|
260
|
-
return parsed_variant
|
261
|
-
|
262
294
|
|
263
295
|
def get_highest_revel_score(parsed_transcripts: List[dict]) -> Optional[float]:
|
264
296
|
"""Retrieve the highest REVEL_score value from parsed variant transcripts."""
|
@@ -484,7 +516,7 @@ def set_fusion_info(variant: Variant, parsed_variant: Dict[str, Any]):
|
|
484
516
|
|
485
517
|
|
486
518
|
def add_gene_and_transcript_info_for_fusions(
|
487
|
-
parsed_variant: Dict[str, Any]
|
519
|
+
parsed_variant: Dict[str, Any],
|
488
520
|
) -> List[Optional[Dict]]:
|
489
521
|
"""Add gene and transcript info for fusions. Return list of parsed
|
490
522
|
transcripts for later use in parsing.
|
scout/server/app.py
CHANGED
@@ -13,6 +13,8 @@ from flask_login import current_user
|
|
13
13
|
from markdown import markdown as python_markdown
|
14
14
|
from markupsafe import Markup
|
15
15
|
|
16
|
+
from scout import __version__
|
17
|
+
from scout.constants import SPIDEX_HUMAN
|
16
18
|
from scout.log import init_log
|
17
19
|
|
18
20
|
from . import extensions
|
@@ -47,6 +49,7 @@ def create_app(config_file=None, config=None):
|
|
47
49
|
app = Flask(__name__)
|
48
50
|
CORS(app)
|
49
51
|
app.jinja_env.add_extension("jinja2.ext.do")
|
52
|
+
app.jinja_env.globals["SCOUT_VERSION"] = __version__
|
50
53
|
|
51
54
|
app.config.from_pyfile("config.py") # Load default config file
|
52
55
|
if (
|
@@ -199,6 +202,17 @@ def register_filters(app):
|
|
199
202
|
return "{:,}".format(int(value)).replace(",", " ")
|
200
203
|
return value
|
201
204
|
|
205
|
+
@app.template_filter()
def spidex_human(spidex):
    """Translate a SPIDEX score into a human readable level.

    Returns "not_reported" when there is no score. Otherwise the absolute value
    is compared with the upper "pos" bounds in SPIDEX_HUMAN: below the "low"
    bound gives "low", below the "medium" bound gives "medium", anything else "high".
    """
    if spidex is None:
        return "not_reported"

    magnitude = abs(spidex)
    for level in ("low", "medium"):
        if magnitude < SPIDEX_HUMAN[level]["pos"][1]:
            return level
    return "high"
|
215
|
+
|
202
216
|
@app.template_filter()
|
203
217
|
def human_decimal(number, ndigits=4):
|
204
218
|
"""Return a standard representation of a decimal number.
|