scout-browser 4.96.0__py3-none-any.whl → 4.98.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/adapter/mongo/case.py +51 -47
- scout/adapter/mongo/filter.py +28 -11
- scout/adapter/mongo/institute.py +2 -0
- scout/adapter/mongo/omics_variant.py +20 -5
- scout/adapter/mongo/query.py +104 -95
- scout/adapter/mongo/variant.py +0 -5
- scout/adapter/mongo/variant_loader.py +10 -12
- scout/build/individual.py +3 -11
- scout/commands/delete/delete_command.py +87 -49
- scout/commands/load/research.py +4 -4
- scout/commands/load/variants.py +25 -8
- scout/commands/setup/setup_scout.py +1 -1
- scout/commands/update/case.py +12 -0
- scout/commands/update/individual.py +1 -2
- scout/constants/__init__.py +7 -2
- scout/constants/file_types.py +68 -119
- scout/constants/filters.py +2 -1
- scout/constants/gene_tags.py +3 -3
- scout/constants/igv_tracks.py +7 -11
- scout/constants/query_terms.py +2 -2
- scout/demo/643594.config.yaml +6 -0
- scout/demo/643594.peddy.ped +1 -1
- scout/demo/643594.somalier.ancestry.tsv +4 -0
- scout/demo/643594.somalier.pairs.tsv +4 -0
- scout/demo/643594.somalier.samples.tsv +4 -0
- scout/demo/cancer.load_config.yaml +1 -0
- scout/demo/resources/__init__.py +1 -1
- scout/demo/resources/gnomad.v4.1.constraint_metrics_reduced.tsv +3755 -0
- scout/exceptions/database.py +1 -1
- scout/load/all.py +8 -16
- scout/models/case/case.py +1 -0
- scout/models/case/case_loading_models.py +12 -5
- scout/models/managed_variant.py +3 -3
- scout/models/omics_variant.py +3 -3
- scout/parse/case.py +112 -5
- scout/parse/pedqc.py +127 -0
- scout/parse/variant/frequency.py +9 -6
- scout/parse/variant/variant.py +71 -39
- scout/server/app.py +2 -0
- scout/server/blueprints/alignviewers/controllers.py +2 -0
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +3 -0
- scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
- scout/server/blueprints/cases/controllers.py +23 -3
- scout/server/blueprints/cases/templates/cases/case.html +3 -0
- scout/server/blueprints/cases/templates/cases/chanjo2_form.html +2 -2
- scout/server/blueprints/cases/templates/cases/gene_panel.html +9 -3
- scout/server/blueprints/cases/templates/cases/individuals_table.html +4 -1
- scout/server/blueprints/cases/templates/cases/utils.html +23 -19
- scout/server/blueprints/cases/views.py +5 -9
- scout/server/blueprints/clinvar/controllers.py +11 -11
- scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +15 -7
- scout/server/blueprints/institutes/controllers.py +20 -1
- scout/server/blueprints/institutes/forms.py +5 -1
- scout/server/blueprints/institutes/templates/overview/filters.html +14 -1
- scout/server/blueprints/institutes/templates/overview/institute_settings.html +7 -0
- scout/server/blueprints/institutes/templates/overview/utils.html +20 -1
- scout/server/blueprints/omics_variants/templates/omics_variants/outliers.html +9 -2
- scout/server/blueprints/omics_variants/views.py +8 -10
- scout/server/blueprints/variant/controllers.py +30 -1
- scout/server/blueprints/variant/templates/variant/cancer-variant.html +19 -3
- scout/server/blueprints/variant/templates/variant/components.html +26 -9
- scout/server/blueprints/variant/templates/variant/variant.html +4 -2
- scout/server/blueprints/variant/utils.py +2 -0
- scout/server/blueprints/variants/controllers.py +29 -3
- scout/server/blueprints/variants/forms.py +37 -10
- scout/server/blueprints/variants/templates/variants/components.html +12 -10
- scout/server/blueprints/variants/templates/variants/utils.html +59 -36
- scout/server/blueprints/variants/views.py +45 -60
- scout/server/extensions/beacon_extension.py +1 -1
- scout/server/extensions/bionano_extension.py +5 -5
- scout/server/extensions/chanjo2_extension.py +40 -1
- scout/server/extensions/chanjo_extension.py +1 -1
- scout/server/extensions/matchmaker_extension.py +1 -1
- scout/server/static/bs_styles.css +2 -0
- scout/server/templates/layout.html +1 -0
- scout/server/utils.py +5 -0
- scout/utils/ensembl_biomart_clients.py +2 -11
- scout/utils/scout_requests.py +1 -1
- {scout_browser-4.96.0.dist-info → scout_browser-4.98.0.dist-info}/METADATA +1 -1
- {scout_browser-4.96.0.dist-info → scout_browser-4.98.0.dist-info}/RECORD +83 -81
- scout/demo/resources/gnomad.v4.0.constraint_metrics_reduced.tsv +0 -3755
- scout/parse/peddy.py +0 -149
- scout/utils/sort.py +0 -21
- {scout_browser-4.96.0.dist-info → scout_browser-4.98.0.dist-info}/WHEEL +0 -0
- {scout_browser-4.96.0.dist-info → scout_browser-4.98.0.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.96.0.dist-info → scout_browser-4.98.0.dist-info}/licenses/LICENSE +0 -0
scout/exceptions/database.py
CHANGED
scout/load/all.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
import logging
|
3
3
|
|
4
|
-
from scout.constants import
|
4
|
+
from scout.constants import ORDERED_FILE_TYPE_MAP
|
5
5
|
from scout.exceptions.config import ConfigError
|
6
|
-
from scout.utils.sort import get_load_priority
|
7
6
|
|
8
7
|
LOG = logging.getLogger(__name__)
|
9
8
|
|
@@ -55,30 +54,23 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
|
|
55
54
|
start = gene_caption["start"]
|
56
55
|
end = gene_caption["end"]
|
57
56
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
(FILE_TYPE_MAP[file_type]["variant_type"], FILE_TYPE_MAP[file_type]["category"])
|
64
|
-
)
|
65
|
-
|
66
|
-
for variant_type, category in sorted(
|
67
|
-
case_file_types,
|
68
|
-
key=lambda tup: get_load_priority(variant_type=tup[0], category=tup[1]),
|
69
|
-
):
|
57
|
+
for file_type, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
|
58
|
+
if not case_obj.get("vcf_files", {}).get(file_type):
|
59
|
+
continue
|
60
|
+
variant_type = vcf_dict["variant_type"]
|
61
|
+
variant_category = vcf_dict["category"]
|
70
62
|
if variant_type == "research" and not case_obj["is_research"]:
|
71
63
|
continue
|
72
64
|
|
73
65
|
LOG.info(
|
74
66
|
"Load {} {} variants for case: {} region: chr {}, start {}, end {}".format(
|
75
|
-
|
67
|
+
variant_category, variant_type.upper(), case_obj["_id"], chrom, start, end
|
76
68
|
)
|
77
69
|
)
|
78
70
|
adapter.load_variants(
|
79
71
|
case_obj=case_obj,
|
80
72
|
variant_type=variant_type,
|
81
|
-
category=
|
73
|
+
category=variant_category,
|
82
74
|
chrom=chrom,
|
83
75
|
start=start,
|
84
76
|
end=end,
|
scout/models/case/case.py
CHANGED
@@ -17,7 +17,7 @@ except ImportError:
|
|
17
17
|
|
18
18
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
19
19
|
|
20
|
-
from scout.constants import ANALYSIS_TYPES,
|
20
|
+
from scout.constants import ANALYSIS_TYPES, ORDERED_FILE_TYPE_MAP, ORDERED_OMICS_FILE_TYPE_MAP
|
21
21
|
from scout.exceptions import PedigreeError
|
22
22
|
from scout.utils.date import get_date
|
23
23
|
|
@@ -54,6 +54,9 @@ CASE_FILE_PATH_CHECKS = [
|
|
54
54
|
"peddy_ped",
|
55
55
|
"peddy_ped_check",
|
56
56
|
"peddy_sex_check",
|
57
|
+
"somalier_ancestry",
|
58
|
+
"somalier_pairs",
|
59
|
+
"somalier_samples",
|
57
60
|
"smn_tsv",
|
58
61
|
"reference_info",
|
59
62
|
"RNAfusion_inspector",
|
@@ -63,8 +66,8 @@ CASE_FILE_PATH_CHECKS = [
|
|
63
66
|
"rna_delivery_report",
|
64
67
|
]
|
65
68
|
|
66
|
-
VCF_FILE_PATH_CHECKS =
|
67
|
-
OMICS_FILE_PATH_CHECKS =
|
69
|
+
VCF_FILE_PATH_CHECKS = ORDERED_FILE_TYPE_MAP.keys()
|
70
|
+
OMICS_FILE_PATH_CHECKS = ORDERED_OMICS_FILE_TYPE_MAP.keys()
|
68
71
|
|
69
72
|
GENOME_BUILDS = ["37", "38"]
|
70
73
|
TRACKS = ["rare", "cancer"]
|
@@ -209,6 +212,7 @@ class SampleLoader(BaseModel):
|
|
209
212
|
d4_file: Optional[str] = None
|
210
213
|
display_name: Optional[str] = None
|
211
214
|
father: Optional[str] = None
|
215
|
+
hrd: Optional[str] = None
|
212
216
|
individual_id: str = Field(alias="sample_id")
|
213
217
|
is_sma: Optional[str] = None
|
214
218
|
is_sma_carrier: Optional[str] = None
|
@@ -247,8 +251,8 @@ class SampleLoader(BaseModel):
|
|
247
251
|
|
248
252
|
@model_validator(mode="before")
|
249
253
|
def convert_cancer_int_values_to_str(cls, values) -> "SampleLoader":
|
250
|
-
"""Sets 'msi' and '
|
251
|
-
for item in ["msi", "tmb"]:
|
254
|
+
"""Sets 'msi' 'tmb' and 'hrd' values for cancer cases to string. This is a required step in Pydantic2, in Pydantic1 values were just coerced from int to str."""
|
255
|
+
for item in ["msi", "tmb", "hrd"]:
|
252
256
|
if values.get(item):
|
253
257
|
values[item] = str(values[item])
|
254
258
|
return values
|
@@ -426,6 +430,9 @@ class CaseLoader(BaseModel):
|
|
426
430
|
peddy_sex_check: Optional[str] = Field(None, alias="peddy_sex") # Soon to be deprecated
|
427
431
|
phenotype_groups: Optional[List[str]] = None
|
428
432
|
phenotype_terms: Optional[List[str]] = None
|
433
|
+
somalier_ancestry: Optional[str] = None
|
434
|
+
somalier_pairs: Optional[str] = None
|
435
|
+
somalier_samples: Optional[str] = None
|
429
436
|
exe_ver: Optional[str] = None
|
430
437
|
rank_model_version: Optional[str] = None
|
431
438
|
rank_score_threshold: Optional[int] = 0
|
scout/models/managed_variant.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
"""
|
1
|
+
"""Managed variant
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
For potentially causative variants that are not yet in ClinVar
|
4
|
+
and have yet not been marked causative in any existing case.
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
scout/models/omics_variant.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
"""
|
1
|
+
"""OMICS variant
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
For potentially causative variants that are not yet in ClinVar
|
4
|
+
and have yet not been marked causative in any existing case.
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
scout/parse/case.py
CHANGED
@@ -1,12 +1,20 @@
|
|
1
1
|
import logging
|
2
|
+
from typing import Any, Dict, Tuple
|
2
3
|
|
3
4
|
from ped_parser import FamilyParser
|
4
5
|
|
5
|
-
from scout.constants import PHENOTYPE_MAP, SEX_MAP
|
6
|
+
from scout.constants import PHENOTYPE_MAP, REV_SEX_MAP, SEX_MAP
|
6
7
|
from scout.exceptions import PedigreeError
|
7
8
|
from scout.models.case.case_loading_models import CaseLoader
|
8
9
|
from scout.parse.mitodel import parse_mitodel_file
|
9
|
-
from scout.parse.
|
10
|
+
from scout.parse.pedqc import (
|
11
|
+
parse_peddy_ped,
|
12
|
+
parse_peddy_ped_check,
|
13
|
+
parse_peddy_sex_check,
|
14
|
+
parse_somalier_ancestry,
|
15
|
+
parse_somalier_pairs,
|
16
|
+
parse_somalier_samples,
|
17
|
+
)
|
10
18
|
from scout.parse.smn import parse_smn_file
|
11
19
|
|
12
20
|
LOG = logging.getLogger(__name__)
|
@@ -19,7 +27,7 @@ def parse_case_data(**kwargs):
|
|
19
27
|
on the command line. Or all the information can be specified in a config file.
|
20
28
|
Please see Scout documentation for further instructions.
|
21
29
|
|
22
|
-
Possible keyword args:
|
30
|
+
Possible keyword args are formally available in the CaseLoader class, but here is a common list with explanations:
|
23
31
|
cnv_report: Path to pdf file with CNV report
|
24
32
|
config(dict): A yaml formatted config file
|
25
33
|
coverage_qc_report: Path to html file with coverage and qc report
|
@@ -74,8 +82,10 @@ def parse_case_data(**kwargs):
|
|
74
82
|
except KeyError:
|
75
83
|
config_dict[key] = None
|
76
84
|
|
77
|
-
# This will add information from
|
85
|
+
# This will add pedigree qc information from Peddy and Somalier to the individuals.
|
86
|
+
# Let the newer Somalier have the last word if there is any disagreement
|
78
87
|
add_peddy_information(config_dict)
|
88
|
+
add_somalier_information(config_dict)
|
79
89
|
|
80
90
|
if config_dict.get("smn_tsv"):
|
81
91
|
add_smn_info(config_dict)
|
@@ -181,7 +191,104 @@ def add_smn_info_case(case_data):
|
|
181
191
|
]:
|
182
192
|
ind[key] = smn_info[ind_id][key]
|
183
193
|
except KeyError as err:
|
184
|
-
LOG.warning("Individual {} has no SMN info to update: {}."
|
194
|
+
LOG.warning(f"Individual {ind_id} has no SMN info to update: {err}.")
|
195
|
+
|
196
|
+
|
197
|
+
def set_somalier_sex_check_ind(ind: Dict[str, str], sex_check: Dict[str, Dict[str, str]]):
    """Flag on the individual whether the Somalier sex matches the pedigree sex.

    Nothing is set unless the individual is present in `sex_check` and its entry
    holds both a "sex" and an "original_pedigree_sex" value.

    Args:
        ind: individual dict, read for "individual_id" and updated in place
            with a boolean "confirmed_sex".
        sex_check: parsed Somalier samples info keyed by sample id.
    """
    sample_id = ind["individual_id"]
    if sample_id not in sex_check:
        return

    sample_info = sex_check[sample_id]
    if "sex" not in sample_info or "original_pedigree_sex" not in sample_info:
        return

    # The pedigree sex is translated through REV_SEX_MAP so that it can be
    # compared with the value found in the "sex" column.
    observed_sex = sample_info["sex"]
    pedigree_sex = REV_SEX_MAP[sample_info["original_pedigree_sex"]]
    ind["confirmed_sex"] = observed_sex == pedigree_sex
|
207
|
+
|
208
|
+
|
209
|
+
def set_somalier_confirmed_parent(
    analysis_inds: Dict[str, Any], ind: Dict[str, Any], ped_check: Dict[Tuple, Any]
):
    """Record on each parent of `ind` whether Somalier supports the parental relation.

    For every parent named on the individual ("mother" / "father"), each Somalier
    pair that contains both the individual and that parent is inspected. The
    parent's "confirmed_parent" flag in `analysis_inds` is set to True when the
    pair's relatedness lies strictly between 0.32 and 0.67 and ibs0 / ibs2 is
    below 0.014, and to False otherwise. Parents without any matching pair are
    left untouched.
    If we demand Somalier be run with "relate --infer" we can skip this.

    Args:
        analysis_inds: case individuals keyed by individual id; updated in place.
        ind: the individual whose parents are checked.
        ped_check: Somalier pair info keyed by (sample_a, sample_b) tuples; each
            value is read for "relatedness", "ibs0" and "ibs2".
    """
    ind_id = ind["individual_id"]
    for parent in ("mother", "father"):
        parent_id = ind.get(parent)
        # "0" marks an absent parent. A parent that is not among the case
        # individuals has no entry to flag, so it is skipped instead of
        # raising a KeyError.
        if not parent_id or parent_id == "0" or parent_id not in analysis_inds:
            continue

        for pair, pair_info in ped_check.items():
            if ind_id not in pair or parent_id not in pair:
                continue

            ibs2 = pair_info["ibs2"]
            # With ibs2 == 0 the ibs0/ibs2 ratio is undefined: treat the relation
            # as not confirmed rather than failing with ZeroDivisionError.
            analysis_inds[parent_id]["confirmed_parent"] = bool(
                ibs2
                and 0.32 < pair_info["relatedness"] < 0.67
                and pair_info["ibs0"] / ibs2 < 0.014
            )
|
236
|
+
|
237
|
+
|
238
|
+
def set_somalier_sex_and_relatedness_checks(
    case_config: dict,
    ped_check: Dict[Tuple, Any],
    sex_check: Dict[str, Dict],
    ancestry_info: Dict[str, Dict],
):
    """Apply parsed Somalier results to the individuals of a case config.

    Each individual in case_config["individuals"] is updated in place with the
    predicted ancestry (when the sample is present in `ancestry_info`), the sex
    check result and the parental relation check result.

    Args:
        case_config: case configuration holding an "individuals" list of dicts.
        ped_check: Somalier pair info keyed by (sample_a, sample_b).
        sex_check: Somalier samples info keyed by sample id.
        ancestry_info: Somalier ancestry info keyed by sample id.
    """
    # Index the individuals by id so that parents can be looked up directly.
    analysis_inds = {
        sample["individual_id"]: sample for sample in case_config["individuals"]
    }

    for sample_id, sample in analysis_inds.items():
        # Check if Somalier has inferred the ancestry
        if sample_id in ancestry_info:
            sample["predicted_ancestry"] = ancestry_info[sample_id].get(
                "predicted_ancestry", "UNKNOWN"
            )
        set_somalier_sex_check_ind(sample, sex_check)
        set_somalier_confirmed_parent(analysis_inds, sample, ped_check)
|
261
|
+
|
262
|
+
|
263
|
+
def add_somalier_information(case_config: dict):
    """Parse the Somalier files named in the case config and apply their content.

    The "somalier_pairs", "somalier_samples" and "somalier_ancestry" entries of
    the config are each optional. When at least one of them yields data, the
    ancestry, sex and relatedness checks of the individuals in the config are
    updated based on it; otherwise the config is left as it is.
    """
    ped_check = {}
    if case_config.get("somalier_pairs"):
        with open(case_config["somalier_pairs"], "r") as pairs_file:
            # Pairs are keyed on the two sample ids that make up the pair.
            ped_check = {
                (row["sample_a"], row["sample_b"]): row
                for row in parse_somalier_pairs(pairs_file)
            }

    sex_check = {}
    if case_config.get("somalier_samples"):
        with open(case_config["somalier_samples"], "r") as samples_file:
            sex_check = {row["sample_id"]: row for row in parse_somalier_samples(samples_file)}

    ancestry_info = {}
    if case_config.get("somalier_ancestry"):
        with open(case_config["somalier_ancestry"], "r") as ancestry_file:
            ancestry_info = {
                row["sample_id"]: row for row in parse_somalier_ancestry(ancestry_file)
            }

    if ped_check or sex_check or ancestry_info:
        LOG.info("Adding Somalier info")
        set_somalier_sex_and_relatedness_checks(case_config, ped_check, sex_check, ancestry_info)
|
185
292
|
|
186
293
|
|
187
294
|
def add_peddy_information(config_data):
|
scout/parse/pedqc.py
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
from typing import List, Optional
|
2
|
+
|
3
|
+
from scout.utils.convert import convert_number, make_bool
|
4
|
+
|
5
|
+
|
6
|
+
def tsv_to_info_dicts(
    lines: List[str],
    separator: str = "\t",
    number_keys: Optional[List[str]] = None,
    bool_keys: Optional[List[str]] = None,
) -> List[dict]:
    """Parse a tsv (or csv with "," as separator) file to a list of dicts.

    The first line is the header; any leading "#" is removed from it and its
    fields become the dict keys. Every following non-blank line gives one dict
    with the column values as dict values. Blank lines are skipped.

    Args:
        lines: the lines of the file, header first (any iterable of strings,
            e.g. an open file handle).
        separator: the column separator.
        number_keys: key names whose values are passed through convert_number.
        bool_keys: key names whose values are passed through make_bool.

    Returns:
        One dict per data row, in file order. Empty if there are no lines.
    """
    # None instead of a mutable list as default: a default list would be shared
    # between all calls.
    number_keys = number_keys or []
    bool_keys = bool_keys or []

    rows = iter(lines)
    header_line = next(rows, None)
    if header_line is None:
        return []
    header = header_line.rstrip().lstrip("#").split(separator)

    info_dicts = []
    for line in rows:
        line = line.rstrip()
        if not line:
            # A blank line would otherwise become a row holding only an empty
            # value for the first header field.
            continue
        info_dict = dict(zip(header, line.split(separator)))
        for number_key in number_keys:
            if number_key in info_dict:
                info_dict[number_key] = convert_number(info_dict[number_key])
        for bool_key in bool_keys:
            if bool_key in info_dict:
                info_dict[bool_key] = make_bool(info_dict[bool_key])
        info_dicts.append(info_dict)

    return info_dicts
|
29
|
+
|
30
|
+
|
31
|
+
def parse_peddy_ped(lines: List[str]) -> List[dict]:
    """Parse a tab separated peddy.ped file into one dict per row.

    Some of the columns found in the file:
        ancestry-prediction: one of AFR AMR EAS EUR SAS UNKNOWN
        PC1/PC2/PC3/PC4: the first 4 values after this sample was
            projected onto the thousand genomes principal components.
        idr_baf: inter-decile range (90th percentile - 10th percentile)
            of b-allele frequency. A distribution is made of all sites of
            alts / (ref + alts) and the difference between the 90th and
            the 10th percentile is reported.
            Large values indicate likely sample contamination.

    The PC1, PC2, PC3, het_call_rate, het_idr_baf and het_mean_depth values
    are converted to numbers; all other values are kept as strings.
    """
    numeric_columns = ["PC1", "PC2", "PC3", "het_call_rate", "het_idr_baf", "het_mean_depth"]
    return tsv_to_info_dicts(lines, separator="\t", number_keys=numeric_columns)
|
49
|
+
|
50
|
+
|
51
|
+
def parse_peddy_ped_check(lines: List[str]) -> List[dict]:
    """Parse a comma separated .ped_check.csv file into one dict per row.

    The following keys are converted to numbers in the returned dicts:
        hets_a - the number of sites at which sample_a was heterozygous
        hets_b - the number of sites at which sample_b was heterozygous
        ibs0 - the number of sites at which the 2 samples shared no alleles
            (should approach 0 for parent-child pairs).
        ibs2 - the number of sites at which the 2 samples were both
            hom-ref, both het, or both hom-alt.
        n - the number of sites that was used to predict the relatedness.
        rel - the relatedness (presumably the one calculated from the
            genotypes - confirm against the peddy documentation).
        pedigree_relatedness - the relatedness reported in the ped file.
        rel_difference - difference between the preceding 2 columns.
        shared_hets - the number of sites at which both samples were hets.

    The following keys are converted to booleans:
        pedigree_parents - this pair is a parent-child pair according to
            the ped file.
        predicted_parents - this pair is expected to be a parent-child pair
            according to the ibs0 (< 0.012) calculated from the genotypes.
        parent_error - the preceding 2 columns do not match
        sample_duplication_error - rel > 0.75 and ibs0 < 0.012
    """
    numeric_columns = [
        "hets_a",
        "hets_b",
        "ibs0",
        "ibs2",
        "n",
        "rel",
        "pedigree_relatedness",
        "rel_difference",
        "shared_hets",
    ]
    boolean_columns = [
        "pedigree_parents",
        "predicted_parents",
        "parent_error",
        "sample_duplication_error",
    ]
    return tsv_to_info_dicts(
        lines, separator=",", number_keys=numeric_columns, bool_keys=boolean_columns
    )
|
95
|
+
|
96
|
+
|
97
|
+
def parse_peddy_sex_check(lines: List[str]) -> List[dict]:
    """Parse a comma separated peddy sex check file into one dict per row.

    The following keys are type converted in each returned dict:
        error: boolean indicating whether there is a mismatch between chr
            genotypes and ped sex
        hom_alt_count: number of homozygous-alternate calls
        hom_ref_count: number of homozygous-reference calls
        het_count: number of heterozygote calls
        het_ratio: ratio of het_count / hom_alt_count. Low for males, high
            for females
    """
    numeric_columns = ["hom_alt_count", "hom_ref_count", "het_count", "het_ratio"]
    return tsv_to_info_dicts(
        lines, separator=",", number_keys=numeric_columns, bool_keys=["error"]
    )
|
113
|
+
|
114
|
+
|
115
|
+
def parse_somalier_pairs(lines: List[str]) -> List[dict]:
    """Parse a tab separated Somalier pairs file into one dict per sample pair.

    The relatedness, ibs0 and ibs2 values are converted to numbers.
    """
    numeric_columns = ["relatedness", "ibs0", "ibs2"]
    return tsv_to_info_dicts(lines, separator="\t", number_keys=numeric_columns)
|
118
|
+
|
119
|
+
|
120
|
+
def parse_somalier_samples(lines: List[str]) -> List[dict]:
    """Parse a tab separated Somalier samples file into one dict per sample.

    All values are kept as strings.
    """
    return tsv_to_info_dicts(lines, separator="\t")
|
123
|
+
|
124
|
+
|
125
|
+
def parse_somalier_ancestry(lines: List[str]) -> List[dict]:
    """Parse a tab separated Somalier ancestry file into one dict per sample.

    All values are kept as strings.
    """
    return tsv_to_info_dicts(lines, separator="\t")
|
scout/parse/variant/frequency.py
CHANGED
@@ -92,10 +92,10 @@ def parse_frequency(variant, info_key):
|
|
92
92
|
info_key(str)
|
93
93
|
|
94
94
|
Returns:
|
95
|
-
frequency(float): or None if frequency does not exist
|
95
|
+
frequency(float): or None if frequency does not exist (or is ".", "0", or "-1")
|
96
96
|
"""
|
97
97
|
raw_annotation = variant.INFO.get(info_key)
|
98
|
-
raw_annotation = None if raw_annotation
|
98
|
+
raw_annotation = None if raw_annotation in [".", "-1", -1, 0, "0"] else raw_annotation
|
99
99
|
frequency = float(raw_annotation) if raw_annotation else None
|
100
100
|
return frequency
|
101
101
|
|
@@ -152,11 +152,14 @@ def parse_sv_frequency(variant, info_key):
|
|
152
152
|
These have to be treated separately since some of them are not actually frequencies(float) but
|
153
153
|
occurrences(int)
|
154
154
|
"""
|
155
|
-
|
156
|
-
if
|
157
|
-
|
155
|
+
raw_value = variant.INFO.get(info_key, 0)
|
156
|
+
if raw_value in [".", "-1", -1, 0, "0"]:
|
157
|
+
return None
|
158
|
+
|
159
|
+
if any(float_str in info_key.upper() for float_str in ["AF", "FRQ"]):
|
160
|
+
value = float(raw_value)
|
158
161
|
else:
|
159
|
-
value = int(
|
162
|
+
value = int(raw_value)
|
160
163
|
if value > 0:
|
161
164
|
return value
|
162
165
|
return None
|
scout/parse/variant/variant.py
CHANGED
@@ -27,14 +27,14 @@ LOG = logging.getLogger(__name__)
|
|
27
27
|
|
28
28
|
def parse_variant(
|
29
29
|
variant: Variant,
|
30
|
-
case,
|
31
|
-
variant_type="clinical",
|
32
|
-
rank_results_header=None,
|
33
|
-
vep_header=None,
|
34
|
-
individual_positions=None,
|
35
|
-
category=None,
|
36
|
-
local_archive_info=None,
|
37
|
-
):
|
30
|
+
case: dict,
|
31
|
+
variant_type: str = "clinical",
|
32
|
+
rank_results_header: list = None,
|
33
|
+
vep_header: list = None,
|
34
|
+
individual_positions: dict = None,
|
35
|
+
category: str = None,
|
36
|
+
local_archive_info: dict = None,
|
37
|
+
) -> dict:
|
38
38
|
"""Return a parsed variant
|
39
39
|
|
40
40
|
Get all the necessary information to build a variant object
|
@@ -183,10 +183,63 @@ def parse_variant(
|
|
183
183
|
|
184
184
|
parsed_variant["frequencies"] = frequencies
|
185
185
|
|
186
|
-
|
186
|
+
set_loqus_archive_frequencies(parsed_variant, variant, local_archive_info)
|
187
|
+
|
188
|
+
set_severity_predictions(parsed_variant, variant, parsed_transcripts)
|
189
|
+
|
190
|
+
###################### Add conservation ######################
|
191
|
+
parsed_variant["conservation"] = parse_conservations(variant, parsed_transcripts)
|
192
|
+
|
193
|
+
parsed_variant["callers"] = parse_callers(variant, category=category)
|
194
|
+
set_rank_result(parsed_variant, variant, rank_results_header)
|
195
|
+
|
196
|
+
##################### Add type specific #####################
|
197
|
+
set_sv_specific_annotations(parsed_variant, variant)
|
198
|
+
|
199
|
+
set_mei_specific_annotations(parsed_variant, variant)
|
200
|
+
|
201
|
+
set_cancer_specific_annotations(parsed_variant, variant)
|
202
|
+
|
203
|
+
remove_nonetype(parsed_variant)
|
204
|
+
return parsed_variant
|
205
|
+
|
206
|
+
|
207
|
+
def set_mei_specific_annotations(parsed_variant: dict, variant: "Variant"):
    """Add MEI specific frequency annotations to a parsed variant.

    Only variants with category "mei" are touched: every frequency returned by
    parse_mei_frequencies is stored under parsed_variant["frequencies"].

    Args:
        parsed_variant: the variant dict under construction; updated in place.
        variant: the VCF variant the frequencies are parsed from.
    """
    if parsed_variant.get("category") not in ["mei"]:
        return

    mei_frequencies = parse_mei_frequencies(variant)
    for key in mei_frequencies:
        parsed_variant["frequencies"][key] = mei_frequencies[key]
|
213
|
+
|
214
|
+
|
215
|
+
def set_cancer_specific_annotations(parsed_variant: dict, variant: "Variant"):
    """Add cancer specific annotations to a parsed variant.

    MSK_MVL indicates if variants are in the MSK managed variant list
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5437632/

    Args:
        parsed_variant: the variant dict under construction; "mvl_tag" is set
            to True in place when the variant INFO holds a truthy MSK_MVL value.
        variant: the VCF variant; only its INFO mapping is read.
    """
    if variant.INFO.get("MSK_MVL"):
        parsed_variant["mvl_tag"] = True
|
223
|
+
|
224
|
+
|
225
|
+
def set_sv_specific_annotations(parsed_variant: dict, variant: "Variant"):
    """Add SV specific frequency annotations to a parsed variant.

    Only variants with category "sv" or "cancer_sv" are touched: every
    frequency returned by parse_sv_frequencies is stored under
    parsed_variant["frequencies"].

    Args:
        parsed_variant: the variant dict under construction; updated in place.
        variant: the VCF variant the frequencies are parsed from.
    """
    if parsed_variant.get("category") not in ["sv", "cancer_sv"]:
        return

    sv_frequencies = parse_sv_frequencies(variant)
    for key in sv_frequencies:
        parsed_variant["frequencies"][key] = sv_frequencies[key]
|
233
|
+
|
234
|
+
|
235
|
+
def set_loqus_archive_frequencies(parsed_variant: dict, variant: dict, local_archive_info: dict):
|
236
|
+
"""
|
237
|
+
loqusdb archive frequencies
|
238
|
+
First, RD germline, for MIP and Balsamic
|
239
|
+
Then, Cancer (Balsamic) Germline and Somatic loqus archives
|
240
|
+
SNVs contain INFO field Obs, SVs contain clinical_genomics_loqusObs
|
241
|
+
"""
|
187
242
|
|
188
|
-
# RD germline, for MIP and Balsamic
|
189
|
-
# SNVs contain INFO field Obs, SVs contain clinical_genomics_loqusObs
|
190
243
|
local_obs_old = (
|
191
244
|
variant.INFO.get("Obs")
|
192
245
|
or variant.INFO.get("clinical_genomics_loqusObs")
|
@@ -203,7 +256,6 @@ def parse_variant(
|
|
203
256
|
parsed_variant["local_obs_old_freq"] = call_safe(float, local_frq_old)
|
204
257
|
set_local_archive_info(parsed_variant, local_archive_info)
|
205
258
|
|
206
|
-
# Cancer (Balsamic) Germline and Somatic loqus archives
|
207
259
|
parsed_variant["local_obs_cancer_germline_old"] = call_safe(
|
208
260
|
int, variant.INFO.get("Cancer_Germline_Obs")
|
209
261
|
)
|
@@ -224,7 +276,12 @@ def parse_variant(
|
|
224
276
|
float, variant.INFO.get("Cancer_Somatic_Frq")
|
225
277
|
)
|
226
278
|
|
227
|
-
|
279
|
+
|
280
|
+
def set_severity_predictions(parsed_variant: dict, variant: dict, parsed_transcripts: dict):
|
281
|
+
"""
|
282
|
+
Set severity predictions on parsed variant.
|
283
|
+
"""
|
284
|
+
|
228
285
|
parsed_variant["cadd_score"] = parse_cadd(variant, parsed_transcripts)
|
229
286
|
parsed_variant["spidex"] = call_safe(float, variant.INFO.get("SPIDEX"))
|
230
287
|
|
@@ -234,31 +291,6 @@ def parse_variant(
|
|
234
291
|
) # This is actually the value of REVEL_rankscore
|
235
292
|
parsed_variant["revel"] = get_highest_revel_score(parsed_transcripts)
|
236
293
|
|
237
|
-
###################### Add conservation ######################
|
238
|
-
parsed_variant["conservation"] = parse_conservations(variant, parsed_transcripts)
|
239
|
-
|
240
|
-
parsed_variant["callers"] = parse_callers(variant, category=category)
|
241
|
-
set_rank_result(parsed_variant, variant, rank_results_header)
|
242
|
-
|
243
|
-
###################### Add SV specific annotations ######################
|
244
|
-
sv_frequencies = parse_sv_frequencies(variant)
|
245
|
-
for key in sv_frequencies:
|
246
|
-
parsed_variant["frequencies"][key] = sv_frequencies[key]
|
247
|
-
|
248
|
-
###################### Add MEI specific annotations #####################
|
249
|
-
mei_frequencies = parse_mei_frequencies(variant)
|
250
|
-
for key in mei_frequencies:
|
251
|
-
parsed_variant["frequencies"][key] = mei_frequencies[key]
|
252
|
-
|
253
|
-
###################### Add Cancer specific annotations ######################
|
254
|
-
# MSK_MVL indicates if variants are in the MSK managed variant list
|
255
|
-
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5437632/
|
256
|
-
if variant.INFO.get("MSK_MVL"):
|
257
|
-
parsed_variant["mvl_tag"] = True
|
258
|
-
|
259
|
-
remove_nonetype(parsed_variant)
|
260
|
-
return parsed_variant
|
261
|
-
|
262
294
|
|
263
295
|
def get_highest_revel_score(parsed_transcripts: List[dict]) -> Optional[float]:
|
264
296
|
"""Retrieve the highest REVEL_score value from parsed variant transcripts."""
|
@@ -484,7 +516,7 @@ def set_fusion_info(variant: Variant, parsed_variant: Dict[str, Any]):
|
|
484
516
|
|
485
517
|
|
486
518
|
def add_gene_and_transcript_info_for_fusions(
|
487
|
-
parsed_variant: Dict[str, Any]
|
519
|
+
parsed_variant: Dict[str, Any],
|
488
520
|
) -> List[Optional[Dict]]:
|
489
521
|
"""Add gene and transcript info for fusions. Return list of parsed
|
490
522
|
transcripts for later use in parsing.
|
scout/server/app.py
CHANGED
@@ -13,6 +13,7 @@ from flask_login import current_user
|
|
13
13
|
from markdown import markdown as python_markdown
|
14
14
|
from markupsafe import Markup
|
15
15
|
|
16
|
+
from scout import __version__
|
16
17
|
from scout.constants import SPIDEX_HUMAN
|
17
18
|
from scout.log import init_log
|
18
19
|
|
@@ -48,6 +49,7 @@ def create_app(config_file=None, config=None):
|
|
48
49
|
app = Flask(__name__)
|
49
50
|
CORS(app)
|
50
51
|
app.jinja_env.add_extension("jinja2.ext.do")
|
52
|
+
app.jinja_env.globals["SCOUT_VERSION"] = __version__
|
51
53
|
|
52
54
|
app.config.from_pyfile("config.py") # Load default config file
|
53
55
|
if (
|
@@ -305,6 +305,7 @@ def set_sample_tracks(display_obj: dict, case_groups: list, chromosome: str):
|
|
305
305
|
|
306
306
|
A missing file is indicated with the string "missing", and no track is made for such entries.
|
307
307
|
"""
|
308
|
+
|
308
309
|
sample_tracks = []
|
309
310
|
|
310
311
|
track_items = "mt_bams" if chromosome == "M" else "bam_files"
|
@@ -330,6 +331,7 @@ def set_sample_tracks(display_obj: dict, case_groups: list, chromosome: str):
|
|
330
331
|
"indexURL": case[track_index_items][count],
|
331
332
|
"format": case[track_items][count].split(".")[-1], # "bam" or "cram"
|
332
333
|
"height": 700,
|
334
|
+
"show_soft_clips": case["track_items_soft_clips_settings"][count],
|
333
335
|
}
|
334
336
|
)
|
335
337
|
display_obj["sample_tracks"] = sample_tracks
|
@@ -116,6 +116,9 @@
|
|
116
116
|
url: "{{ url_for('alignviewers.remote_static', file=track.url) }}",
|
117
117
|
indexURL: "{{ url_for('alignviewers.remote_static', file=track.indexURL) }}",
|
118
118
|
sourceType: "file",
|
119
|
+
groupBy: "tag:HP",
|
120
|
+
colorBy: "basemod2:m",
|
121
|
+
showSoftClips: {{track.show_soft_clips | lower }},
|
119
122
|
format: "{{ track.format }}",
|
120
123
|
height: "{{track.height}}"
|
121
124
|
},
|
@@ -1,5 +1,5 @@
|
|
1
1
|
{% macro igv_script() %}
|
2
2
|
<link rel="shortcut icon" href="//igv.org/web/img/favicon.ico">
|
3
3
|
<!-- IGV JS-->
|
4
|
-
<script src="https://cdn.jsdelivr.net/npm/igv@3.
|
4
|
+
<script src="https://cdn.jsdelivr.net/npm/igv@3.2.0/dist/igv.min.js" integrity="sha512-MHnbGQeONlQXyEs6PgiW2bhwywJW5IwUnRKfQKrPaVSrzopctBTU1VtOiEXMf/ZPBk47eFimlVRxdff+sdsyAg==" crossorigin="anonymous"></script>
|
5
5
|
{% endmacro %}
|