scout-browser 4.85__py3-none-any.whl → 4.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/__version__.py +1 -1
- scout/adapter/mongo/base.py +17 -14
- scout/adapter/mongo/case.py +20 -1
- scout/adapter/mongo/filter.py +36 -1
- scout/adapter/mongo/omics_variant.py +145 -0
- scout/adapter/mongo/query.py +13 -3
- scout/adapter/mongo/variant.py +10 -4
- scout/build/case.py +5 -0
- scout/build/variant/variant.py +1 -0
- scout/constants/__init__.py +3 -1
- scout/constants/case_tags.py +1 -0
- scout/constants/clinvar.py +1 -1
- scout/constants/file_types.py +31 -0
- scout/constants/filters.py +4 -0
- scout/constants/indexes.py +30 -13
- scout/constants/variant_tags.py +3 -0
- scout/demo/643594.clinical.mei.vcf.gz +0 -0
- scout/demo/643594.clinical.mei.vcf.gz.tbi +0 -0
- scout/demo/643594.config.yaml +4 -0
- scout/demo/drop/fraser_top_hits_clinical.tsv +5 -0
- scout/demo/drop/outrider_top_hits_clinical.tsv +10 -0
- scout/load/setup.py +4 -4
- scout/models/case/case_loading_models.py +25 -2
- scout/models/omics_variant.py +227 -0
- scout/parse/omics_variant/__init__.py +11 -0
- scout/parse/omics_variant/drop.py +19 -0
- scout/parse/variant/callers.py +6 -3
- scout/parse/variant/frequency.py +10 -2
- scout/server/app.py +4 -1
- scout/server/blueprints/alignviewers/controllers.py +35 -24
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_sashimi_viewer.html +19 -15
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +45 -5
- scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
- scout/server/blueprints/alignviewers/views.py +10 -2
- scout/server/blueprints/cases/controllers.py +3 -0
- scout/server/blueprints/cases/templates/cases/case.html +27 -9
- scout/server/blueprints/cases/templates/cases/case_report.html +2 -17
- scout/server/blueprints/cases/templates/cases/phenotype.html +8 -5
- scout/server/blueprints/cases/templates/cases/utils.html +26 -3
- scout/server/blueprints/clinvar/controllers.py +9 -3
- scout/server/blueprints/dashboard/controllers.py +44 -13
- scout/server/blueprints/dashboard/static/charts.js +46 -36
- scout/server/blueprints/dashboard/templates/dashboard/dashboard_general.html +2 -2
- scout/server/blueprints/institutes/forms.py +2 -0
- scout/server/blueprints/institutes/templates/overview/cases.html +6 -4
- scout/server/blueprints/institutes/templates/overview/gene_variants.html +40 -27
- scout/server/blueprints/institutes/templates/overview/institute_sidebar.html +1 -1
- scout/server/blueprints/institutes/views.py +5 -12
- scout/server/blueprints/omics_variants/__init__.py +1 -0
- scout/server/blueprints/omics_variants/controllers.py +122 -0
- scout/server/blueprints/omics_variants/templates/omics_variants/outliers.html +262 -0
- scout/server/blueprints/omics_variants/views.py +106 -0
- scout/server/blueprints/panels/controllers.py +1 -7
- scout/server/blueprints/panels/templates/panels/panels.html +12 -4
- scout/server/blueprints/panels/views.py +9 -11
- scout/server/blueprints/variant/templates/variant/buttons.html +7 -2
- scout/server/blueprints/variant/templates/variant/str-variant-reviewer.html +1 -1
- scout/server/blueprints/variant/templates/variant/utils.html +1 -1
- scout/server/blueprints/variant/utils.py +54 -103
- scout/server/blueprints/variant/views.py +1 -0
- scout/server/blueprints/variants/controllers.py +1 -4
- scout/server/blueprints/variants/forms.py +42 -0
- scout/server/blueprints/variants/templates/variants/utils.html +8 -4
- scout/server/blueprints/variants/views.py +28 -7
- scout/server/config.py +4 -0
- scout/server/extensions/clinvar_extension.py +7 -7
- scout/server/links.py +2 -2
- scout/server/templates/bootstrap_global.html +1 -4
- scout/server/templates/utils.html +3 -3
- scout/server/utils.py +4 -1
- {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/METADATA +10 -10
- {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/RECORD +76 -66
- {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/WHEEL +1 -1
- {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/LICENSE +0 -0
- {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,5 @@
|
|
1
|
+
hgnc_id geneID hgncSymbol gene_type gene_name_orig sampleID seqnames start end width strand type pValue psiValue deltaPsi counts totalCounts meanCounts meanTotalCounts nonsplitCounts nonsplitProportion nonsplitProportion_99quantile annotatedJunction pValueGene padjustGene PAIRED_END DNA_ID DROP_GROUP SPLICE_COUNTS_DIR HPO_TERMS GENE_COUNTS_FILE GENE_ANNOTATION GENOME isExternal potentialImpact causesFrameshift UTR_overlap blacklist
|
2
|
+
2439 ENSG00000119535.18 CSF3R protein_coding CSF3R ADM1059A2 chr1 36479517 Imp 1961 - jaccard 1.6652e-06 0.49 -0.39 127 258 4237.77 4570.55 3 0.01 0.02 both 9.9912e-06 0.013423 True outrider,fraser False annotatedIntron_reducedUsage unlikely 5'-UTR False
|
3
|
+
4831 ENSG00000213934.9 HBG1 protein_coding HBG1 ADM1059A2 chr11 5248488 5254291 5804 - jaccard 1.918e-12 0.36 0.35 35 96 22.84 6902.91 0 0.0 0.0 end 5.7541e-12 1.2885e-08 True outrider,fraser False exonSkipping inconclusive 3'-UTR False
|
4
|
+
4832 ENSG00000196565.15 HBG2 protein_coding HBG2 ADM1059A2 chr11 5248488 5254291 5804 - jaccard 1.918e-12 0.36 0.35 35 96 22.84 6902.91 0 0.0 0.0 end 3.836e-12 1.2885e-08 True outrider,fraser False exonSkipping inconclusive 3'-UTR False
|
5
|
+
17284 ENSG00000213934.9 POT1 protein_coding POT1 ADM1059A2 chr7 124532319 124532434 115 - jaccard 1.918e-12 0.36 0.35 35 96 22.84 6902.91 0 0.0 0.0 end 5.7541e-12 1.2885e-08 True outrider,fraser False exonSkipping inconclusive 3'-UTR False
|
@@ -0,0 +1,10 @@
|
|
1
|
+
hgnc_id seqnames start end strand geneID hgncSymbol gene_type gene_name_orig sampleID pValue padjust zScore l2fc rawcounts normcounts meanCorrected theta aberrant AberrantBySample AberrantByGene padj_rank FDR_set foldChange
|
2
|
+
25415 chr4 88257620 88284769 - ENSG00000163644.15 PPM1K protein_coding PPM1K ADM1059A2 0.0016124374690447165 1.0 -5.92 -0.9 27 317.46 601.46 139.77 False 4.0 0.0 6110.0 transcriptome-wide 0.54
|
3
|
+
10019 chr6 3063824 3115187 + ENSG00000137275.16 RIPK1 protein_coding RIPK1 ADM1059A2 0.0009997468998232232 1.0 6.33 0.53 104 1891.09 1308.4 547.35 False 4.0 0.0 6110.0 transcriptome-wide 1.44
|
4
|
+
4827 chr11 5225464 5229395 - ENSG00000244734.4 HBB protein_coding HBB ADM1059A2 3.417496739472308e-25 4.169327890311004e-20 -12.61 -10.14 61 2625.42 3016434.64 9.21 True 4.0 1.0 1.0 transcriptome-wide 0.0
|
5
|
+
4831 chr11 5248269 5249857 - ENSG00000213934.9 HBG1 protein_coding HBG1 ADM1059A3 0.0016205310428587193 1.0 2.23 2.86 73 151.01 19.19 0.91 False 4.0 0.0 6110.0 transcriptome-wide 7.26
|
6
|
+
16860 chr12 108522214 108561400 - ENSG00000075856.12 SART3 protein_coding SART3 ADM1059A2 0.0014636643867152977 1.0 7.02 0.51 97 1412.71 989.2 1000.0 False 4.0 0.0 6110.0 transcriptome-wide 1.42
|
7
|
+
4824 chr16 172876 173710 + ENSG00000188536.13 HBA2 protein_coding HBA2 ADM1059A2 2.15484140654196e-24 1.2729742965811128e-19 -12.53 -10.01 262 6781.29 7019330.9 8.97 True 4.0 1.0 2.5 transcriptome-wide 0.0
|
8
|
+
4823 chr16 176680 177522 + ENSG00000206172.8 HBA1 protein_coding HBA1 ADM1059A2 3.1302782768232926e-24 1.2729742965811128e-19 -12.5 -9.97 246 6497.61 6540277.08 8.95 True 4.0 1.0 2.5 transcriptome-wide 0.0
|
9
|
+
9543 chr17 4796144 4798502 + ENSG00000142507.10 PSMB6 protein_coding PSMB6 ADM1059A2 0.0010771639306525651 1.0 -10.54 -1.14 13 342.89 783.82 1000.0 False 4.0 0.0 6110.0 transcriptome-wide 0.45
|
10
|
+
17284 chr7 124532319 124532434 - ENSG00000213934.9 POT1 protein_coding POT1 ADM1059A2 0.0016205310428587193 1.0 2.23 2.86 73 151.01 19.19 0.91 False 4.0 0.0 6110.0 transcriptome-wide 7.26
|
scout/load/setup.py
CHANGED
@@ -51,12 +51,12 @@ def setup_scout(
|
|
51
51
|
|
52
52
|
WARNING: If the instance is populated all collections will be deleted
|
53
53
|
|
54
|
-
Build insert
|
55
|
-
There are multiple sources of information that is used by scout and that needs to exist for
|
56
|
-
scout to work proper.
|
54
|
+
Build and insert an institute and an admin user.
|
57
55
|
|
56
|
+
Multiple sources of information that are used by scout need to exist for
|
57
|
+
scout to work properly.
|
58
58
|
Genes:
|
59
|
-
Scout uses HGNC as the source for gene identifiers
|
59
|
+
Scout uses HGNC as the source for gene identifiers and ENSEMBL as source for coordinates.
|
60
60
|
Additional information of disease connections for genes if fetched from OMIM.
|
61
61
|
Link between hpo terms and genes is fetched from HPO
|
62
62
|
For more details check the documentation.
|
@@ -15,7 +15,7 @@ except ImportError:
|
|
15
15
|
|
16
16
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
17
17
|
|
18
|
-
from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP
|
18
|
+
from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP, OMICS_FILE_TYPE_MAP
|
19
19
|
from scout.exceptions import PedigreeError
|
20
20
|
from scout.utils.date import get_date
|
21
21
|
|
@@ -41,15 +41,17 @@ CASE_FILE_PATH_CHECKS = [
|
|
41
41
|
"cnv_report",
|
42
42
|
"coverage_qc_report",
|
43
43
|
"delivery_report",
|
44
|
+
"exe_ver",
|
45
|
+
"fraser_tsv",
|
44
46
|
"gene_fusion_report",
|
45
47
|
"gene_fusion_report_research",
|
46
48
|
"madeline_info",
|
47
49
|
"multiqc",
|
48
50
|
"multiqc_rna",
|
51
|
+
"outrider_tsv",
|
49
52
|
"peddy_ped",
|
50
53
|
"peddy_ped_check",
|
51
54
|
"peddy_sex_check",
|
52
|
-
"exe_ver",
|
53
55
|
"smn_tsv",
|
54
56
|
"reference_info",
|
55
57
|
"RNAfusion_inspector",
|
@@ -59,6 +61,7 @@ CASE_FILE_PATH_CHECKS = [
|
|
59
61
|
]
|
60
62
|
|
61
63
|
VCF_FILE_PATH_CHECKS = FILE_TYPE_MAP.keys()
|
64
|
+
OMICS_FILE_PATH_CHECKS = OMICS_FILE_TYPE_MAP.keys()
|
62
65
|
|
63
66
|
GENOME_BUILDS = ["37", "38"]
|
64
67
|
TRACKS = ["rare", "cancer"]
|
@@ -153,6 +156,25 @@ class Mitodel(BaseModel):
|
|
153
156
|
ratioppk: Optional[float] = None
|
154
157
|
|
155
158
|
|
159
|
+
class OmicsFiles(BaseModel):
    """Paths to the omics result files that can be attached to a case.

    Covers e.g. RNA expression outlier files for aberrant splicing (fraser)
    and aberrant expression (outrider), each in a clinical and a research flavour.
    """

    fraser: Optional[str] = None
    fraser_research: Optional[str] = None
    outrider: Optional[str] = None
    outrider_research: Optional[str] = None

    @model_validator(mode="before")
    def validate_file_path(cls, values: Dict) -> "OmicsFiles":
        """Resolve every provided omics file path via _resource_abs_path.

        Runs before field validation on the raw input values. Keys listed in
        OMICS_FILE_PATH_CHECKS that are missing or empty are left untouched.
        """
        for file_key in OMICS_FILE_PATH_CHECKS:
            raw_path: str = values.get(file_key)
            if not raw_path:
                continue
            values[file_key] = _resource_abs_path(raw_path)
        return values
|
176
|
+
|
177
|
+
|
156
178
|
class REViewer(BaseModel):
|
157
179
|
alignment: Optional[str] = None
|
158
180
|
alignment_index: Optional[str] = None
|
@@ -392,6 +414,7 @@ class CaseLoader(BaseModel):
|
|
392
414
|
madeline_info: Optional[str] = Field(None, alias="madeline")
|
393
415
|
multiqc: Optional[str] = None
|
394
416
|
multiqc_rna: Optional[str] = None
|
417
|
+
omics_files: Optional[OmicsFiles] = None
|
395
418
|
owner: Optional[str] = None
|
396
419
|
peddy_ped: Optional[str] = None # Soon to be deprecated
|
397
420
|
peddy_ped_check: Optional[str] = Field(None, alias="peddy_check") # Soon to be deprecated
|
@@ -0,0 +1,227 @@
|
|
1
|
+
""" OMICS variant
|
2
|
+
|
3
|
+
For potentially causative variants that are not yet in ClinVar
|
4
|
+
and have not yet been marked causative in any existing case.
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from datetime import datetime
|
10
|
+
from typing import List, Optional
|
11
|
+
|
12
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
13
|
+
|
14
|
+
LOG = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
class OmicsVariantLoader(BaseModel):
    """Omics variants loader

    OmicsVariants are e.g. RNA expression outliers as identified by the DROP pipeline.

    Variable names are as found in the original files, plus a set common to all mixed in after file parsing,
    but before model validation by this class.

    The serialisation names will be used when dumping the model for e.g. db storage.
    """

    case_id: str
    institute: str
    build: str = "38"
    variant_type: str = "clinical"
    category: str  # eg "outlier"
    sub_category: str  # eg "splicing"
    # Use a factory so the timestamp is taken when each variant is instantiated,
    # not once when the module is imported.
    date: datetime = Field(default_factory=datetime.now)
    display_name: str
    omics_variant_id: str

    # DROP Fraser and Outrider outlier TSVs

    # sample id is mandatory: each row pertains to one outlier event in one individual as compared to others
    # In the db object, this will be replaced with a "samples" array of individual dict.
    sampleID: str

    # outlier variants must identify the gene they pertain to, primarily with an hgnc_id
    hgnc_ids: Optional[List[int]] = Field(alias="hgnc_id", serialization_alias="hgnc_ids")
    geneID: Optional[str]

    hgnc_symbols: Optional[List[str]] = Field(
        alias="hgncSymbol", serialization_alias="hgnc_symbols"
    )
    gene_name_orig: Optional[str]

    gene_type: Optional[str]

    # coordinates if applicable
    chromosome: Optional[str] = Field(alias="seqnames", serialization_alias="chromosome")
    position: Optional[int] = Field(alias="start", serialization_alias="position")
    end: Optional[int]
    width: Optional[int] = None
    strand: Optional[str] = None

    p_value: Optional[float] = Field(alias="pValue", serialization_alias="p_value", default=None)

    # Fraser specific
    type: Optional[str] = None
    psi_value: Optional[float] = Field(
        alias="psiValue", serialization_alias="psi_value", default=None
    )
    delta_psi: Optional[float] = Field(
        alias="deltaPsi", serialization_alias="delta_psi", default=None
    )
    counts: Optional[int] = None
    total_counts: Optional[int] = Field(
        alias="totalCounts", serialization_alias="total_counts", default=None
    )
    mean_counts: Optional[float] = Field(
        alias="meanCounts", serialization_alias="mean_counts", default=None
    )
    mean_total_counts: Optional[float] = Field(
        alias="meanTotalCounts", serialization_alias="mean_total_counts", default=None
    )
    nonsplit_counts: Optional[int] = Field(
        alias="nonsplitCounts", serialization_alias="nonsplit_counts", default=None
    )
    nonsplit_proportion: Optional[float] = Field(
        alias="nonsplitProportion", serialization_alias="nonsplit_proportion", default=None
    )
    nonsplit_proportion_99quantile: Optional[float] = Field(
        alias="nonsplitProportion_99quantile",
        serialization_alias="nonsplit_proportion_99quantile",
        default=None,
    )
    annotated_junction: Optional[str] = Field(
        alias="annotatedJunction", serialization_alias="annotated_junction", default=None
    )
    p_value_gene: Optional[float] = Field(
        alias="pValueGene", serialization_alias="p_value_gene", default=None
    )
    p_adjust_gene: Optional[float] = Field(
        alias="padjustGene", serialization_alias="p_adjust_gene", default=None
    )
    paired_end: Optional[str] = Field(
        alias="PAIRED_END", serialization_alias="paired_end", default=None
    )
    is_external: Optional[bool] = Field(
        alias="isExternal", serialization_alias="is_external", default=None
    )
    potential_impact: Optional[str] = Field(
        alias="potentialImpact", serialization_alias="potential_impact", default=None
    )
    causes_frameshift: Optional[str] = Field(
        alias="causesFrameshift", serialization_alias="causes_frameshift", default=None
    )
    utr_overlap: Optional[str] = Field(
        alias="UTR_overlap", serialization_alias="utr_overlap", default=None
    )

    # Outrider specific
    padjust: Optional[float] = None
    zscore: Optional[float] = Field(alias="zScore", serialization_alias="zscore", default=None)
    l2fc: Optional[float] = None
    rawcounts: Optional[int] = None
    normcounts: Optional[float] = None
    meanCorrected: Optional[float] = None
    theta: Optional[float] = None
    aberrant: Optional[bool] = None
    # NOTE(review): the demo outrider TSV shipped with the package spells these two
    # columns "AberrantBySample" / "AberrantByGene" (capital A). With the aliases
    # below such columns would presumably not populate these fields - confirm
    # against real DROP output before changing the aliases.
    aberrant_by_sample: Optional[float] = Field(
        alias="aberrantBySample", serialization_alias="aberrant_by_sample", default=None
    )
    aberrant_by_gene: Optional[float] = Field(
        alias="aberrantByGene", serialization_alias="aberrant_by_gene", default=None
    )
    padj_rank: Optional[float] = None
    fdr_set: Optional[str] = Field(alias="FDR_set", serialization_alias="fdr_set", default=None)
    fold_change: Optional[float] = Field(
        alias="foldChange", serialization_alias="fold_change", default=None
    )

    @field_validator("chromosome")
    def strip_chr(cls, chrom: Optional[str]) -> Optional[str]:
        """We store chromosome names without a chr prefix internally.

        Only a literal leading "chr" is removed. str.lstrip("chr") would strip any
        run of the characters c, h and r, mangling contig names that start with
        one of those letters. A missing chromosome (None) is passed through.
        """
        if chrom is None:
            return chrom
        if chrom.startswith("chr"):
            return chrom[len("chr") :]
        return chrom

    # NOTE(review): the definition order of the "before" model validators below is
    # kept exactly as in the original; the id/display-name builders join the raw
    # "hgncSymbol" string, so they presumably rely on running before
    # genes_become_lists turns it into a list. Verify before reordering.

    @model_validator(mode="before")
    def ensure_end(cls, values):
        """End is not always set, but sometimes width is.
        Sometimes Imp is given as end. Worst case we default to width 1."""
        end_guess = int(values.get("start")) + int(values.get("width", 1))
        if "end" not in values:
            values["end"] = end_guess

        if isinstance(values["end"], str):
            if values["end"].isdigit():
                values["end"] = int(values["end"])
            if values["end"] == "Imp":
                # imprecise?
                values["end"] = end_guess

        return values

    @model_validator(mode="before")
    def genes_become_lists(cls, values):
        """HGNC ids and gene symbols are found one on each line in DROP tsvs.
        Convert to a list with a single member in omics_variants for storage."""

        if "hgnc_id" in values:
            values["hgnc_id"] = [int(values.get("hgnc_id"))]

        if "hgncSymbol" in values:
            values["hgncSymbol"] = [str(values.get("hgncSymbol"))]

        return values

    @model_validator(mode="before")
    def set_display_name(cls, values) -> "OmicsVariantLoader":
        """Set a free text qualification, depending on the kind of variant."""

        values["display_name"] = "_".join(
            [
                values.get("hgncSymbol"),
                values.get("category"),
                values.get("sub_category"),
                get_qualification(values=values),
                values.get("seqnames"),  # chrom, unserialised
                str(values.get("start")),
                str(values.get("end")),
                values.get("variant_type"),
            ]
        )
        return values

    @model_validator(mode="before")
    def set_omics_variant_id(cls, values) -> "OmicsVariantLoader":
        """Set OMICS variant id based on the kind of variant."""

        values["omics_variant_id"] = "_".join(
            [
                values.get("seqnames"),  # chrom, unserialised
                str(values.get("start")),
                str(values.get("end")),
                values.get("build"),
                values.get("hgncSymbol"),
                values.get("sub_category"),
                get_qualification(values=values),
                values.get("variant_type"),
            ]
        )
        return values

    @model_validator(mode="before")
    def set_sample_display_name(cls, values) -> "OmicsVariantLoader":
        """Set a display name.

        Keeps an existing "display_name"; otherwise falls back to "sample_name",
        then to "individual_id"."""
        values["display_name"] = values.get(
            "display_name", values.get("sample_name", values.get("individual_id"))
        )
        return values
|
216
|
+
|
217
|
+
|
218
|
+
def get_qualification(values: dict) -> str:
    """Get qualification string for ID and display name.

    This string further qualifies the kind of omics event,
    e.g. for an expression outlier it could be 'up' or 'down'.

    Args:
        values: raw (pre-validation) omics variant values, keyed by the
            original file column names plus "sub_category".

    Returns:
        "up" or "down" for expression events (by the sign of "zScore"; a missing
        or None z-score counts as 0, i.e. "down"), the "potentialImpact" value for
        splicing events when one is present, and "affected" otherwise.
    """
    qualification = "affected"
    sub_category = values.get("sub_category")

    if sub_category == "expression":
        z_score = values.get("zScore")
        # A key that is present but None must not crash float(); treat it as 0.
        qualification = "up" if float(0 if z_score is None else z_score) > 0 else "down"
    elif sub_category == "splicing":
        impact = values.get("potentialImpact")
        # Callers "_".join() the result, so never hand back None: keep the
        # generic default when the impact column is absent.
        if impact is not None:
            qualification = impact

    return qualification
|
@@ -0,0 +1,11 @@
|
|
1
|
+
from typing import Dict, Iterable, List
|
2
|
+
|
3
|
+
from .drop import parse_omics_tsv
|
4
|
+
|
5
|
+
OMICS_CATEGORY_PARSER = {"tsv": parse_omics_tsv}
|
6
|
+
|
7
|
+
|
8
|
+
def parse_omics_file(omics_lines: Iterable[str], omics_file_type: dict) -> List[Dict[str, str]]:
    """Dispatch the lines of an omics variants file to the parser registered for its format.

    The parser is looked up in OMICS_CATEGORY_PARSER by the "format" entry of
    omics_file_type; an unregistered format raises KeyError.
    """
    file_format = omics_file_type.get("format")
    parse_lines = OMICS_CATEGORY_PARSER[file_format]
    return parse_lines(omics_lines)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
from typing import Dict, Iterable, List
|
2
|
+
|
3
|
+
|
4
|
+
def parse_omics_tsv(lines: Iterable[str]) -> List[Dict[str, str]]:
    """Parse a DROP Outrider or Fraser TSV file.

    The first line is taken as the tab-separated header. Every following
    non-blank line becomes one dict mapping header names to the (string)
    column values of that line.

    Args:
        lines: the lines of the TSV file, header first.

    Returns:
        One dict per data row, in file order. Rows shorter than the header
        simply lack the trailing keys.
    """
    omics_infos: List[Dict[str, str]] = []
    header: List[str] = []

    for i, line in enumerate(lines):
        line = line.rstrip()
        if i == 0:
            # Header line
            header = line.split("\t")
            continue

        if not line:
            # A blank (e.g. trailing) line would otherwise yield a bogus record
            # holding only the first header column with an empty value.
            continue

        omics_infos.append(dict(zip(header, line.split("\t"))))

    return omics_infos
|
scout/parse/variant/callers.py
CHANGED
@@ -22,6 +22,7 @@ def parse_callers(variant, category="snv"):
|
|
22
22
|
"""
|
23
23
|
relevant_callers = CALLERS[category]
|
24
24
|
callers = {caller["id"]: None for caller in relevant_callers}
|
25
|
+
callers_keys = set(callers.keys())
|
25
26
|
|
26
27
|
other_info = variant.INFO.get("FOUND_IN")
|
27
28
|
svdb_origin = variant.INFO.get("svdb_origin")
|
@@ -30,10 +31,12 @@ def parse_callers(variant, category="snv"):
|
|
30
31
|
if other_info:
|
31
32
|
for info in other_info.split(","):
|
32
33
|
called_by = info.split("|")[0]
|
33
|
-
|
34
|
+
if called_by in callers_keys:
|
35
|
+
callers[called_by] = "Pass"
|
34
36
|
elif svdb_origin:
|
35
37
|
for called_by in svdb_origin.split("|"):
|
36
|
-
|
38
|
+
if called_by in callers_keys:
|
39
|
+
callers[called_by] = "Pass"
|
37
40
|
elif raw_info:
|
38
41
|
info = raw_info.split("-")
|
39
42
|
for call in info:
|
@@ -47,7 +50,7 @@ def parse_callers(variant, category="snv"):
|
|
47
50
|
for caller in callers:
|
48
51
|
if caller in call:
|
49
52
|
callers[caller] = "Filtered"
|
50
|
-
elif call in
|
53
|
+
elif call in callers_keys:
|
51
54
|
callers[call] = "Pass"
|
52
55
|
|
53
56
|
if raw_info or svdb_origin or other_info:
|
scout/parse/variant/frequency.py
CHANGED
@@ -11,8 +11,14 @@ EXAC_KEYS = ["EXACAF"]
|
|
11
11
|
EXAC_MAX_KEYS = ["ExAC_MAX_AF", "EXAC_MAX_AF"]
|
12
12
|
|
13
13
|
# gnomAD has both SNV and SV
|
14
|
-
GNOMAD_INFO_KEYS = ["GNOMADAF", "GNOMAD_AF", "gnomADg_AF", "gnomad_svAF"]
|
15
|
-
GNOMAD_INFO_MAX_KEYS = [
|
14
|
+
GNOMAD_INFO_KEYS = ["GNOMADAF", "GNOMAD_AF", "gnomADg_AF", "gnomad_svAF", "gnomad_af"]
|
15
|
+
GNOMAD_INFO_MAX_KEYS = [
|
16
|
+
"gnomADg_AF_POPMAX",
|
17
|
+
"GNOMADAF_popmax",
|
18
|
+
"GNOMADAF_POPMAX",
|
19
|
+
"GNOMADAF_MAX",
|
20
|
+
"gnomad_popmax_af",
|
21
|
+
]
|
16
22
|
|
17
23
|
# SV
|
18
24
|
CLINGEN_BENIGN_KEYS = [
|
@@ -69,6 +75,7 @@ def parse_frequencies(variant, transcripts):
|
|
69
75
|
# These are SV-specific frequencies
|
70
76
|
update_frequency_from_vcf(frequencies, variant, ["left_1000GAF"], "thousand_g_left")
|
71
77
|
update_frequency_from_vcf(frequencies, variant, ["right_1000GAF"], "thousand_g_right")
|
78
|
+
update_frequency_from_vcf(frequencies, variant, ["colorsdb_af"], "colorsdb_af")
|
72
79
|
|
73
80
|
# Search transcripts CSQ if not found in VCF INFO
|
74
81
|
if not frequencies:
|
@@ -117,6 +124,7 @@ def parse_sv_frequencies(variant: cyvcf2.Variant) -> Dict:
|
|
117
124
|
update_sv_frequency_from_vcf(sv_frequencies, variant, SWEGEN_KEYS, "swegen")
|
118
125
|
update_sv_frequency_from_vcf(sv_frequencies, variant, DECIPHER_KEYS, "decipher")
|
119
126
|
update_sv_frequency_from_vcf(sv_frequencies, variant, CG_KEYS, "clingen_mip")
|
127
|
+
update_sv_frequency_from_vcf(sv_frequencies, variant, ["colorsdb_af"], "colorsdb_af")
|
120
128
|
|
121
129
|
return sv_frequencies
|
122
130
|
|
scout/server/app.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Dict, Union
|
|
7
7
|
from urllib.parse import parse_qsl, unquote, urlsplit
|
8
8
|
|
9
9
|
import coloredlogs
|
10
|
-
from flask import Flask,
|
10
|
+
from flask import Flask, redirect, request, url_for
|
11
11
|
from flask_cors import CORS
|
12
12
|
from flask_login import current_user
|
13
13
|
from markdown import markdown as python_markdown
|
@@ -25,6 +25,7 @@ from .blueprints import (
|
|
25
25
|
institutes,
|
26
26
|
login,
|
27
27
|
managed_variants,
|
28
|
+
omics_variants,
|
28
29
|
panels,
|
29
30
|
phenomodels,
|
30
31
|
phenotypes,
|
@@ -98,6 +99,7 @@ def configure_extensions(app):
|
|
98
99
|
extensions.store.init_app(app)
|
99
100
|
extensions.login_manager.init_app(app)
|
100
101
|
extensions.mail.init_app(app)
|
102
|
+
extensions.clinvar_api.init_app(app)
|
101
103
|
|
102
104
|
if app.config.get("SQLALCHEMY_DATABASE_URI"):
|
103
105
|
extensions.chanjo_report.init_app(app)
|
@@ -177,6 +179,7 @@ def register_blueprints(app):
|
|
177
179
|
app.register_blueprint(diagnoses.omim_bp)
|
178
180
|
app.register_blueprint(institutes.overview)
|
179
181
|
app.register_blueprint(managed_variants.managed_variants_bp)
|
182
|
+
app.register_blueprint(omics_variants.omics_variants_bp)
|
180
183
|
|
181
184
|
|
182
185
|
def register_filters(app):
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
import logging
|
3
3
|
import os.path
|
4
|
-
from typing import Dict
|
4
|
+
from typing import Dict, Optional
|
5
5
|
|
6
6
|
from flask import flash, session
|
7
7
|
from flask_login import current_user
|
@@ -12,7 +12,7 @@ from scout.server.utils import case_append_alignments, find_index
|
|
12
12
|
from scout.utils.ensembl_rest_clients import EnsemblRestApiClient
|
13
13
|
|
14
14
|
LOG = logging.getLogger(__name__)
|
15
|
-
|
15
|
+
DEFAULT_TRACK_NAMES = ["Genes", "ClinVar", "ClinVar CNVs"]
|
16
16
|
|
17
17
|
|
18
18
|
def check_session_tracks(resource):
|
@@ -51,33 +51,39 @@ def set_session_tracks(display_obj: dict):
|
|
51
51
|
session["igv_tracks"] = session_tracks
|
52
52
|
|
53
53
|
|
54
|
-
def make_igv_tracks(
|
54
|
+
def make_igv_tracks(
|
55
|
+
case_obj: dict,
|
56
|
+
variant_id: str,
|
57
|
+
chrom: Optional[str] = None,
|
58
|
+
start: Optional[int] = None,
|
59
|
+
stop: Optional[int] = None,
|
60
|
+
) -> dict:
|
55
61
|
"""Create a dictionary containing the required tracks for displaying IGV tracks for case or a group of cases
|
56
62
|
|
57
63
|
Args:
|
58
|
-
institute_id
|
64
|
+
institute_id: institute _id
|
59
65
|
case_obj(scout.models.Case)
|
60
|
-
variant_id
|
61
|
-
chrom
|
62
|
-
start
|
63
|
-
stop
|
66
|
+
variant_id: _id of a variant
|
67
|
+
chrom: requested chromosome [1-22], X, Y, [M-MT]
|
68
|
+
start: start of the genomic interval to be displayed
|
69
|
+
stop: stop of the genomic interval to be displayed
|
64
70
|
|
65
71
|
Returns:
|
66
|
-
display_obj
|
72
|
+
display_obj: A display object containing case name, list of genes, locus and tracks
|
67
73
|
"""
|
68
74
|
display_obj = {}
|
69
75
|
variant_obj = store.variant(document_id=variant_id)
|
70
76
|
|
77
|
+
chromosome = "All"
|
71
78
|
if variant_obj:
|
72
79
|
# Set display locus
|
73
80
|
start = start or variant_obj["position"]
|
74
81
|
stop = stop or variant_obj["end"]
|
82
|
+
chrom = chrom or variant_obj.get("chromosome")
|
75
83
|
|
76
|
-
|
77
|
-
chromosome =
|
84
|
+
if all([start, stop, chrom]):
|
85
|
+
chromosome = chrom.replace("MT", "M")
|
78
86
|
display_obj["locus"] = "chr{0}:{1}-{2}".format(chromosome, start, stop)
|
79
|
-
else:
|
80
|
-
chromosome = "All"
|
81
87
|
|
82
88
|
# Set genome build for displaying alignments:
|
83
89
|
if "38" in str(case_obj.get("genome_build", "37")) or chromosome == "M":
|
@@ -115,20 +121,27 @@ def make_igv_tracks(case_obj, variant_id, chrom=None, start=None, stop=None):
|
|
115
121
|
return display_obj
|
116
122
|
|
117
123
|
|
118
|
-
def make_sashimi_tracks(
|
124
|
+
def make_sashimi_tracks(
|
125
|
+
case_obj: dict, variant_id: Optional[str] = None, omics_variant_id: Optional[str] = None
|
126
|
+
):
|
119
127
|
"""Create a dictionary containing the required tracks for a splice junction plot
|
128
|
+
If either a regular variant_id or an omics variant id is passed, set display to a particular locus.
|
129
|
+
Otherwise defaults to whole genome "All" view.
|
120
130
|
|
121
|
-
Args:
|
122
|
-
case_obj(scout.models.Case)
|
123
|
-
variant_id(str) _id of a variant
|
124
131
|
Returns:
|
125
|
-
display_obj(dict): A display object containing case name, list of genes,
|
132
|
+
display_obj(dict): A display object containing case name, list of genes, locus and tracks
|
126
133
|
"""
|
127
134
|
build = "38" # This feature is only available for RNA tracks in build 38
|
128
135
|
|
129
136
|
locus = "All"
|
137
|
+
variant_obj = None
|
138
|
+
|
130
139
|
if variant_id:
|
131
140
|
variant_obj = store.variant(document_id=variant_id)
|
141
|
+
if omics_variant_id:
|
142
|
+
variant_obj = store.omics_variant(variant_id=omics_variant_id)
|
143
|
+
|
144
|
+
if variant_obj:
|
132
145
|
locus = make_locus_from_variant(variant_obj, case_obj, build)
|
133
146
|
|
134
147
|
display_obj = {"locus": locus, "tracks": []}
|
@@ -234,9 +247,9 @@ def set_common_tracks(display_obj, build):
|
|
234
247
|
# Set up IGV tracks that are common for all cases:
|
235
248
|
display_obj["reference_track"] = HUMAN_REFERENCE[build] # Human reference is always present
|
236
249
|
|
237
|
-
# if user settings for igv tracks exist -> use these settings, otherwise display
|
250
|
+
# if user settings for igv tracks exist -> use these settings, otherwise display default tracks ---> Genes, ClinVar and ClinVar CNVs
|
238
251
|
custom_tracks_names = (
|
239
|
-
user_obj.get("igv_tracks") if "igv_tracks" in user_obj else
|
252
|
+
user_obj.get("igv_tracks") if "igv_tracks" in user_obj else DEFAULT_TRACK_NAMES
|
240
253
|
)
|
241
254
|
|
242
255
|
display_obj["custom_tracks"] = []
|
@@ -300,7 +313,6 @@ def set_config_custom_tracks(display_obj: dict, build: str):
|
|
300
313
|
"""Set up custom public or private tracks stored in a cloud bucket or locally. These tracks were those specified in the Scout config file.
|
301
314
|
Respect user's preferences."""
|
302
315
|
user_obj = store.user(email=current_user.email)
|
303
|
-
custom_tracks_names = user_obj.get("igv_tracks")
|
304
316
|
|
305
317
|
config_custom_tracks = []
|
306
318
|
|
@@ -308,8 +320,7 @@ def set_config_custom_tracks(display_obj: dict, build: str):
|
|
308
320
|
build_tracks = config_igv_tracks.tracks.get(build, [])
|
309
321
|
for track in build_tracks:
|
310
322
|
# Do not display track if user doesn't want to see it
|
311
|
-
if
|
312
|
-
|
313
|
-
config_custom_tracks.append(track)
|
323
|
+
if "igv_tracks" not in user_obj or track["name"] in user_obj.get("igv_tracks"):
|
324
|
+
config_custom_tracks.append(track)
|
314
325
|
if config_custom_tracks:
|
315
326
|
display_obj["config_custom_tracks"] = config_custom_tracks
|
@@ -79,6 +79,7 @@
|
|
79
79
|
type: 'wig',
|
80
80
|
format: "bigwig",
|
81
81
|
url: "{{ url_for('alignviewers.remote_static', file=track.coverage_wig) }}",
|
82
|
+
height: 500,
|
82
83
|
},
|
83
84
|
{
|
84
85
|
type: 'spliceJunctions',
|
@@ -88,24 +89,27 @@
|
|
88
89
|
labelUniqueReadCount: true,
|
89
90
|
url: "{{ url_for('alignviewers.remote_static', file=track.splicej_bed) }}",
|
90
91
|
indexURL: "{{ url_for('alignviewers.remote_static', file=track.splicej_bed_index) }}",
|
91
|
-
minUniquelyMappedReads: 1
|
92
|
+
minUniquelyMappedReads: 1,
|
93
|
+
height: 500,
|
92
94
|
},
|
93
95
|
]
|
94
96
|
}, // end of sashimi track with data
|
95
|
-
{
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
97
|
+
{% if custom_tracks|selectattr("name","equalto", "Genes")|list|length > 0 %}
|
98
|
+
{ // genes track
|
99
|
+
name: geneTrack.name,
|
100
|
+
type: geneTrack.type,
|
101
|
+
format: geneTrack.format,
|
102
|
+
sourceType: geneTrack.sourceType,
|
103
|
+
url: geneTrack.url,
|
104
|
+
indexURL: geneTrack.indexURL,
|
105
|
+
displayMode: geneTrack.displayMode,
|
106
|
+
visibilityWindow: 300000000,
|
107
|
+
height: 100,
|
108
|
+
searchable: true,
|
109
|
+
order: {{counter.loop}},
|
110
|
+
infoURL: "https://www.ncbi.nlm.nih.gov/gene/?term=$$"
|
111
|
+
}, // end of genes track
|
112
|
+
{% endif %}
|
109
113
|
{% endif %}
|
110
114
|
{% endfor %}
|
111
115
|
] // end of tracks
|