scout-browser 4.85__py3-none-any.whl → 4.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. scout/__version__.py +1 -1
  2. scout/adapter/mongo/base.py +17 -14
  3. scout/adapter/mongo/case.py +20 -1
  4. scout/adapter/mongo/filter.py +36 -1
  5. scout/adapter/mongo/omics_variant.py +145 -0
  6. scout/adapter/mongo/query.py +13 -3
  7. scout/adapter/mongo/variant.py +10 -4
  8. scout/build/case.py +5 -0
  9. scout/build/variant/variant.py +1 -0
  10. scout/constants/__init__.py +3 -1
  11. scout/constants/case_tags.py +1 -0
  12. scout/constants/clinvar.py +1 -1
  13. scout/constants/file_types.py +31 -0
  14. scout/constants/filters.py +4 -0
  15. scout/constants/indexes.py +30 -13
  16. scout/constants/variant_tags.py +3 -0
  17. scout/demo/643594.clinical.mei.vcf.gz +0 -0
  18. scout/demo/643594.clinical.mei.vcf.gz.tbi +0 -0
  19. scout/demo/643594.config.yaml +4 -0
  20. scout/demo/drop/fraser_top_hits_clinical.tsv +5 -0
  21. scout/demo/drop/outrider_top_hits_clinical.tsv +10 -0
  22. scout/load/setup.py +4 -4
  23. scout/models/case/case_loading_models.py +25 -2
  24. scout/models/omics_variant.py +227 -0
  25. scout/parse/omics_variant/__init__.py +11 -0
  26. scout/parse/omics_variant/drop.py +19 -0
  27. scout/parse/variant/callers.py +6 -3
  28. scout/parse/variant/frequency.py +10 -2
  29. scout/server/app.py +4 -1
  30. scout/server/blueprints/alignviewers/controllers.py +35 -24
  31. scout/server/blueprints/alignviewers/templates/alignviewers/igv_sashimi_viewer.html +19 -15
  32. scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +45 -5
  33. scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
  34. scout/server/blueprints/alignviewers/views.py +10 -2
  35. scout/server/blueprints/cases/controllers.py +3 -0
  36. scout/server/blueprints/cases/templates/cases/case.html +27 -9
  37. scout/server/blueprints/cases/templates/cases/case_report.html +2 -17
  38. scout/server/blueprints/cases/templates/cases/phenotype.html +8 -5
  39. scout/server/blueprints/cases/templates/cases/utils.html +26 -3
  40. scout/server/blueprints/clinvar/controllers.py +9 -3
  41. scout/server/blueprints/dashboard/controllers.py +44 -13
  42. scout/server/blueprints/dashboard/static/charts.js +46 -36
  43. scout/server/blueprints/dashboard/templates/dashboard/dashboard_general.html +2 -2
  44. scout/server/blueprints/institutes/forms.py +2 -0
  45. scout/server/blueprints/institutes/templates/overview/cases.html +6 -4
  46. scout/server/blueprints/institutes/templates/overview/gene_variants.html +40 -27
  47. scout/server/blueprints/institutes/templates/overview/institute_sidebar.html +1 -1
  48. scout/server/blueprints/institutes/views.py +5 -12
  49. scout/server/blueprints/omics_variants/__init__.py +1 -0
  50. scout/server/blueprints/omics_variants/controllers.py +122 -0
  51. scout/server/blueprints/omics_variants/templates/omics_variants/outliers.html +262 -0
  52. scout/server/blueprints/omics_variants/views.py +106 -0
  53. scout/server/blueprints/panels/controllers.py +1 -7
  54. scout/server/blueprints/panels/templates/panels/panels.html +12 -4
  55. scout/server/blueprints/panels/views.py +9 -11
  56. scout/server/blueprints/variant/templates/variant/buttons.html +7 -2
  57. scout/server/blueprints/variant/templates/variant/str-variant-reviewer.html +1 -1
  58. scout/server/blueprints/variant/templates/variant/utils.html +1 -1
  59. scout/server/blueprints/variant/utils.py +54 -103
  60. scout/server/blueprints/variant/views.py +1 -0
  61. scout/server/blueprints/variants/controllers.py +1 -4
  62. scout/server/blueprints/variants/forms.py +42 -0
  63. scout/server/blueprints/variants/templates/variants/utils.html +8 -4
  64. scout/server/blueprints/variants/views.py +28 -7
  65. scout/server/config.py +4 -0
  66. scout/server/extensions/clinvar_extension.py +7 -7
  67. scout/server/links.py +2 -2
  68. scout/server/templates/bootstrap_global.html +1 -4
  69. scout/server/templates/utils.html +3 -3
  70. scout/server/utils.py +4 -1
  71. {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/METADATA +10 -10
  72. {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/RECORD +76 -66
  73. {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/WHEEL +1 -1
  74. {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/LICENSE +0 -0
  75. {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/entry_points.txt +0 -0
  76. {scout_browser-4.85.dist-info → scout_browser-4.86.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,5 @@
1
+ hgnc_id geneID hgncSymbol gene_type gene_name_orig sampleID seqnames start end width strand type pValue psiValue deltaPsi counts totalCounts meanCounts meanTotalCounts nonsplitCounts nonsplitProportion nonsplitProportion_99quantile annotatedJunction pValueGene padjustGene PAIRED_END DNA_ID DROP_GROUP SPLICE_COUNTS_DIR HPO_TERMS GENE_COUNTS_FILE GENE_ANNOTATION GENOME isExternal potentialImpact causesFrameshift UTR_overlap blacklist
2
+ 2439 ENSG00000119535.18 CSF3R protein_coding CSF3R ADM1059A2 chr1 36479517 Imp 1961 - jaccard 1.6652e-06 0.49 -0.39 127 258 4237.77 4570.55 3 0.01 0.02 both 9.9912e-06 0.013423 True outrider,fraser False annotatedIntron_reducedUsage unlikely 5'-UTR False
3
+ 4831 ENSG00000213934.9 HBG1 protein_coding HBG1 ADM1059A2 chr11 5248488 5254291 5804 - jaccard 1.918e-12 0.36 0.35 35 96 22.84 6902.91 0 0.0 0.0 end 5.7541e-12 1.2885e-08 True outrider,fraser False exonSkipping inconclusive 3'-UTR False
4
+ 4832 ENSG00000196565.15 HBG2 protein_coding HBG2 ADM1059A2 chr11 5248488 5254291 5804 - jaccard 1.918e-12 0.36 0.35 35 96 22.84 6902.91 0 0.0 0.0 end 3.836e-12 1.2885e-08 True outrider,fraser False exonSkipping inconclusive 3'-UTR False
5
+ 17284 ENSG00000213934.9 POT1 protein_coding POT1 ADM1059A2 chr7 124532319 124532434 115 - jaccard 1.918e-12 0.36 0.35 35 96 22.84 6902.91 0 0.0 0.0 end 5.7541e-12 1.2885e-08 True outrider,fraser False exonSkipping inconclusive 3'-UTR False
@@ -0,0 +1,10 @@
1
+ hgnc_id seqnames start end strand geneID hgncSymbol gene_type gene_name_orig sampleID pValue padjust zScore l2fc rawcounts normcounts meanCorrected theta aberrant AberrantBySample AberrantByGene padj_rank FDR_set foldChange
2
+ 25415 chr4 88257620 88284769 - ENSG00000163644.15 PPM1K protein_coding PPM1K ADM1059A2 0.0016124374690447165 1.0 -5.92 -0.9 27 317.46 601.46 139.77 False 4.0 0.0 6110.0 transcriptome-wide 0.54
3
+ 10019 chr6 3063824 3115187 + ENSG00000137275.16 RIPK1 protein_coding RIPK1 ADM1059A2 0.0009997468998232232 1.0 6.33 0.53 104 1891.09 1308.4 547.35 False 4.0 0.0 6110.0 transcriptome-wide 1.44
4
+ 4827 chr11 5225464 5229395 - ENSG00000244734.4 HBB protein_coding HBB ADM1059A2 3.417496739472308e-25 4.169327890311004e-20 -12.61 -10.14 61 2625.42 3016434.64 9.21 True 4.0 1.0 1.0 transcriptome-wide 0.0
5
+ 4831 chr11 5248269 5249857 - ENSG00000213934.9 HBG1 protein_coding HBG1 ADM1059A3 0.0016205310428587193 1.0 2.23 2.86 73 151.01 19.19 0.91 False 4.0 0.0 6110.0 transcriptome-wide 7.26
6
+ 16860 chr12 108522214 108561400 - ENSG00000075856.12 SART3 protein_coding SART3 ADM1059A2 0.0014636643867152977 1.0 7.02 0.51 97 1412.71 989.2 1000.0 False 4.0 0.0 6110.0 transcriptome-wide 1.42
7
+ 4824 chr16 172876 173710 + ENSG00000188536.13 HBA2 protein_coding HBA2 ADM1059A2 2.15484140654196e-24 1.2729742965811128e-19 -12.53 -10.01 262 6781.29 7019330.9 8.97 True 4.0 1.0 2.5 transcriptome-wide 0.0
8
+ 4823 chr16 176680 177522 + ENSG00000206172.8 HBA1 protein_coding HBA1 ADM1059A2 3.1302782768232926e-24 1.2729742965811128e-19 -12.5 -9.97 246 6497.61 6540277.08 8.95 True 4.0 1.0 2.5 transcriptome-wide 0.0
9
+ 9543 chr17 4796144 4798502 + ENSG00000142507.10 PSMB6 protein_coding PSMB6 ADM1059A2 0.0010771639306525651 1.0 -10.54 -1.14 13 342.89 783.82 1000.0 False 4.0 0.0 6110.0 transcriptome-wide 0.45
10
+ 17284 chr7 124532319 124532434 - ENSG00000213934.9 POT1 protein_coding POT1 ADM1059A2 0.0016205310428587193 1.0 2.23 2.86 73 151.01 19.19 0.91 False 4.0 0.0 6110.0 transcriptome-wide 7.26
scout/load/setup.py CHANGED
@@ -51,12 +51,12 @@ def setup_scout(
51
51
 
52
52
  WARNING: If the instance is populated all collections will be deleted
53
53
 
54
- Build insert a institute and an admin user.
55
- There are multiple sources of information that is used by scout and that needs to exist for
56
- scout to work proper.
54
+ Build and insert an institute and an admin user.
57
55
 
56
+ Multiple sources of information that are used by scout need to exist for
57
+ scout to work properly.
58
58
  Genes:
59
- Scout uses HGNC as the source for gene identifiers en ensembl as source for coordinates.
59
+ Scout uses HGNC as the source for gene identifiers and ENSEMBL as source for coordinates.
60
60
  Additional information of disease connections for genes is fetched from OMIM.
61
61
  Link between hpo terms and genes is fetched from HPO
62
62
  For more details check the documentation.
@@ -15,7 +15,7 @@ except ImportError:
15
15
 
16
16
  from pydantic import BaseModel, Field, field_validator, model_validator
17
17
 
18
- from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP
18
+ from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP, OMICS_FILE_TYPE_MAP
19
19
  from scout.exceptions import PedigreeError
20
20
  from scout.utils.date import get_date
21
21
 
@@ -41,15 +41,17 @@ CASE_FILE_PATH_CHECKS = [
41
41
  "cnv_report",
42
42
  "coverage_qc_report",
43
43
  "delivery_report",
44
+ "exe_ver",
45
+ "fraser_tsv",
44
46
  "gene_fusion_report",
45
47
  "gene_fusion_report_research",
46
48
  "madeline_info",
47
49
  "multiqc",
48
50
  "multiqc_rna",
51
+ "outrider_tsv",
49
52
  "peddy_ped",
50
53
  "peddy_ped_check",
51
54
  "peddy_sex_check",
52
- "exe_ver",
53
55
  "smn_tsv",
54
56
  "reference_info",
55
57
  "RNAfusion_inspector",
@@ -59,6 +61,7 @@ CASE_FILE_PATH_CHECKS = [
59
61
  ]
60
62
 
61
63
  VCF_FILE_PATH_CHECKS = FILE_TYPE_MAP.keys()
64
+ OMICS_FILE_PATH_CHECKS = OMICS_FILE_TYPE_MAP.keys()
62
65
 
63
66
  GENOME_BUILDS = ["37", "38"]
64
67
  TRACKS = ["rare", "cancer"]
@@ -153,6 +156,25 @@ class Mitodel(BaseModel):
153
156
  ratioppk: Optional[float] = None
154
157
 
155
158
 
159
class OmicsFiles(BaseModel):
    """Paths to the omics files of a case.

    Represents multiple kinds of omics files, e.g. RNA expression outliers for
    aberrant splicing and aberrant expression.
    """

    fraser: Optional[str] = None
    fraser_research: Optional[str] = None
    outrider: Optional[str] = None
    outrider_research: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def validate_file_path(cls, values: Dict) -> Dict:
        """Pass every provided omics file path through _resource_abs_path.

        Runs on the raw input mapping, before field validation, and returns that
        same mapping (not a model instance). Only the keys listed in
        OMICS_FILE_PATH_CHECKS that hold a truthy value are rewritten.
        """
        for item in OMICS_FILE_PATH_CHECKS:
            item_path: Optional[str] = values.get(item)
            if item_path:
                # presumably resolves the path and checks that the file exists on
                # disk -- confirm against the _resource_abs_path helper
                values[item] = _resource_abs_path(item_path)
        return values
176
+
177
+
156
178
  class REViewer(BaseModel):
157
179
  alignment: Optional[str] = None
158
180
  alignment_index: Optional[str] = None
@@ -392,6 +414,7 @@ class CaseLoader(BaseModel):
392
414
  madeline_info: Optional[str] = Field(None, alias="madeline")
393
415
  multiqc: Optional[str] = None
394
416
  multiqc_rna: Optional[str] = None
417
+ omics_files: Optional[OmicsFiles] = None
395
418
  owner: Optional[str] = None
396
419
  peddy_ped: Optional[str] = None # Soon to be deprecated
397
420
  peddy_ped_check: Optional[str] = Field(None, alias="peddy_check") # Soon to be deprecated
@@ -0,0 +1,227 @@
1
+ """ OMICS variant
2
+
3
+ For potentially causative variants that are not yet in ClinVar
4
+ and have not yet been marked causative in any existing case.
5
+
6
+ """
7
+
8
+ import logging
9
+ from datetime import datetime
10
+ from typing import List, Optional
11
+
12
+ from pydantic import BaseModel, Field, field_validator, model_validator
13
+
14
+ LOG = logging.getLogger(__name__)
15
+
16
+
17
class OmicsVariantLoader(BaseModel):
    """Omics variants loader
    OmicsVariants are e.g. RNA expression outliers as identified by the DROP pipeline.

    Variable names are as found in the original files, plus a set common to all mixed in after file parsing,
    but before model validation by this class.

    The serialisation names will be used when dumping the model for e.g. db storage.
    """

    case_id: str
    institute: str
    build: str = "38"
    variant_type: str = "clinical"
    category: str  # eg "outlier"
    sub_category: str  # eg "splicing"
    # A factory is required here: a plain `datetime.now()` default would be evaluated
    # once, when the class is created, and then shared by every instance.
    date: datetime = Field(default_factory=datetime.now)
    display_name: str
    omics_variant_id: str

    # DROP Fraser and Outrider outlier TSVs

    # sample id is mandatory: each row pertains to one outlier event in one individual as compared to others
    # In the db object, this will be replaced with a "samples" array of individual dict.
    sampleID: str

    # outlier variants must identify the gene they pertain to, primarily with an hgnc_id
    hgnc_ids: Optional[List[int]] = Field(alias="hgnc_id", serialization_alias="hgnc_ids")
    geneID: Optional[str]

    hgnc_symbols: Optional[List[str]] = Field(
        alias="hgncSymbol", serialization_alias="hgnc_symbols"
    )
    gene_name_orig: Optional[str]

    gene_type: Optional[str]

    # coordinates if applicable
    chromosome: Optional[str] = Field(alias="seqnames", serialization_alias="chromosome")
    position: Optional[int] = Field(alias="start", serialization_alias="position")
    end: Optional[int]
    width: Optional[int] = None
    strand: Optional[str] = None

    p_value: Optional[float] = Field(alias="pValue", serialization_alias="p_value", default=None)

    # Fraser specific
    type: Optional[str] = None
    psi_value: Optional[float] = Field(
        alias="psiValue", serialization_alias="psi_value", default=None
    )
    delta_psi: Optional[float] = Field(
        alias="deltaPsi", serialization_alias="delta_psi", default=None
    )
    counts: Optional[int] = None
    total_counts: Optional[int] = Field(
        alias="totalCounts", serialization_alias="total_counts", default=None
    )
    mean_counts: Optional[float] = Field(
        alias="meanCounts", serialization_alias="mean_counts", default=None
    )
    mean_total_counts: Optional[float] = Field(
        alias="meanTotalCounts", serialization_alias="mean_total_counts", default=None
    )
    nonsplit_counts: Optional[int] = Field(
        alias="nonsplitCounts", serialization_alias="nonsplit_counts", default=None
    )
    nonsplit_proportion: Optional[float] = Field(
        alias="nonsplitProportion", serialization_alias="nonsplit_proportion", default=None
    )
    nonsplit_proportion_99quantile: Optional[float] = Field(
        alias="nonsplitProportion_99quantile",
        serialization_alias="nonsplit_proportion_99quantile",
        default=None,
    )
    annotated_junction: Optional[str] = Field(
        alias="annotatedJunction", serialization_alias="annotated_junction", default=None
    )
    p_value_gene: Optional[float] = Field(
        alias="pValueGene", serialization_alias="p_value_gene", default=None
    )
    p_adjust_gene: Optional[float] = Field(
        alias="padjustGene", serialization_alias="p_adjust_gene", default=None
    )
    paired_end: Optional[str] = Field(
        alias="PAIRED_END", serialization_alias="paired_end", default=None
    )
    is_external: Optional[bool] = Field(
        alias="isExternal", serialization_alias="is_external", default=None
    )
    potential_impact: Optional[str] = Field(
        alias="potentialImpact", serialization_alias="potential_impact", default=None
    )
    causes_frameshift: Optional[str] = Field(
        alias="causesFrameshift", serialization_alias="causes_frameshift", default=None
    )
    utr_overlap: Optional[str] = Field(
        alias="UTR_overlap", serialization_alias="utr_overlap", default=None
    )

    # Outrider specific
    padjust: Optional[float] = None
    zscore: Optional[float] = Field(alias="zScore", serialization_alias="zscore", default=None)
    l2fc: Optional[float] = None
    rawcounts: Optional[int] = None
    normcounts: Optional[float] = None
    meanCorrected: Optional[float] = None
    theta: Optional[float] = None
    aberrant: Optional[bool] = None
    # NOTE(review): the demo outrider TSV header spells the next two columns
    # "AberrantBySample" / "AberrantByGene" (capital A), while the aliases below start
    # with a lowercase "a" -- confirm which casing the input files really use.
    aberrant_by_sample: Optional[float] = Field(
        alias="aberrantBySample", serialization_alias="aberrant_by_sample", default=None
    )
    aberrant_by_gene: Optional[float] = Field(
        alias="aberrantByGene", serialization_alias="aberrant_by_gene", default=None
    )
    padj_rank: Optional[float] = None
    fdr_set: Optional[str] = Field(alias="FDR_set", serialization_alias="fdr_set", default=None)
    fold_change: Optional[float] = Field(
        alias="foldChange", serialization_alias="fold_change", default=None
    )

    @field_validator("chromosome")
    @classmethod
    def strip_chr(cls, chrom: Optional[str]) -> Optional[str]:
        """We store chromosome names without a chr prefix internally.

        Only a literal leading "chr" is removed. `str.lstrip("chr")` would strip any
        run of the characters c, h and r instead, mangling names such as "hs37d5".
        A missing (None) chromosome is passed through untouched.
        """
        if chrom and chrom.startswith("chr"):
            return chrom[len("chr") :]
        return chrom

    @model_validator(mode="before")
    @classmethod
    def ensure_end(cls, values: dict) -> dict:
        """End is not always set, but sometimes width is.
        Sometimes Imp is given as end. Worst case we default to width 1.

        Operates on the raw input mapping: "start" must be present and convertible
        to int, otherwise the int() call raises.
        """
        end_guess = int(values.get("start")) + int(values.get("width", 1))
        if "end" not in values:
            values["end"] = end_guess

        if isinstance(values["end"], str):
            if values["end"].isdigit():
                values["end"] = int(values["end"])
            if values["end"] == "Imp":
                # imprecise?
                values["end"] = end_guess

        return values

    @model_validator(mode="before")
    @classmethod
    def genes_become_lists(cls, values: dict) -> dict:
        """HGNC ids and gene symbols are found one on each line in DROP tsvs.
        Convert to a list with a single member in omics_variants for storage."""

        if "hgnc_id" in values:
            values["hgnc_id"] = [int(values.get("hgnc_id"))]

        if "hgncSymbol" in values:
            values["hgncSymbol"] = [str(values.get("hgncSymbol"))]

        return values

    @model_validator(mode="before")
    @classmethod
    def set_display_name(cls, values: dict) -> dict:
        """Set a free text qualification, depending on the kind of variant.

        The name is built from the raw (unaliased) input keys, joined with "_".
        Every joined part has to be a str at this point.
        """

        values["display_name"] = "_".join(
            [
                values.get("hgncSymbol"),
                values.get("category"),
                values.get("sub_category"),
                get_qualification(values=values),
                values.get("seqnames"),  # chrom, unserialised
                str(values.get("start")),
                str(values.get("end")),
                values.get("variant_type"),
            ]
        )
        return values

    @model_validator(mode="before")
    @classmethod
    def set_omics_variant_id(cls, values: dict) -> dict:
        """Set OMICS variant id based on the kind of variant.

        The id is built from the raw (unaliased) input keys, joined with "_".
        Every joined part has to be a str at this point.
        """

        values["omics_variant_id"] = "_".join(
            [
                values.get("seqnames"),  # chrom, unserialised
                str(values.get("start")),
                str(values.get("end")),
                values.get("build"),
                values.get("hgncSymbol"),
                values.get("sub_category"),
                get_qualification(values=values),
                values.get("variant_type"),
            ]
        )
        return values

    @model_validator(mode="before")
    @classmethod
    def set_sample_display_name(cls, values: dict) -> dict:
        """Set a display name.

        Keeps an existing "display_name"; otherwise falls back to "sample_name"
        and then to "individual_id" from the raw input.
        """
        values["display_name"] = values.get(
            "display_name", values.get("sample_name", values.get("individual_id"))
        )
        return values
216
+
217
+
218
def get_qualification(values: dict) -> str:
    """Get qualification string for ID and display name.

    This string further qualifies the kind of omics event,
    e.g. for an expression outlier it could be 'up' or 'down'.

    Args:
        values: raw (unaliased) omics variant values, as parsed from the input file.

    Returns:
        "up" or "down" for the "expression" sub category, depending on whether
        "zScore" (default 0) is greater than zero; the "potentialImpact" value for
        the "splicing" sub category; "affected" in every other case, including a
        splicing event without a "potentialImpact" key.
    """
    sub_category = values.get("sub_category")

    if sub_category == "expression":
        return "up" if float(values.get("zScore", 0)) > 0 else "down"

    if sub_category == "splicing":
        impact = values.get("potentialImpact")
        # Callers "_".join() the result, so never hand back None.
        if impact is not None:
            return impact

    return "affected"
@@ -0,0 +1,11 @@
1
+ from typing import Dict, Iterable, List
2
+
3
+ from .drop import parse_omics_tsv
4
+
5
+ OMICS_CATEGORY_PARSER = {"tsv": parse_omics_tsv}
6
+
7
+
8
def parse_omics_file(omics_lines: Iterable[str], omics_file_type: dict) -> List[Dict[str, str]]:
    """Dispatch the lines of an omics variants file to the parser for its format.

    The parser is looked up in OMICS_CATEGORY_PARSER using the "format" entry of
    omics_file_type; a format without a registered parser raises KeyError.
    """
    file_format = omics_file_type.get("format")
    parse = OMICS_CATEGORY_PARSER[file_format]
    return parse(omics_lines)
@@ -0,0 +1,19 @@
1
+ from typing import Dict, Iterable, List
2
+
3
+
4
def parse_omics_tsv(lines: Iterable[str]) -> List[Dict[str, str]]:
    """Parse a DROP Outrider or Fraser TSV file.

    The first line is taken as the tab separated header; every following
    non-blank line becomes one dict mapping header names to string values.

    Trailing whitespace is stripped from each line, so empty columns at the end
    of a row are dropped and their keys are absent from that row's dict.
    Blank lines (e.g. a stray newline at the end of the file) are skipped instead
    of producing a record with a single empty value.

    Args:
        lines: the lines of the TSV file, header first.

    Returns:
        One dict per data row; an empty list if there are no data rows.
    """
    rows = iter(lines)
    header_line = next(rows, None)
    if header_line is None:
        return []
    header = header_line.rstrip().split("\t")

    omics_infos = []
    for row in rows:
        row = row.rstrip()
        if not row:
            continue
        omics_infos.append(dict(zip(header, row.split("\t"))))

    return omics_infos
@@ -22,6 +22,7 @@ def parse_callers(variant, category="snv"):
22
22
  """
23
23
  relevant_callers = CALLERS[category]
24
24
  callers = {caller["id"]: None for caller in relevant_callers}
25
+ callers_keys = set(callers.keys())
25
26
 
26
27
  other_info = variant.INFO.get("FOUND_IN")
27
28
  svdb_origin = variant.INFO.get("svdb_origin")
@@ -30,10 +31,12 @@ def parse_callers(variant, category="snv"):
30
31
  if other_info:
31
32
  for info in other_info.split(","):
32
33
  called_by = info.split("|")[0]
33
- callers[called_by] = "Pass"
34
+ if called_by in callers_keys:
35
+ callers[called_by] = "Pass"
34
36
  elif svdb_origin:
35
37
  for called_by in svdb_origin.split("|"):
36
- callers[called_by] = "Pass"
38
+ if called_by in callers_keys:
39
+ callers[called_by] = "Pass"
37
40
  elif raw_info:
38
41
  info = raw_info.split("-")
39
42
  for call in info:
@@ -47,7 +50,7 @@ def parse_callers(variant, category="snv"):
47
50
  for caller in callers:
48
51
  if caller in call:
49
52
  callers[caller] = "Filtered"
50
- elif call in set(callers.keys()):
53
+ elif call in callers_keys:
51
54
  callers[call] = "Pass"
52
55
 
53
56
  if raw_info or svdb_origin or other_info:
@@ -11,8 +11,14 @@ EXAC_KEYS = ["EXACAF"]
11
11
  EXAC_MAX_KEYS = ["ExAC_MAX_AF", "EXAC_MAX_AF"]
12
12
 
13
13
  # gnomAD has both SNV and SV
14
- GNOMAD_INFO_KEYS = ["GNOMADAF", "GNOMAD_AF", "gnomADg_AF", "gnomad_svAF"]
15
- GNOMAD_INFO_MAX_KEYS = ["gnomADg_AF_POPMAX", "GNOMADAF_popmax", "GNOMADAF_POPMAX", "GNOMADAF_MAX"]
14
+ GNOMAD_INFO_KEYS = ["GNOMADAF", "GNOMAD_AF", "gnomADg_AF", "gnomad_svAF", "gnomad_af"]
15
+ GNOMAD_INFO_MAX_KEYS = [
16
+ "gnomADg_AF_POPMAX",
17
+ "GNOMADAF_popmax",
18
+ "GNOMADAF_POPMAX",
19
+ "GNOMADAF_MAX",
20
+ "gnomad_popmax_af",
21
+ ]
16
22
 
17
23
  # SV
18
24
  CLINGEN_BENIGN_KEYS = [
@@ -69,6 +75,7 @@ def parse_frequencies(variant, transcripts):
69
75
  # These are SV-specific frequencies
70
76
  update_frequency_from_vcf(frequencies, variant, ["left_1000GAF"], "thousand_g_left")
71
77
  update_frequency_from_vcf(frequencies, variant, ["right_1000GAF"], "thousand_g_right")
78
+ update_frequency_from_vcf(frequencies, variant, ["colorsdb_af"], "colorsdb_af")
72
79
 
73
80
  # Search transcripts CSQ if not found in VCF INFO
74
81
  if not frequencies:
@@ -117,6 +124,7 @@ def parse_sv_frequencies(variant: cyvcf2.Variant) -> Dict:
117
124
  update_sv_frequency_from_vcf(sv_frequencies, variant, SWEGEN_KEYS, "swegen")
118
125
  update_sv_frequency_from_vcf(sv_frequencies, variant, DECIPHER_KEYS, "decipher")
119
126
  update_sv_frequency_from_vcf(sv_frequencies, variant, CG_KEYS, "clingen_mip")
127
+ update_sv_frequency_from_vcf(sv_frequencies, variant, ["colorsdb_af"], "colorsdb_af")
120
128
 
121
129
  return sv_frequencies
122
130
 
scout/server/app.py CHANGED
@@ -7,7 +7,7 @@ from typing import Dict, Union
7
7
  from urllib.parse import parse_qsl, unquote, urlsplit
8
8
 
9
9
  import coloredlogs
10
- from flask import Flask, current_app, redirect, request, url_for
10
+ from flask import Flask, redirect, request, url_for
11
11
  from flask_cors import CORS
12
12
  from flask_login import current_user
13
13
  from markdown import markdown as python_markdown
@@ -25,6 +25,7 @@ from .blueprints import (
25
25
  institutes,
26
26
  login,
27
27
  managed_variants,
28
+ omics_variants,
28
29
  panels,
29
30
  phenomodels,
30
31
  phenotypes,
@@ -98,6 +99,7 @@ def configure_extensions(app):
98
99
  extensions.store.init_app(app)
99
100
  extensions.login_manager.init_app(app)
100
101
  extensions.mail.init_app(app)
102
+ extensions.clinvar_api.init_app(app)
101
103
 
102
104
  if app.config.get("SQLALCHEMY_DATABASE_URI"):
103
105
  extensions.chanjo_report.init_app(app)
@@ -177,6 +179,7 @@ def register_blueprints(app):
177
179
  app.register_blueprint(diagnoses.omim_bp)
178
180
  app.register_blueprint(institutes.overview)
179
181
  app.register_blueprint(managed_variants.managed_variants_bp)
182
+ app.register_blueprint(omics_variants.omics_variants_bp)
180
183
 
181
184
 
182
185
  def register_filters(app):
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  import logging
3
3
  import os.path
4
- from typing import Dict
4
+ from typing import Dict, Optional
5
5
 
6
6
  from flask import flash, session
7
7
  from flask_login import current_user
@@ -12,7 +12,7 @@ from scout.server.utils import case_append_alignments, find_index
12
12
  from scout.utils.ensembl_rest_clients import EnsemblRestApiClient
13
13
 
14
14
  LOG = logging.getLogger(__name__)
15
- CUSTOM_TRACK_NAMES = ["Genes", "ClinVar", "ClinVar CNVs"]
15
+ DEFAULT_TRACK_NAMES = ["Genes", "ClinVar", "ClinVar CNVs"]
16
16
 
17
17
 
18
18
  def check_session_tracks(resource):
@@ -51,33 +51,39 @@ def set_session_tracks(display_obj: dict):
51
51
  session["igv_tracks"] = session_tracks
52
52
 
53
53
 
54
- def make_igv_tracks(case_obj, variant_id, chrom=None, start=None, stop=None):
54
+ def make_igv_tracks(
55
+ case_obj: dict,
56
+ variant_id: str,
57
+ chrom: Optional[str] = None,
58
+ start: Optional[int] = None,
59
+ stop: Optional[int] = None,
60
+ ) -> dict:
55
61
  """Create a dictionary containing the required tracks for displaying IGV tracks for case or a group of cases
56
62
 
57
63
  Args:
58
- institute_id(str): institute _id
64
+ institute_id: institute _id
59
65
  case_obj(scout.models.Case)
60
- variant_id(str): _id of a variant
61
- chrom(str/None): requested chromosome [1-22], X, Y, [M-MT]
62
- start(int/None): start of the genomic interval to be displayed
63
- stop(int/None): stop of the genomic interval to be displayed
66
+ variant_id: _id of a variant
67
+ chrom: requested chromosome [1-22], X, Y, [M-MT]
68
+ start: start of the genomic interval to be displayed
69
+ stop: stop of the genomic interval to be displayed
64
70
 
65
71
  Returns:
66
- display_obj(dict): A display object containing case name, list of genes, lucus and tracks
72
+ display_obj: A display object containing case name, list of genes, locus and tracks
67
73
  """
68
74
  display_obj = {}
69
75
  variant_obj = store.variant(document_id=variant_id)
70
76
 
77
+ chromosome = "All"
71
78
  if variant_obj:
72
79
  # Set display locus
73
80
  start = start or variant_obj["position"]
74
81
  stop = stop or variant_obj["end"]
82
+ chrom = chrom or variant_obj.get("chromosome")
75
83
 
76
- chromosome = chrom or variant_obj.get("chromosome")
77
- chromosome = chromosome.replace("MT", "M")
84
+ if all([start, stop, chrom]):
85
+ chromosome = chrom.replace("MT", "M")
78
86
  display_obj["locus"] = "chr{0}:{1}-{2}".format(chromosome, start, stop)
79
- else:
80
- chromosome = "All"
81
87
 
82
88
  # Set genome build for displaying alignments:
83
89
  if "38" in str(case_obj.get("genome_build", "37")) or chromosome == "M":
@@ -115,20 +121,27 @@ def make_igv_tracks(case_obj, variant_id, chrom=None, start=None, stop=None):
115
121
  return display_obj
116
122
 
117
123
 
118
- def make_sashimi_tracks(case_obj, variant_id=None):
124
+ def make_sashimi_tracks(
125
+ case_obj: dict, variant_id: Optional[str] = None, omics_variant_id: Optional[str] = None
126
+ ):
119
127
  """Create a dictionary containing the required tracks for a splice junction plot
128
+ If either a regular variant_id or an omics variant id is passed, set display to a particular locus.
129
+ Otherwise defaults to whole genome "All" view.
120
130
 
121
- Args:
122
- case_obj(scout.models.Case)
123
- variant_id(str) _id of a variant
124
131
  Returns:
125
- display_obj(dict): A display object containing case name, list of genes, lucus and tracks
132
+ display_obj(dict): A display object containing case name, list of genes, locus and tracks
126
133
  """
127
134
  build = "38" # This feature is only available for RNA tracks in build 38
128
135
 
129
136
  locus = "All"
137
+ variant_obj = None
138
+
130
139
  if variant_id:
131
140
  variant_obj = store.variant(document_id=variant_id)
141
+ if omics_variant_id:
142
+ variant_obj = store.omics_variant(variant_id=omics_variant_id)
143
+
144
+ if variant_obj:
132
145
  locus = make_locus_from_variant(variant_obj, case_obj, build)
133
146
 
134
147
  display_obj = {"locus": locus, "tracks": []}
@@ -234,9 +247,9 @@ def set_common_tracks(display_obj, build):
234
247
  # Set up IGV tracks that are common for all cases:
235
248
  display_obj["reference_track"] = HUMAN_REFERENCE[build] # Human reference is always present
236
249
 
237
- # if user settings for igv tracks exist -> use these settings, otherwise display all tracks
250
+ # if user settings for igv tracks exist -> use these settings, otherwise display default tracks ---> Genes, ClinVar and ClinVar CNVs
238
251
  custom_tracks_names = (
239
- user_obj.get("igv_tracks") if "igv_tracks" in user_obj else CUSTOM_TRACK_NAMES
252
+ user_obj.get("igv_tracks") if "igv_tracks" in user_obj else DEFAULT_TRACK_NAMES
240
253
  )
241
254
 
242
255
  display_obj["custom_tracks"] = []
@@ -300,7 +313,6 @@ def set_config_custom_tracks(display_obj: dict, build: str):
300
313
  """Set up custom public or private tracks stored in a cloud bucket or locally. These tracks were those specified in the Scout config file.
301
314
  Respect user's preferences."""
302
315
  user_obj = store.user(email=current_user.email)
303
- custom_tracks_names = user_obj.get("igv_tracks")
304
316
 
305
317
  config_custom_tracks = []
306
318
 
@@ -308,8 +320,7 @@ def set_config_custom_tracks(display_obj: dict, build: str):
308
320
  build_tracks = config_igv_tracks.tracks.get(build, [])
309
321
  for track in build_tracks:
310
322
  # Do not display track if user doesn't want to see it
311
- if custom_tracks_names and track["name"] not in custom_tracks_names:
312
- continue
313
- config_custom_tracks.append(track)
323
+ if "igv_tracks" not in user_obj or track["name"] in user_obj.get("igv_tracks"):
324
+ config_custom_tracks.append(track)
314
325
  if config_custom_tracks:
315
326
  display_obj["config_custom_tracks"] = config_custom_tracks
@@ -79,6 +79,7 @@
79
79
  type: 'wig',
80
80
  format: "bigwig",
81
81
  url: "{{ url_for('alignviewers.remote_static', file=track.coverage_wig) }}",
82
+ height: 500,
82
83
  },
83
84
  {
84
85
  type: 'spliceJunctions',
@@ -88,24 +89,27 @@
88
89
  labelUniqueReadCount: true,
89
90
  url: "{{ url_for('alignviewers.remote_static', file=track.splicej_bed) }}",
90
91
  indexURL: "{{ url_for('alignviewers.remote_static', file=track.splicej_bed_index) }}",
91
- minUniquelyMappedReads: 1
92
+ minUniquelyMappedReads: 1,
93
+ height: 500,
92
94
  },
93
95
  ]
94
96
  }, // end of sashimi track with data
95
- { // genes track
96
- name: geneTrack.name,
97
- type: geneTrack.type,
98
- format: geneTrack.format,
99
- sourceType: geneTrack.sourceType,
100
- url: geneTrack.url,
101
- indexURL: geneTrack.indexURL,
102
- displayMode: geneTrack.displayMode,
103
- visibilityWindow: 300000000,
104
- height: 100,
105
- searchable: true,
106
- order: {{counter.loop}},
107
- infoURL: "https://www.ncbi.nlm.nih.gov/gene/?term=$$"
108
- }, // end of genes track
97
+ {% if custom_tracks|selectattr("name","equalto", "Genes")|list|length > 0 %}
98
+ { // genes track
99
+ name: geneTrack.name,
100
+ type: geneTrack.type,
101
+ format: geneTrack.format,
102
+ sourceType: geneTrack.sourceType,
103
+ url: geneTrack.url,
104
+ indexURL: geneTrack.indexURL,
105
+ displayMode: geneTrack.displayMode,
106
+ visibilityWindow: 300000000,
107
+ height: 100,
108
+ searchable: true,
109
+ order: {{counter.loop}},
110
+ infoURL: "https://www.ncbi.nlm.nih.gov/gene/?term=$$"
111
+ }, // end of genes track
112
+ {% endif %}
109
113
  {% endif %}
110
114
  {% endfor %}
111
115
  ] // end of tracks