scout-browser 4.96.0__py3-none-any.whl → 4.97.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. scout/adapter/mongo/case.py +51 -47
  2. scout/adapter/mongo/filter.py +28 -11
  3. scout/adapter/mongo/institute.py +2 -0
  4. scout/adapter/mongo/omics_variant.py +20 -5
  5. scout/adapter/mongo/query.py +104 -95
  6. scout/adapter/mongo/variant.py +0 -5
  7. scout/adapter/mongo/variant_loader.py +10 -12
  8. scout/build/individual.py +3 -11
  9. scout/commands/delete/delete_command.py +87 -49
  10. scout/commands/load/research.py +4 -4
  11. scout/commands/load/variants.py +25 -8
  12. scout/commands/setup/setup_scout.py +1 -1
  13. scout/commands/update/case.py +12 -0
  14. scout/commands/update/individual.py +1 -2
  15. scout/constants/__init__.py +7 -2
  16. scout/constants/file_types.py +68 -119
  17. scout/constants/filters.py +2 -1
  18. scout/constants/gene_tags.py +3 -3
  19. scout/constants/igv_tracks.py +7 -11
  20. scout/constants/query_terms.py +2 -2
  21. scout/demo/643594.config.yaml +6 -0
  22. scout/demo/643594.peddy.ped +1 -1
  23. scout/demo/643594.somalier.ancestry.tsv +4 -0
  24. scout/demo/643594.somalier.pairs.tsv +4 -0
  25. scout/demo/643594.somalier.samples.tsv +4 -0
  26. scout/demo/cancer.load_config.yaml +1 -0
  27. scout/demo/resources/__init__.py +1 -1
  28. scout/demo/resources/gnomad.v4.1.constraint_metrics_reduced.tsv +3755 -0
  29. scout/exceptions/database.py +1 -1
  30. scout/load/all.py +8 -16
  31. scout/models/case/case.py +1 -0
  32. scout/models/case/case_loading_models.py +12 -5
  33. scout/models/managed_variant.py +3 -3
  34. scout/models/omics_variant.py +3 -3
  35. scout/parse/case.py +112 -5
  36. scout/parse/pedqc.py +127 -0
  37. scout/parse/variant/frequency.py +9 -6
  38. scout/parse/variant/variant.py +71 -39
  39. scout/server/app.py +2 -0
  40. scout/server/blueprints/alignviewers/controllers.py +2 -0
  41. scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +3 -0
  42. scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
  43. scout/server/blueprints/cases/controllers.py +23 -3
  44. scout/server/blueprints/cases/templates/cases/case.html +3 -0
  45. scout/server/blueprints/cases/templates/cases/chanjo2_form.html +2 -2
  46. scout/server/blueprints/cases/templates/cases/gene_panel.html +9 -3
  47. scout/server/blueprints/cases/templates/cases/individuals_table.html +4 -1
  48. scout/server/blueprints/cases/templates/cases/utils.html +23 -19
  49. scout/server/blueprints/cases/views.py +5 -9
  50. scout/server/blueprints/clinvar/controllers.py +11 -11
  51. scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +15 -7
  52. scout/server/blueprints/institutes/controllers.py +20 -1
  53. scout/server/blueprints/institutes/forms.py +5 -1
  54. scout/server/blueprints/institutes/templates/overview/institute_settings.html +7 -0
  55. scout/server/blueprints/institutes/templates/overview/utils.html +20 -1
  56. scout/server/blueprints/omics_variants/templates/omics_variants/outliers.html +9 -2
  57. scout/server/blueprints/omics_variants/views.py +8 -10
  58. scout/server/blueprints/variant/controllers.py +30 -1
  59. scout/server/blueprints/variant/templates/variant/cancer-variant.html +19 -3
  60. scout/server/blueprints/variant/templates/variant/components.html +26 -9
  61. scout/server/blueprints/variant/templates/variant/variant.html +4 -2
  62. scout/server/blueprints/variant/utils.py +2 -0
  63. scout/server/blueprints/variants/controllers.py +29 -3
  64. scout/server/blueprints/variants/forms.py +37 -10
  65. scout/server/blueprints/variants/templates/variants/components.html +12 -10
  66. scout/server/blueprints/variants/templates/variants/utils.html +59 -36
  67. scout/server/blueprints/variants/views.py +45 -60
  68. scout/server/extensions/beacon_extension.py +1 -1
  69. scout/server/extensions/bionano_extension.py +5 -5
  70. scout/server/extensions/chanjo2_extension.py +40 -1
  71. scout/server/extensions/chanjo_extension.py +1 -1
  72. scout/server/extensions/matchmaker_extension.py +1 -1
  73. scout/server/static/bs_styles.css +2 -0
  74. scout/server/templates/layout.html +1 -0
  75. scout/server/utils.py +5 -0
  76. scout/utils/ensembl_biomart_clients.py +2 -11
  77. scout/utils/scout_requests.py +1 -1
  78. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/METADATA +1 -1
  79. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/RECORD +82 -80
  80. scout/demo/resources/gnomad.v4.0.constraint_metrics_reduced.tsv +0 -3755
  81. scout/parse/peddy.py +0 -149
  82. scout/utils/sort.py +0 -21
  83. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/WHEEL +0 -0
  84. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/entry_points.txt +0 -0
  85. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,5 @@
1
1
  """The following exceptions follow PEP249:
2
- https://www.python.org/dev/peps/pep-0249
2
+ https://www.python.org/dev/peps/pep-0249
3
3
  """
4
4
 
5
5
 
scout/load/all.py CHANGED
@@ -1,9 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  import logging
3
3
 
4
- from scout.constants import FILE_TYPE_MAP
4
+ from scout.constants import ORDERED_FILE_TYPE_MAP
5
5
  from scout.exceptions.config import ConfigError
6
- from scout.utils.sort import get_load_priority
7
6
 
8
7
  LOG = logging.getLogger(__name__)
9
8
 
@@ -55,30 +54,23 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
55
54
  start = gene_caption["start"]
56
55
  end = gene_caption["end"]
57
56
 
58
- case_file_types = set()
59
-
60
- for file_type in FILE_TYPE_MAP:
61
- if case_obj.get("vcf_files", {}).get(file_type):
62
- case_file_types.add(
63
- (FILE_TYPE_MAP[file_type]["variant_type"], FILE_TYPE_MAP[file_type]["category"])
64
- )
65
-
66
- for variant_type, category in sorted(
67
- case_file_types,
68
- key=lambda tup: get_load_priority(variant_type=tup[0], category=tup[1]),
69
- ):
57
+ for file_type, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
58
+ if not case_obj.get("vcf_files", {}).get(file_type):
59
+ continue
60
+ variant_type = vcf_dict["variant_type"]
61
+ variant_category = vcf_dict["category"]
70
62
  if variant_type == "research" and not case_obj["is_research"]:
71
63
  continue
72
64
 
73
65
  LOG.info(
74
66
  "Load {} {} variants for case: {} region: chr {}, start {}, end {}".format(
75
- category, variant_type.upper(), case_obj["_id"], chrom, start, end
67
+ variant_category, variant_type.upper(), case_obj["_id"], chrom, start, end
76
68
  )
77
69
  )
78
70
  adapter.load_variants(
79
71
  case_obj=case_obj,
80
72
  variant_type=variant_type,
81
- category=category,
73
+ category=variant_category,
82
74
  chrom=chrom,
83
75
  start=start,
84
76
  end=end,
scout/models/case/case.py CHANGED
@@ -37,6 +37,7 @@ individual = dict(
37
37
  tumor_type=str,
38
38
  tmb=str,
39
39
  msi=str,
40
+ hrd=str,
40
41
  tumor_purity=float,
41
42
  tissue_type=str,
42
43
  chromograph_images=str, # path to image files
@@ -17,7 +17,7 @@ except ImportError:
17
17
 
18
18
  from pydantic import BaseModel, Field, field_validator, model_validator
19
19
 
20
- from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP, OMICS_FILE_TYPE_MAP
20
+ from scout.constants import ANALYSIS_TYPES, ORDERED_FILE_TYPE_MAP, ORDERED_OMICS_FILE_TYPE_MAP
21
21
  from scout.exceptions import PedigreeError
22
22
  from scout.utils.date import get_date
23
23
 
@@ -54,6 +54,9 @@ CASE_FILE_PATH_CHECKS = [
54
54
  "peddy_ped",
55
55
  "peddy_ped_check",
56
56
  "peddy_sex_check",
57
+ "somalier_ancestry",
58
+ "somalier_pairs",
59
+ "somalier_samples",
57
60
  "smn_tsv",
58
61
  "reference_info",
59
62
  "RNAfusion_inspector",
@@ -63,8 +66,8 @@ CASE_FILE_PATH_CHECKS = [
63
66
  "rna_delivery_report",
64
67
  ]
65
68
 
66
- VCF_FILE_PATH_CHECKS = FILE_TYPE_MAP.keys()
67
- OMICS_FILE_PATH_CHECKS = OMICS_FILE_TYPE_MAP.keys()
69
+ VCF_FILE_PATH_CHECKS = ORDERED_FILE_TYPE_MAP.keys()
70
+ OMICS_FILE_PATH_CHECKS = ORDERED_OMICS_FILE_TYPE_MAP.keys()
68
71
 
69
72
  GENOME_BUILDS = ["37", "38"]
70
73
  TRACKS = ["rare", "cancer"]
@@ -209,6 +212,7 @@ class SampleLoader(BaseModel):
209
212
  d4_file: Optional[str] = None
210
213
  display_name: Optional[str] = None
211
214
  father: Optional[str] = None
215
+ hrd: Optional[str] = None
212
216
  individual_id: str = Field(alias="sample_id")
213
217
  is_sma: Optional[str] = None
214
218
  is_sma_carrier: Optional[str] = None
@@ -247,8 +251,8 @@ class SampleLoader(BaseModel):
247
251
 
248
252
  @model_validator(mode="before")
249
253
  def convert_cancer_int_values_to_str(cls, values) -> "SampleLoader":
250
- """Sets 'msi' and 'msi' values for cancer cases to string. This is a required step in Pydantic2, in Pydantic1 values were just coerced from int to str."""
251
- for item in ["msi", "tmb"]:
254
+ """Sets 'msi' 'tmb' and 'hrd' values for cancer cases to string. This is a required step in Pydantic2, in Pydantic1 values were just coerced from int to str."""
255
+ for item in ["msi", "tmb", "hrd"]:
252
256
  if values.get(item):
253
257
  values[item] = str(values[item])
254
258
  return values
@@ -426,6 +430,9 @@ class CaseLoader(BaseModel):
426
430
  peddy_sex_check: Optional[str] = Field(None, alias="peddy_sex") # Soon to be deprecated
427
431
  phenotype_groups: Optional[List[str]] = None
428
432
  phenotype_terms: Optional[List[str]] = None
433
+ somalier_ancestry: Optional[str] = None
434
+ somalier_pairs: Optional[str] = None
435
+ somalier_samples: Optional[str] = None
429
436
  exe_ver: Optional[str] = None
430
437
  rank_model_version: Optional[str] = None
431
438
  rank_score_threshold: Optional[int] = 0
@@ -1,7 +1,7 @@
1
- """ Managed variant
1
+ """Managed variant
2
2
 
3
- For potentially causative variants that are not yet in ClinVar
4
- and have yet not been marked causative in any existing case.
3
+ For potentially causative variants that are not yet in ClinVar
4
+ and have yet not been marked causative in any existing case.
5
5
 
6
6
  """
7
7
 
@@ -1,7 +1,7 @@
1
- """ OMICS variant
1
+ """OMICS variant
2
2
 
3
- For potentially causative variants that are not yet in ClinVar
4
- and have yet not been marked causative in any existing case.
3
+ For potentially causative variants that are not yet in ClinVar
4
+ and have yet not been marked causative in any existing case.
5
5
 
6
6
  """
7
7
 
scout/parse/case.py CHANGED
@@ -1,12 +1,20 @@
1
1
  import logging
2
+ from typing import Any, Dict, Tuple
2
3
 
3
4
  from ped_parser import FamilyParser
4
5
 
5
- from scout.constants import PHENOTYPE_MAP, SEX_MAP
6
+ from scout.constants import PHENOTYPE_MAP, REV_SEX_MAP, SEX_MAP
6
7
  from scout.exceptions import PedigreeError
7
8
  from scout.models.case.case_loading_models import CaseLoader
8
9
  from scout.parse.mitodel import parse_mitodel_file
9
- from scout.parse.peddy import parse_peddy_ped, parse_peddy_ped_check, parse_peddy_sex_check
10
+ from scout.parse.pedqc import (
11
+ parse_peddy_ped,
12
+ parse_peddy_ped_check,
13
+ parse_peddy_sex_check,
14
+ parse_somalier_ancestry,
15
+ parse_somalier_pairs,
16
+ parse_somalier_samples,
17
+ )
10
18
  from scout.parse.smn import parse_smn_file
11
19
 
12
20
  LOG = logging.getLogger(__name__)
@@ -19,7 +27,7 @@ def parse_case_data(**kwargs):
19
27
  on the command line. Or all the information can be specified in a config file.
20
28
  Please see Scout documentation for further instructions.
21
29
 
22
- Possible keyword args:
30
+ Possible keyword args are formally available in the CaseLoader class, but here is a common list with explanations:
23
31
  cnv_report: Path to pdf file with CNV report
24
32
  config(dict): A yaml formatted config file
25
33
  coverage_qc_report: Path to html file with coverage and qc report
@@ -74,8 +82,10 @@ def parse_case_data(**kwargs):
74
82
  except KeyError:
75
83
  config_dict[key] = None
76
84
 
77
- # This will add information from peddy to the individuals
85
+ # This will add pedigree qc information from Peddy and Somalier to the individuals.
86
+ # Let the newer Somalier have the last word if there is any disagreement
78
87
  add_peddy_information(config_dict)
88
+ add_somalier_information(config_dict)
79
89
 
80
90
  if config_dict.get("smn_tsv"):
81
91
  add_smn_info(config_dict)
@@ -181,7 +191,104 @@ def add_smn_info_case(case_data):
181
191
  ]:
182
192
  ind[key] = smn_info[ind_id][key]
183
193
  except KeyError as err:
184
- LOG.warning("Individual {} has no SMN info to update: {}.".format(ind_id, err))
194
+ LOG.warning(f"Individual {ind_id} has no SMN info to update: {err}.")
195
+
196
+
197
+ def set_somalier_sex_check_ind(ind: Dict[str, str], sex_check: Dict[str, Dict[str, str]]):
198
+ """Check if Somalier has inferred the sex"""
199
+
200
+ ind_id = ind["individual_id"]
201
+ if ind_id in sex_check and all(
202
+ key in sex_check[ind_id] for key in ("sex", "original_pedigree_sex")
203
+ ):
204
+ ind["confirmed_sex"]: bool = (
205
+ sex_check[ind_id]["sex"] == REV_SEX_MAP[sex_check[ind_id]["original_pedigree_sex"]]
206
+ )
207
+
208
+
209
+ def set_somalier_confirmed_parent(
210
+ analysis_inds: Dict[str, Any], ind: Dict[str, Any], ped_check: Dict[Tuple, Any]
211
+ ):
212
+ """Check if Somalier confirmed parental relations.
213
+ First, check that we are looking at individual with parents.
214
+ Double-check that the child/parent pair is in somalier data and set ok.
215
+ If we demand Somalier be run with "relate --infer" we can skip this.
216
+ """
217
+
218
+ ind_id = ind["individual_id"]
219
+ for parent in ["mother", "father"]:
220
+ parent_id = ind[parent]
221
+ if parent_id == "0":
222
+ continue
223
+
224
+ for pair in ped_check:
225
+ if ind_id not in pair or parent_id not in pair:
226
+ continue
227
+ if (
228
+ ped_check[pair]["relatedness"] > 0.32
229
+ and ped_check[pair]["relatedness"] < 0.67
230
+ and ped_check[pair]["ibs0"] / ped_check[pair]["ibs2"] < 0.014
231
+ ):
232
+ analysis_inds[parent_id]["confirmed_parent"] = True
233
+ continue
234
+ # else if parent confirmation failed
235
+ analysis_inds[parent_id]["confirmed_parent"] = False
236
+
237
+
238
+ def set_somalier_sex_and_relatedness_checks(
239
+ case_config: dict,
240
+ ped_check: Dict[Tuple, Any],
241
+ sex_check: Dict[str, Dict],
242
+ ancestry_info: Dict[str, Dict],
243
+ ):
244
+ """
245
+ Update ancestry, sex and relatedness checks for individuals in case config based on parsed Somalier file content.
246
+ """
247
+ analysis_inds = {}
248
+ for ind in case_config["individuals"]:
249
+ ind_id = ind["individual_id"]
250
+ analysis_inds[ind_id] = ind
251
+
252
+ for ind_id in analysis_inds:
253
+ ind = analysis_inds[ind_id]
254
+ # Check if Somalier has inferred the ancestry
255
+ if ind_id in ancestry_info:
256
+ ind["predicted_ancestry"]: str = ancestry_info[ind_id].get(
257
+ "predicted_ancestry", "UNKNOWN"
258
+ )
259
+ set_somalier_sex_check_ind(ind, sex_check)
260
+ set_somalier_confirmed_parent(analysis_inds, ind, ped_check)
261
+
262
+
263
+ def add_somalier_information(case_config: dict):
264
+ """
265
+ Parse any somalier files, and update ancestry, sex and relatedness checks for individuals in case config
266
+ based on them.
267
+ """
268
+ ped_check = {}
269
+ sex_check = {}
270
+ ancestry_info = {}
271
+
272
+ if case_config.get("somalier_pairs"):
273
+ with open(case_config["somalier_pairs"], "r") as file_handle:
274
+ for pair_info in parse_somalier_pairs(file_handle):
275
+ ped_check[(pair_info["sample_a"], pair_info["sample_b"])] = pair_info
276
+
277
+ if case_config.get("somalier_samples"):
278
+ with open(case_config["somalier_samples"], "r") as file_handle:
279
+ for ind_info in parse_somalier_samples(file_handle):
280
+ sex_check[ind_info["sample_id"]] = ind_info
281
+
282
+ if case_config.get("somalier_ancestry"):
283
+ with open(case_config["somalier_ancestry"], "r") as file_handle:
284
+ for ind_info in parse_somalier_ancestry(file_handle):
285
+ ancestry_info[ind_info["sample_id"]] = ind_info
286
+
287
+ if not (ped_check or sex_check or ancestry_info):
288
+ return
289
+
290
+ LOG.info("Adding Somalier info")
291
+ set_somalier_sex_and_relatedness_checks(case_config, ped_check, sex_check, ancestry_info)
185
292
 
186
293
 
187
294
  def add_peddy_information(config_data):
scout/parse/pedqc.py ADDED
@@ -0,0 +1,127 @@
1
+ from typing import List
2
+
3
+ from scout.utils.convert import convert_number, make_bool
4
+
5
+
6
+ def tsv_to_info_dicts(
7
+ lines: List[str], separator: str = "\t", number_keys: List[str] = [], bool_keys: List[str] = []
8
+ ) -> List[dict]:
9
+ """Parse a tsv (or csv with "," as separator) file to a list of dicts, with the header fields as dict keys,
10
+ column values as dict values, and each list item one such dict for each row.
11
+ The number_keys and bool_keys are lists of key names to attempt to explicitly coerce values into number or bool before return.
12
+ """
13
+ info_dicts = []
14
+ for i, line in enumerate(lines):
15
+ line = line.rstrip()
16
+ if i == 0:
17
+ header = line.lstrip("#").split(separator)
18
+ continue
19
+ info_dict = dict(zip(header, line.split(separator)))
20
+ for number_key in number_keys:
21
+ if number_key in info_dict:
22
+ info_dict[number_key] = convert_number(info_dict[number_key])
23
+ for bool_key in bool_keys:
24
+ if bool_key in info_dict:
25
+ info_dict[bool_key] = make_bool(info_dict.get(bool_key))
26
+ info_dicts.append(info_dict)
27
+
28
+ return info_dicts
29
+
30
+
31
+ def parse_peddy_ped(lines: List[str]) -> List[dict]:
32
+ """Parse a peddy.ped file
33
+
34
+ ancestry-prediction: one of AFR AMR EAS EUR SAS UNKNOWN
35
+ PC1/PC2/PC3/PC4: the first 4 values after this sample was
36
+ projected onto the thousand genomes principal components.
37
+
38
+ idr_baf: inter-decile range (90th percentile - 10th percentile)
39
+ of b-allele frequency. We make a distribution of all sites of
40
+ alts / (ref + alts) and then report the difference between the
41
+ 90th and the 10th percentile.
42
+ Large values indicate likely sample contamination.
43
+ """
44
+ return tsv_to_info_dicts(
45
+ lines,
46
+ "\t",
47
+ number_keys=["PC1", "PC2", "PC3", "het_call_rate", "het_idr_baf", "het_mean_depth"],
48
+ )
49
+
50
+
51
+ def parse_peddy_ped_check(lines: List[str]) -> List[dict]:
52
+ """Parse a .ped_check.csv file
53
+
54
+ The following keys are explicitly coerced upon insertion into the returned dicts
55
+ hets_a - the number of sites at which sample_a was heterozygous
56
+ hets_b - the number of sites at which sample_b was heterozygous
57
+ ibs0 - the number of sites at which the 2 samples shared no alleles
58
+ (should approach 0 for parent-child pairs).
59
+ ibs2 - the number of sites at which the 2 samples were both
60
+ hom-ref, both het, or both hom-alt.
61
+ n - the number of sites that was used to predict the relatedness.
62
+ rel - the relatedness calculated from the genotypes.
63
+ pedigree_relatedness - the relatedness reported in the ped file.
64
+ rel_difference - difference between the preceding 2 columns.
65
+ shared_hets - the number of sites at which both samples were hets.
66
+
67
+ pedigree_parents - boolean indicating that this pair is a parent-child pair
68
+ according to the ped file.
69
+ predicted_parents - boolean indicating that this pair is expected to be a parent-child
70
+ pair according to the ibs0 (< 0.012) calculated from the genotypes.
71
+ parent_error - boolean indicating that the preceding 2 columns do not match
72
+ sample_duplication_error - boolean indicating that rel > 0.75 and ibs0 < 0.012
73
+ """
74
+ return tsv_to_info_dicts(
75
+ lines,
76
+ ",",
77
+ number_keys=[
78
+ "hets_a",
79
+ "hets_b",
80
+ "ibs0",
81
+ "ibs2",
82
+ "n",
83
+ "rel",
84
+ "pedigree_relatedness",
85
+ "rel_difference",
86
+ "shared_hets",
87
+ ],
88
+ bool_keys=[
89
+ "pedigree_parents",
90
+ "predicted_parents",
91
+ "parent_error",
92
+ "sample_duplication_error",
93
+ ],
94
+ )
95
+
96
+
97
+ def parse_peddy_sex_check(lines: List[str]) -> List[dict]:
98
+ """Parse a .sex_check.csv file
99
+
100
+ Type coerce the following keys for each dict in the returned list:
101
+ error: boolean indicating whether there is a mismatch between chr genotypes and ped sex
102
+ hom_alt_count: number of homozygous-alternate calls
103
+ hom_ref_count: number of homozygous-reference calls
104
+ het_count: number of heterozygote calls
105
+ het_ratio: ratio of het_count / hom_alt_count. Low for males, high for females
106
+ """
107
+ return tsv_to_info_dicts(
108
+ lines,
109
+ ",",
110
+ number_keys=["hom_alt_count", "hom_ref_count", "het_count", "het_ratio"],
111
+ bool_keys=["error"],
112
+ )
113
+
114
+
115
+ def parse_somalier_pairs(lines: List[str]) -> List[dict]:
116
+ """Parse a Somalier pairs tsv file"""
117
+ return tsv_to_info_dicts(lines, "\t", ["relatedness", "ibs0", "ibs2"])
118
+
119
+
120
+ def parse_somalier_samples(lines: List[str]) -> List[dict]:
121
+ """Parse a Somalier samples tsv file"""
122
+ return tsv_to_info_dicts(lines, "\t")
123
+
124
+
125
+ def parse_somalier_ancestry(lines: List[str]) -> List[dict]:
126
+ """Parse a Somalier ancestry tsv file"""
127
+ return tsv_to_info_dicts(lines, "\t")
@@ -92,10 +92,10 @@ def parse_frequency(variant, info_key):
92
92
  info_key(str)
93
93
 
94
94
  Returns:
95
- frequency(float): or None if frequency does not exist
95
+ frequency(float): or None if frequency does not exist (or is ".", "0", or "-1")
96
96
  """
97
97
  raw_annotation = variant.INFO.get(info_key)
98
- raw_annotation = None if raw_annotation == "." else raw_annotation
98
+ raw_annotation = None if raw_annotation in [".", "-1", -1, 0, "0"] else raw_annotation
99
99
  frequency = float(raw_annotation) if raw_annotation else None
100
100
  return frequency
101
101
 
@@ -152,11 +152,14 @@ def parse_sv_frequency(variant, info_key):
152
152
  These have to be treated separately since some of them are not actually frequencies(float) but
153
153
  occurrences(int)
154
154
  """
155
- value = variant.INFO.get(info_key, 0)
156
- if any([float_str in info_key.upper() for float_str in ["AF", "FRQ"]]):
157
- value = float(value)
155
+ raw_value = variant.INFO.get(info_key, 0)
156
+ if raw_value in [".", "-1", -1, 0, "0"]:
157
+ return None
158
+
159
+ if any(float_str in info_key.upper() for float_str in ["AF", "FRQ"]):
160
+ value = float(raw_value)
158
161
  else:
159
- value = int(value)
162
+ value = int(raw_value)
160
163
  if value > 0:
161
164
  return value
162
165
  return None
@@ -27,14 +27,14 @@ LOG = logging.getLogger(__name__)
27
27
 
28
28
  def parse_variant(
29
29
  variant: Variant,
30
- case,
31
- variant_type="clinical",
32
- rank_results_header=None,
33
- vep_header=None,
34
- individual_positions=None,
35
- category=None,
36
- local_archive_info=None,
37
- ):
30
+ case: dict,
31
+ variant_type: str = "clinical",
32
+ rank_results_header: list = None,
33
+ vep_header: list = None,
34
+ individual_positions: dict = None,
35
+ category: str = None,
36
+ local_archive_info: dict = None,
37
+ ) -> dict:
38
38
  """Return a parsed variant
39
39
 
40
40
  Get all the necessary information to build a variant object
@@ -183,10 +183,63 @@ def parse_variant(
183
183
 
184
184
  parsed_variant["frequencies"] = frequencies
185
185
 
186
- # loqus archive frequencies
186
+ set_loqus_archive_frequencies(parsed_variant, variant, local_archive_info)
187
+
188
+ set_severity_predictions(parsed_variant, variant, parsed_transcripts)
189
+
190
+ ###################### Add conservation ######################
191
+ parsed_variant["conservation"] = parse_conservations(variant, parsed_transcripts)
192
+
193
+ parsed_variant["callers"] = parse_callers(variant, category=category)
194
+ set_rank_result(parsed_variant, variant, rank_results_header)
195
+
196
+ ##################### Add type specific #####################
197
+ set_sv_specific_annotations(parsed_variant, variant)
198
+
199
+ set_mei_specific_annotations(parsed_variant, variant)
200
+
201
+ set_cancer_specific_annotations(parsed_variant, variant)
202
+
203
+ remove_nonetype(parsed_variant)
204
+ return parsed_variant
205
+
206
+
207
+ def set_mei_specific_annotations(parsed_variant: dict, variant: dict):
208
+ """Add MEI specific annotations"""
209
+ if parsed_variant.get("category") in ["mei"]:
210
+ mei_frequencies = parse_mei_frequencies(variant)
211
+ for key in mei_frequencies:
212
+ parsed_variant["frequencies"][key] = mei_frequencies[key]
213
+
214
+
215
+ def set_cancer_specific_annotations(parsed_variant: dict, variant: dict):
216
+ """
217
+ ###################### Add Cancer specific annotations ######################
218
+ # MSK_MVL indicates if variants are in the MSK managed variant list
219
+ # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5437632/
220
+ """
221
+ if variant.INFO.get("MSK_MVL"):
222
+ parsed_variant["mvl_tag"] = True
223
+
224
+
225
+ def set_sv_specific_annotations(parsed_variant: dict, variant: dict):
226
+ """
227
+ Add SV specific annotations
228
+ """
229
+ if parsed_variant.get("category") in ["sv", "cancer_sv"]:
230
+ sv_frequencies = parse_sv_frequencies(variant)
231
+ for key in sv_frequencies:
232
+ parsed_variant["frequencies"][key] = sv_frequencies[key]
233
+
234
+
235
+ def set_loqus_archive_frequencies(parsed_variant: dict, variant: dict, local_archive_info: dict):
236
+ """
237
+ loqusdb archive frequencies
238
+ First, RD germline, for MIP and Balsamic
239
+ Then, Cancer (Balsamic) Germline and Somatic loqus archives
240
+ SNVs contain INFO field Obs, SVs contain clinical_genomics_loqusObs
241
+ """
187
242
 
188
- # RD germline, for MIP and Balsamic
189
- # SNVs contain INFO field Obs, SVs contain clinical_genomics_loqusObs
190
243
  local_obs_old = (
191
244
  variant.INFO.get("Obs")
192
245
  or variant.INFO.get("clinical_genomics_loqusObs")
@@ -203,7 +256,6 @@ def parse_variant(
203
256
  parsed_variant["local_obs_old_freq"] = call_safe(float, local_frq_old)
204
257
  set_local_archive_info(parsed_variant, local_archive_info)
205
258
 
206
- # Cancer (Balsamic) Germline and Somatic loqus archives
207
259
  parsed_variant["local_obs_cancer_germline_old"] = call_safe(
208
260
  int, variant.INFO.get("Cancer_Germline_Obs")
209
261
  )
@@ -224,7 +276,12 @@ def parse_variant(
224
276
  float, variant.INFO.get("Cancer_Somatic_Frq")
225
277
  )
226
278
 
227
- ###################### Add severity predictions ######################
279
+
280
+ def set_severity_predictions(parsed_variant: dict, variant: dict, parsed_transcripts: dict):
281
+ """
282
+ Set severity predictions on parsed variant.
283
+ """
284
+
228
285
  parsed_variant["cadd_score"] = parse_cadd(variant, parsed_transcripts)
229
286
  parsed_variant["spidex"] = call_safe(float, variant.INFO.get("SPIDEX"))
230
287
 
@@ -234,31 +291,6 @@ def parse_variant(
234
291
  ) # This is actually the value of REVEL_rankscore
235
292
  parsed_variant["revel"] = get_highest_revel_score(parsed_transcripts)
236
293
 
237
- ###################### Add conservation ######################
238
- parsed_variant["conservation"] = parse_conservations(variant, parsed_transcripts)
239
-
240
- parsed_variant["callers"] = parse_callers(variant, category=category)
241
- set_rank_result(parsed_variant, variant, rank_results_header)
242
-
243
- ###################### Add SV specific annotations ######################
244
- sv_frequencies = parse_sv_frequencies(variant)
245
- for key in sv_frequencies:
246
- parsed_variant["frequencies"][key] = sv_frequencies[key]
247
-
248
- ###################### Add MEI specific annotations #####################
249
- mei_frequencies = parse_mei_frequencies(variant)
250
- for key in mei_frequencies:
251
- parsed_variant["frequencies"][key] = mei_frequencies[key]
252
-
253
- ###################### Add Cancer specific annotations ######################
254
- # MSK_MVL indicates if variants are in the MSK managed variant list
255
- # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5437632/
256
- if variant.INFO.get("MSK_MVL"):
257
- parsed_variant["mvl_tag"] = True
258
-
259
- remove_nonetype(parsed_variant)
260
- return parsed_variant
261
-
262
294
 
263
295
  def get_highest_revel_score(parsed_transcripts: List[dict]) -> Optional[float]:
264
296
  """Retrieve the highest REVEL_score value from parsed variant transcripts."""
@@ -484,7 +516,7 @@ def set_fusion_info(variant: Variant, parsed_variant: Dict[str, Any]):
484
516
 
485
517
 
486
518
  def add_gene_and_transcript_info_for_fusions(
487
- parsed_variant: Dict[str, Any]
519
+ parsed_variant: Dict[str, Any],
488
520
  ) -> List[Optional[Dict]]:
489
521
  """Add gene and transcript info for fusions. Return list of parsed
490
522
  transcripts for later use in parsing.
scout/server/app.py CHANGED
@@ -13,6 +13,7 @@ from flask_login import current_user
13
13
  from markdown import markdown as python_markdown
14
14
  from markupsafe import Markup
15
15
 
16
+ from scout import __version__
16
17
  from scout.constants import SPIDEX_HUMAN
17
18
  from scout.log import init_log
18
19
 
@@ -48,6 +49,7 @@ def create_app(config_file=None, config=None):
48
49
  app = Flask(__name__)
49
50
  CORS(app)
50
51
  app.jinja_env.add_extension("jinja2.ext.do")
52
+ app.jinja_env.globals["SCOUT_VERSION"] = __version__
51
53
 
52
54
  app.config.from_pyfile("config.py") # Load default config file
53
55
  if (
@@ -305,6 +305,7 @@ def set_sample_tracks(display_obj: dict, case_groups: list, chromosome: str):
305
305
 
306
306
  A missing file is indicated with the string "missing", and no track is made for such entries.
307
307
  """
308
+
308
309
  sample_tracks = []
309
310
 
310
311
  track_items = "mt_bams" if chromosome == "M" else "bam_files"
@@ -330,6 +331,7 @@ def set_sample_tracks(display_obj: dict, case_groups: list, chromosome: str):
330
331
  "indexURL": case[track_index_items][count],
331
332
  "format": case[track_items][count].split(".")[-1], # "bam" or "cram"
332
333
  "height": 700,
334
+ "show_soft_clips": case["track_items_soft_clips_settings"][count],
333
335
  }
334
336
  )
335
337
  display_obj["sample_tracks"] = sample_tracks
@@ -116,6 +116,9 @@
116
116
  url: "{{ url_for('alignviewers.remote_static', file=track.url) }}",
117
117
  indexURL: "{{ url_for('alignviewers.remote_static', file=track.indexURL) }}",
118
118
  sourceType: "file",
119
+ groupBy: "tag:HP",
120
+ colorBy: "basemod2:m",
121
+ showSoftClips: {{track.show_soft_clips | lower }},
119
122
  format: "{{ track.format }}",
120
123
  height: "{{track.height}}"
121
124
  },
@@ -1,5 +1,5 @@
1
1
  {% macro igv_script() %}
2
2
  <link rel="shortcut icon" href="//igv.org/web/img/favicon.ico">
3
3
  <!-- IGV JS-->
4
- <script src="https://cdn.jsdelivr.net/npm/igv@3.1.0/dist/igv.min.js" integrity="sha512-GqDTDFFltqWxc3IeSMcVcPNenhJE/OZnaMk7hkTk/shtC5bejPWstJF5AY8TsVpJWfK2HOzXYKUk95fS51Warw==" crossorigin="anonymous"></script>
4
+ <script src="https://cdn.jsdelivr.net/npm/igv@3.2.0/dist/igv.min.js" integrity="sha512-MHnbGQeONlQXyEs6PgiW2bhwywJW5IwUnRKfQKrPaVSrzopctBTU1VtOiEXMf/ZPBk47eFimlVRxdff+sdsyAg==" crossorigin="anonymous"></script>
5
5
  {% endmacro %}