scout-browser 4.81__py3-none-any.whl → 4.82.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. scout/__version__.py +1 -1
  2. scout/adapter/mongo/disease_terms.py +5 -2
  3. scout/adapter/mongo/query.py +23 -11
  4. scout/build/managed_variant.py +12 -1
  5. scout/build/variant/genotype.py +2 -0
  6. scout/build/variant/variant.py +5 -0
  7. scout/constants/clinvar.py +1 -1
  8. scout/constants/query_terms.py +3 -1
  9. scout/models/variant/variant.py +1 -0
  10. scout/parse/variant/frequency.py +56 -54
  11. scout/parse/variant/genotype.py +89 -15
  12. scout/parse/variant/transcript.py +17 -9
  13. scout/parse/variant/variant.py +12 -0
  14. scout/server/app.py +6 -3
  15. scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
  16. scout/server/blueprints/cases/controllers.py +2 -57
  17. scout/server/blueprints/cases/templates/cases/case_bionano.html +3 -24
  18. scout/server/blueprints/cases/templates/cases/case_report.html +87 -69
  19. scout/server/blueprints/cases/templates/cases/case_sma.html +2 -13
  20. scout/server/blueprints/cases/templates/cases/chanjo2_form.html +47 -0
  21. scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +4 -4
  22. scout/server/blueprints/cases/templates/cases/gene_panel.html +4 -11
  23. scout/server/blueprints/cases/templates/cases/individuals_table.html +2 -12
  24. scout/server/blueprints/cases/templates/cases/utils.html +20 -1
  25. scout/server/blueprints/cases/views.py +0 -22
  26. scout/server/blueprints/clinvar/controllers.py +3 -3
  27. scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +29 -2
  28. scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +36 -18
  29. scout/server/blueprints/clinvar/views.py +13 -1
  30. scout/server/blueprints/diagnoses/controllers.py +2 -0
  31. scout/server/blueprints/institutes/controllers.py +76 -38
  32. scout/server/blueprints/institutes/templates/overview/cases.html +54 -42
  33. scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +1 -1
  34. scout/server/blueprints/managed_variants/views.py +2 -4
  35. scout/server/blueprints/panels/templates/panels/panel.html +8 -7
  36. scout/server/blueprints/panels/views.py +2 -11
  37. scout/server/blueprints/phenotypes/templates/phenotypes/hpo_terms.html +3 -2
  38. scout/server/blueprints/variant/controllers.py +3 -2
  39. scout/server/blueprints/variant/templates/variant/components.html +1 -1
  40. scout/server/blueprints/variant/templates/variant/utils.html +3 -1
  41. scout/server/blueprints/variant/templates/variant/variant.html +20 -15
  42. scout/server/blueprints/variant/templates/variant/variant_details.html +78 -26
  43. scout/server/blueprints/variant/utils.py +9 -13
  44. scout/server/blueprints/variants/controllers.py +30 -3
  45. scout/server/blueprints/variants/forms.py +15 -1
  46. scout/server/blueprints/variants/templates/variants/components.html +55 -0
  47. scout/server/blueprints/variants/templates/variants/fusion-variants.html +3 -50
  48. scout/server/blueprints/variants/templates/variants/str-variants.html +8 -5
  49. scout/server/blueprints/variants/templates/variants/utils.html +57 -31
  50. scout/server/blueprints/variants/templates/variants/variants.html +1 -1
  51. scout/server/blueprints/variants/utils.py +7 -10
  52. scout/server/extensions/clinvar_extension.py +10 -2
  53. {scout_browser-4.81.dist-info → scout_browser-4.82.2.dist-info}/METADATA +6 -5
  54. {scout_browser-4.81.dist-info → scout_browser-4.82.2.dist-info}/RECORD +58 -57
  55. {scout_browser-4.81.dist-info → scout_browser-4.82.2.dist-info}/LICENSE +0 -0
  56. {scout_browser-4.81.dist-info → scout_browser-4.82.2.dist-info}/WHEEL +0 -0
  57. {scout_browser-4.81.dist-info → scout_browser-4.82.2.dist-info}/entry_points.txt +0 -0
  58. {scout_browser-4.81.dist-info → scout_browser-4.82.2.dist-info}/top_level.txt +0 -0
scout/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "4.81"
1
+ __version__ = "4.82.2"
@@ -10,6 +10,9 @@ from scout.exceptions import IntegrityError
10
10
  LOG = logging.getLogger(__name__)
11
11
 
12
12
  DISEASE_FILTER_PROJECT = {"hpo_terms": 0, "genes": 0}
13
+ REGEX = "$regex"
14
+ REGEX_OPTIONS = "$options"
15
+ REGEX_IGNORECASE = "i"
13
16
 
14
17
 
15
18
  class DiagnosisHandler(object):
@@ -32,8 +35,8 @@ class DiagnosisHandler(object):
32
35
  if query:
33
36
  query_dict = {
34
37
  "$or": [
35
- {"disease_nr": {"$regex": query, "$options": "i"}},
36
- {"description": {"$regex": query, "$options": "i"}},
38
+ {"disease_id": {REGEX: query, REGEX_OPTIONS: REGEX_IGNORECASE}},
39
+ {"description": {REGEX: query, REGEX_OPTIONS: REGEX_IGNORECASE}},
37
40
  ]
38
41
  }
39
42
  # If source is specified, add this restriction to the query
@@ -11,6 +11,8 @@ from scout.constants import (
11
11
  TRUSTED_REVSTAT_LEVEL,
12
12
  )
13
13
 
14
+ CRITERION_EXCLUDE_OPERATOR = {False: "$in", True: "$nin"}
15
+
14
16
  LOG = logging.getLogger(__name__)
15
17
 
16
18
 
@@ -256,7 +258,11 @@ class QueryHandler(object):
256
258
  if criterion in ["hgnc_symbols", "gene_panels"]:
257
259
  gene_query = self.gene_filter(query, build=build)
258
260
  if len(gene_query) > 0 or "hpo" in query.get("gene_panels", []):
259
- mongo_query["hgnc_ids"] = {"$in": gene_query}
261
+ mongo_query["hgnc_ids"] = {
262
+ CRITERION_EXCLUDE_OPERATOR[
263
+ bool(query.get("gene_panels_exclude"))
264
+ ]: gene_query
265
+ }
260
266
  continue
261
267
 
262
268
  if criterion == "chrom" and query.get("chrom"): # filter by coordinates
@@ -352,6 +358,7 @@ class QueryHandler(object):
352
358
  mongo_query["$and"] = coordinate_query + mongo_query["$and"]
353
359
  else:
354
360
  mongo_query["$and"] = coordinate_query
361
+
355
362
  return mongo_query
356
363
 
357
364
  def affected_inds_query(self, mongo_query, case_id, gt_query):
@@ -572,16 +579,21 @@ class QueryHandler(object):
572
579
  }
573
580
  )
574
581
 
575
- if criterion == "local_obs":
576
- local_obs = query.get("local_obs")
577
- mongo_secondary_query.append(
578
- {
579
- "$or": [
580
- {"local_obs_old": None},
581
- {"local_obs_old": {"$lt": local_obs + 1}},
582
- ]
583
- }
584
- )
582
+ for local_obs_old_type in [
583
+ "local_obs_old",
584
+ "local_obs_cancer_germline_old",
585
+ "local_obs_cancer_somatic_old",
586
+ ]:
587
+ if criterion == local_obs_old_type:
588
+ local_obs = query.get(local_obs_old_type)
589
+ mongo_secondary_query.append(
590
+ {
591
+ "$or": [
592
+ {local_obs_old_type: None},
593
+ {local_obs_old_type: {"$lt": local_obs + 1}},
594
+ ]
595
+ }
596
+ )
585
597
 
586
598
  if criterion == "local_obs_freq":
587
599
  local_obs_freq = query.get("local_obs_freq")
@@ -1,4 +1,7 @@
1
1
  import logging
2
+
3
+ from flask import flash
4
+
2
5
  from scout.models.managed_variant import ManagedVariant
3
6
 
4
7
  LOG = logging.getLogger(__name__)
@@ -32,7 +35,15 @@ def build_managed_variant(managed_variant_info):
32
35
  description=managed_variant_info.get("description", ""),
33
36
  )
34
37
  except KeyError:
35
- raise KeyError("Managed variant has to have chr, pos, ref and alt.")
38
+ flash(
39
+ "Managed variant has to have chr, pos, ref and alt",
40
+ "danger",
41
+ )
42
+ except ValueError as ve:
43
+ flash(
44
+ "Could not build managed variant {}".format(ve),
45
+ "danger",
46
+ )
36
47
 
37
48
  LOG.debug("Built managed variant %s", managed_variant.get("display_id"))
38
49
 
@@ -14,6 +14,7 @@ def build_genotype(gt_call):
14
14
  allele_depths = list, # int
15
15
  read_depth = int,
16
16
  genotype_quality = int,
17
+ alt_mc = int, # STR
17
18
  so = str, # STR type of reads that support allele: "a/a" where a in [SPANNING, FLANKING, INREPEAT]
18
19
  )
19
20
 
@@ -27,6 +28,7 @@ def build_genotype(gt_call):
27
28
  alt_frequency=gt_call["alt_frequency"] or -1,
28
29
  genotype_quality=gt_call["genotype_quality"],
29
30
  so=gt_call["so"],
31
+ alt_mc=gt_call["alt_mc"],
30
32
  ffpm=gt_call["ffpm"],
31
33
  split_read=gt_call["split_read"],
32
34
  )
@@ -199,7 +199,12 @@ def build_variant(
199
199
  variant_obj["str_pathologic_min"] = variant.get("str_pathologic_min")
200
200
  variant_obj["str_ref"] = variant.get("str_ref")
201
201
  variant_obj["str_repid"] = variant.get("str_repid")
202
+ variant_obj["str_trid"] = variant.get("str_trid")
203
+ variant_obj["str_pathologic_struc"] = variant.get("str_pathologic_struc")
204
+ variant_obj["str_struc"] = variant.get("str_struc")
205
+ variant_obj["str_motifs"] = variant.get("str_motifs")
202
206
  variant_obj["str_ru"] = variant.get("str_ru")
207
+ variant_obj["str_display_ru"] = variant.get("str_display_ru")
203
208
  variant_obj["str_source"] = variant.get("str_source")
204
209
  variant_obj["str_status"] = variant.get("str_status")
205
210
  variant_obj["str_swegen_mean"] = call_safe(float, variant.get("str_swegen_mean"))
@@ -184,7 +184,7 @@ CONDITION_PREFIX = {
184
184
  "MeSH": "",
185
185
  "MONDO": "MONDO:",
186
186
  "OMIM": "",
187
- "Orphanet": "ORPHA:",
187
+ "Orphanet": "ORPHA",
188
188
  }
189
189
 
190
190
  CLINVAR_ASSERTION_METHOD_CIT_DB_OPTIONS = {"DOI", "pmc", "PMID"}
@@ -31,7 +31,9 @@ PRIMARY_CRITERIA = ["clinsig"]
31
31
  # such as a Pathogenic ClinSig.
32
32
  SECONDARY_CRITERIA = [
33
33
  "gnomad_frequency",
34
- "local_obs",
34
+ "local_obs_old",
35
+ "local_obs_cancer_somatic_old",
36
+ "local_obs_cancer_germline_old",
35
37
  "local_obs_freq",
36
38
  "clingen_ngi",
37
39
  "swegen",
@@ -112,6 +112,7 @@ gt_call = dict(
112
112
  genotype_call=str,
113
113
  allele_depths=list, # int
114
114
  read_depth=int,
115
+ alt_mc=int,
115
116
  genotype_quality=int,
116
117
  so=str,
117
118
  )
@@ -2,7 +2,41 @@ from typing import Dict
2
2
 
3
3
  import cyvcf2
4
4
 
5
- swegen_keys = ["swegen", "swegenAF", "SWEGENAF"]
5
+ # SNV
6
+ SWEGEN_KEYS = ["swegen", "swegenAF", "SWEGENAF"]
7
+ THOUSAND_GENOMES_KEYS = ["1000GAF"]
8
+ THOUSAND_GENOMES_MAX_KEYS = ["1000G_MAX_AF"]
9
+
10
+ EXAC_KEYS = ["EXACAF"]
11
+ EXAC_MAX_KEYS = ["ExAC_MAX_AF", "EXAC_MAX_AF"]
12
+
13
+ # gnomAD has both SNV and SV
14
+ GNOMAD_INFO_KEYS = ["GNOMADAF", "GNOMAD_AF", "gnomADg_AF", "gnomad_svAF"]
15
+ GNOMAD_INFO_MAX_KEYS = ["gnomADg_AF_POPMAX", "GNOMADAF_popmax", "GNOMADAF_POPMAX", "GNOMADAF_MAX"]
16
+
17
+ # SV
18
+ CLINGEN_BENIGN_KEYS = [
19
+ "clingen_cgh_benignAF",
20
+ "clingen_cgh_benign",
21
+ "clingen_cgh_benignOCC",
22
+ ]
23
+
24
+ CLINGEN_PATHOGENIC_KEYS = [
25
+ "clingen_cgh_pathogenicAF",
26
+ "clingen_cgh_pathogenic",
27
+ "clingen_cgh_pathogenicOCC",
28
+ ]
29
+
30
+ CLINGEN_NGI_KEYS = ["clingen_ngi", "clingen_ngiAF", "clingen_ngiOCC"]
31
+
32
+ DECIPHER_KEYS = ["decipherAF", "decipher"]
33
+ CG_KEYS = ["clinical_genomics_mipAF", "clinical_genomics_mipOCC"]
34
+
35
+ # MEI
36
+ SWEGEN_ALU_KEYS = ["swegen_alu_FRQ", "swegen_alu_OCC"]
37
+ SWEGEN_HERV_KEYS = ["swegen_herv_FRQ", "swegen_herv_OCC"]
38
+ SWEGEN_L1_KEYS = ["swegen_l1_FRQ", "swegen_l1_OCC"]
39
+ SWEGEN_SVA_KEYS = ["swegen_sva_FRQ", "swegen_sva_OCC"]
6
40
 
7
41
 
8
42
  def parse_frequencies(variant, transcripts):
@@ -19,37 +53,27 @@ def parse_frequencies(variant, transcripts):
19
53
  frequencies(dict): A dictionary with the relevant frequencies
20
54
  """
21
55
  frequencies = {}
22
- # These lists could be extended...
23
- thousand_genomes_keys = ["1000GAF"]
24
- thousand_genomes_max_keys = ["1000G_MAX_AF"]
25
-
26
- exac_keys = ["EXACAF"]
27
- exac_max_keys = ["ExAC_MAX_AF", "EXAC_MAX_AF"]
28
56
 
29
- # Gnomad have both snv and sv frequencies
30
- gnomad_keys = ["GNOMADAF", "GNOMAD_AF", "gnomad_svAF"]
31
- gnomad_max_keys = ["GNOMADAF_popmax", "GNOMADAF_POPMAX", "GNOMADAF_MAX"]
32
-
33
- update_frequency_from_vcf(frequencies, variant, exac_keys, "exac")
34
- update_frequency_from_vcf(frequencies, variant, exac_max_keys, "exac_max")
35
- update_frequency_from_vcf(frequencies, variant, gnomad_keys, "gnomad")
36
- update_frequency_from_vcf(frequencies, variant, swegen_keys, "swegen")
37
- update_frequency_from_vcf(frequencies, variant, gnomad_max_keys, "gnomad_max")
38
- update_frequency_from_vcf(frequencies, variant, thousand_genomes_keys, "thousand_g")
39
- update_frequency_from_vcf(frequencies, variant, thousand_genomes_max_keys, "thousand_g_max")
57
+ update_frequency_from_vcf(frequencies, variant, EXAC_KEYS, "exac")
58
+ update_frequency_from_vcf(frequencies, variant, EXAC_MAX_KEYS, "exac_max")
59
+ update_frequency_from_vcf(frequencies, variant, GNOMAD_INFO_KEYS, "gnomad")
60
+ update_frequency_from_vcf(frequencies, variant, SWEGEN_KEYS, "swegen")
61
+ update_frequency_from_vcf(frequencies, variant, GNOMAD_INFO_MAX_KEYS, "gnomad_max")
62
+ update_frequency_from_vcf(frequencies, variant, THOUSAND_GENOMES_KEYS, "thousand_g")
63
+ update_frequency_from_vcf(frequencies, variant, THOUSAND_GENOMES_MAX_KEYS, "thousand_g_max")
40
64
 
41
65
  # For mitochondrial variants, keep both "hom" and "het" freqs
42
66
  update_frequency_from_vcf(frequencies, variant, ["GNOMAD_MT_AF_HOM"], "gnomad_mt_homoplasmic")
43
67
  update_frequency_from_vcf(frequencies, variant, ["GNOMAD_MT_AF_HET"], "gnomad_mt_heteroplasmic")
44
68
 
45
- # Search transcripts if not found in VCF
46
- if not frequencies:
47
- update_frequency_from_transcript(frequencies, transcripts)
48
-
49
69
  # These are SV-specific frequencies
50
70
  update_frequency_from_vcf(frequencies, variant, ["left_1000GAF"], "thousand_g_left")
51
71
  update_frequency_from_vcf(frequencies, variant, ["right_1000GAF"], "thousand_g_right")
52
72
 
73
+ # Search transcripts CSQ if not found in VCF INFO
74
+ if not frequencies:
75
+ update_frequency_from_transcript(frequencies, transcripts)
76
+
53
77
  return frequencies
54
78
 
55
79
 
@@ -85,31 +109,14 @@ def parse_sv_frequencies(variant: cyvcf2.Variant) -> Dict:
85
109
  """
86
110
  sv_frequencies = {}
87
111
 
88
- clingen_benign_keys = [
89
- "clingen_cgh_benignAF",
90
- "clingen_cgh_benign",
91
- "clingen_cgh_benignOCC",
92
- ]
93
-
94
- clingen_pathogenic_keys = [
95
- "clingen_cgh_pathogenicAF",
96
- "clingen_cgh_pathogenic",
97
- "clingen_cgh_pathogenicOCC",
98
- ]
99
-
100
- clingen_ngi_keys = ["clingen_ngi", "clingen_ngiAF", "clingen_ngiOCC"]
101
-
102
- decipher_keys = ["decipherAF", "decipher"]
103
- cg_keys = ["clinical_genomics_mipAF", "clinical_genomics_mipOCC"]
104
-
105
- update_sv_frequency_from_vcf(sv_frequencies, variant, clingen_benign_keys, "clingen_cgh_benign")
112
+ update_sv_frequency_from_vcf(sv_frequencies, variant, CLINGEN_BENIGN_KEYS, "clingen_cgh_benign")
106
113
  update_sv_frequency_from_vcf(
107
- sv_frequencies, variant, clingen_pathogenic_keys, "clingen_cgh_pathogenic"
114
+ sv_frequencies, variant, CLINGEN_PATHOGENIC_KEYS, "clingen_cgh_pathogenic"
108
115
  )
109
- update_sv_frequency_from_vcf(sv_frequencies, variant, clingen_ngi_keys, "clingen_ngi")
110
- update_sv_frequency_from_vcf(sv_frequencies, variant, swegen_keys, "swegen")
111
- update_sv_frequency_from_vcf(sv_frequencies, variant, decipher_keys, "decipher")
112
- update_sv_frequency_from_vcf(sv_frequencies, variant, cg_keys, "clingen_mip")
116
+ update_sv_frequency_from_vcf(sv_frequencies, variant, CLINGEN_NGI_KEYS, "clingen_ngi")
117
+ update_sv_frequency_from_vcf(sv_frequencies, variant, SWEGEN_KEYS, "swegen")
118
+ update_sv_frequency_from_vcf(sv_frequencies, variant, DECIPHER_KEYS, "decipher")
119
+ update_sv_frequency_from_vcf(sv_frequencies, variant, CG_KEYS, "clingen_mip")
113
120
 
114
121
  return sv_frequencies
115
122
 
@@ -117,17 +124,12 @@ def parse_sv_frequencies(variant: cyvcf2.Variant) -> Dict:
117
124
  def parse_mei_frequencies(variant: cyvcf2.Variant) -> Dict:
118
125
  """Parsing of some custom mei frequencies."""
119
126
 
120
- swegen_alu_keys = ["swegen_alu_FRQ", "swegen_alu_OCC"]
121
- swegen_herv_keys = ["swegen_herv_FRQ", "swegen_herv_OCC"]
122
- swegen_l1_keys = ["swegen_l1_FRQ", "swegen_l1_OCC"]
123
- swegen_sva_keys = ["swegen_sva_FRQ", "swegen_sva_OCC"]
124
-
125
127
  mei_frequencies = {}
126
128
 
127
- update_sv_frequency_from_vcf(mei_frequencies, variant, swegen_alu_keys, "swegen_alu")
128
- update_sv_frequency_from_vcf(mei_frequencies, variant, swegen_herv_keys, "swegen_herv")
129
- update_sv_frequency_from_vcf(mei_frequencies, variant, swegen_l1_keys, "swegen_l1")
130
- update_sv_frequency_from_vcf(mei_frequencies, variant, swegen_sva_keys, "swegen_sva")
129
+ update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_ALU_KEYS, "swegen_alu")
130
+ update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_HERV_KEYS, "swegen_herv")
131
+ update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_L1_KEYS, "swegen_l1")
132
+ update_sv_frequency_from_vcf(mei_frequencies, variant, SWEGEN_SVA_KEYS, "swegen_sva")
131
133
 
132
134
  if any(mei_frequencies.values()):
133
135
  max_mei_frequency = max(mei_frequencies.values())
@@ -19,7 +19,7 @@ Uses 'DV' to describe number of paired ends that supports the event and
19
19
  """
20
20
 
21
21
  import logging
22
- from typing import Dict, List, Optional, Tuple
22
+ from typing import Dict, List, Optional, Tuple, Union
23
23
 
24
24
  import cyvcf2
25
25
 
@@ -103,6 +103,14 @@ def parse_genotype(variant, ind, pos):
103
103
  (flanking_ref, flanking_alt) = _parse_format_entry(variant, pos, "ADFL")
104
104
  (inrepeat_ref, inrepeat_alt) = _parse_format_entry(variant, pos, "ADIR")
105
105
 
106
+ # TRGT long read STR specific
107
+ (_, mc_alt) = _parse_format_entry_trgt_mc(variant, pos)
108
+ gt_call["alt_mc"] = mc_alt
109
+
110
+ (sd_ref, sd_alt) = _parse_format_entry(variant, pos, "SD", float)
111
+ (ap_ref, ap_alt) = _parse_format_entry(variant, pos, "AP", float)
112
+ (am_ref, am_alt) = _parse_format_entry(variant, pos, "AM", float)
113
+
106
114
  # MEI specific
107
115
  (spanning_mei_ref, clip5_alt, clip3_alt) = get_mei_reads(
108
116
  variant, pos
@@ -395,33 +403,48 @@ def get_str_so(variant, pos):
395
403
  return str_so
396
404
 
397
405
 
398
- def _parse_format_entry(variant, pos, format_entry_name):
399
- """Parse genotype format entry for named integer values.
400
- Expects that ref/alt values could be separated by /.
406
+ def split_values(values: List[str]) -> List[str]:
407
+ """
408
+ Expects that ref/alt values could be separated by "/" or ",".
401
409
 
402
- Args:
403
- variant(cyvcf2.Variant)
404
- pos(int): individual position in VCF
405
- format_entry_name: name of format entry
406
- Returns:
407
- (ref(int), alt(int)) tuple
410
+ """
411
+ new_values = []
412
+ for value in values:
413
+ for delim in ["/", ","]:
414
+ if delim in value:
415
+ new_values = list(value.split(delim))
416
+
417
+ if new_values:
418
+ return new_values
419
+
420
+ return values
421
+
422
+
423
+ def _parse_format_entry(
424
+ variant: cyvcf2.Variant,
425
+ pos: int,
426
+ format_entry_name: str,
427
+ number_format: Optional[Union[float, int]] = int,
428
+ ) -> Tuple[Union[float, int], ...]:
429
+ """Parse genotype format entry for named integer values.
430
+ Expects that ref/alt values could be separated by "/" or ",".
431
+ Give individual position in VCF as pos and name of format entry to parse as format_entry_name.
408
432
  """
409
433
 
410
434
  ref = None
411
435
  alt = None
412
436
  if format_entry_name in variant.FORMAT:
413
437
  try:
414
- value = variant.format(format_entry_name)[pos]
415
- values = list(value.split("/"))
438
+ values = split_values(variant.format(format_entry_name)[pos])
416
439
 
417
440
  ref_value = None
418
441
  alt_value = None
419
442
 
420
443
  if len(values) > 1:
421
- ref_value = int(values[0])
422
- alt_value = int(values[1])
444
+ ref_value = (number_format)(values[0])
445
+ alt_value = (number_format)(values[1])
423
446
  if len(values) == 1:
424
- alt_value = int(values[0])
447
+ alt_value = (number_format)(values[0])
425
448
  if ref_value >= 0:
426
449
  ref = ref_value
427
450
  if alt_value >= 0:
@@ -429,3 +452,54 @@ def _parse_format_entry(variant, pos, format_entry_name):
429
452
  except (ValueError, TypeError) as _ignore_error:
430
453
  pass
431
454
  return (ref, alt)
455
+
456
+
457
def _parse_format_entry_trgt_mc(
    variant: "cyvcf2.Variant", pos: int
) -> Tuple[Optional[int], Optional[int]]:
    """Parse the TRGT FORMAT MC (motif count) entry for one individual.

    The MC value contains one entry per allele, separated with ",". Each entry is either a
    single count or a "_" separated list with one count per enumerated motif. For some loci
    only certain motifs count towards a pathologic size, and if so their indices are passed
    with the PathologicStruc INFO key; without that key all motif counts are summed.
    VCF lines are expected to be decomposed, so at most one alt is present per entry.
    Allele 0 in the GT call tells which MC entry belongs to the reference allele.

    Args:
        variant: variant record; FORMAT, format("MC"), genotypes and INFO are read from it
        pos: individual position in VCF

    Returns:
        (mc_ref, mc_alt): summed pathologic motif count for the reference and the alternative
        allele. A value is None when no count is available for that allele.

    Raises:
        ValueError: if a motif count is present but is not an integer.
    """
    if "MC" not in variant.FORMAT:
        return (None, None)

    mc = variant.format("MC")[pos]
    if not mc:
        return (None, None)

    # Find which allele of the call, if any, is the reference allele.
    # cyvcf2 genotype entries end with a boolean phase flag. False == 0 in Python, so the flag
    # must be skipped or it would be mistaken for a reference allele on unphased calls.
    ref_idx = None
    gt = variant.genotypes[pos]
    if gt:
        for idx, allele in enumerate(gt):
            if isinstance(allele, bool):
                continue
            if allele == 0:
                ref_idx = idx

    pathologic_struc = variant.INFO.get("PathologicStruc", None)
    if isinstance(pathologic_struc, int):
        # A single motif index: wrap it so the membership test below works and index 0 is
        # not mistaken for "no PathologicStruc given".
        pathologic_struc = (pathologic_struc,)
    # NOTE(review): assumes PathologicStruc otherwise arrives as a collection of integer
    # indices; confirm against the VCF header used by the STR annotation step.

    mc_ref = None
    mc_alt = None
    for idx, allele in enumerate(mc.split(",")):
        if allele in ("", "."):
            # VCF missing value for this allele: there is nothing to count
            continue

        motif_counts = allele.split("_")
        if len(motif_counts) > 1:
            pathologic_mcs = pathologic_struc or range(len(motif_counts))
            # Sum per allele, starting from zero each time, so that the count of one allele
            # never leaks into the next one.
            pathologic_counts = sum(
                int(count) for index, count in enumerate(motif_counts) if index in pathologic_mcs
            )
        else:
            pathologic_counts = int(allele)

        if ref_idx is not None and idx == ref_idx:
            mc_ref = pathologic_counts
        else:
            mc_alt = pathologic_counts

    return (mc_ref, mc_alt)
@@ -5,6 +5,10 @@ from scout.constants import SO_TERMS
5
5
 
6
6
  LOG = logging.getLogger(__name__)
7
7
 
8
+ # gnomAD transcript CSQ keys. Use plain (older) AF if available. For a secondary choice, prefer genomes over exomes.
9
+ GNOMAD_CSQ_KEYS = ["GNOMAD_AF", "GNOMADG_AF", "GNOMAD_EXOMES_AF"]
10
+ THOUSAND_GENOMES_CSQ_KEYS = ["AF", "1000GAF", "1000GP3_AF"]
11
+
8
12
 
9
13
  def parse_transcripts(raw_transcripts):
10
14
  """Parse transcript information from VCF variants
@@ -303,6 +307,10 @@ def set_variant_frequencies(transcript, entry):
303
307
  * 'gnomAD_AF' - gnomAD exomes, all populations combined
304
308
  * 'gnomAD_xxx_AF' - gnomAD exomes, individual populations
305
309
  * 'MAX_AF' - Max of all populations (1000G, gnomAD exomes, ESP)
310
+ In VEP 107/111 keys are
311
+ * 'gnomADg_AF' - genomes
312
+ * 'gnomAD_exomes_AF' - exomes
313
+ * 1000GP3_AF - 1000G Phase 3
306
314
 
307
315
  Reference: https://www.ensembl.org/info/docs/tools/vep/vep_formats.html
308
316
  """
@@ -312,30 +320,30 @@ def set_variant_frequencies(transcript, entry):
312
320
  try:
313
321
  for key in entry:
314
322
  # All frequency keys end with "AF" or "POPMAX"
315
- if not key.endswith("AF"):
323
+ if not (key.endswith("AF") or key.endswith("POPMAX")):
316
324
  continue
317
325
 
318
326
  value = entry[key]
319
- if not value:
327
+ if not value or value == ".":
320
328
  continue
321
329
 
322
- # This is the 1000G max af information
323
- if key == "AF" or key == "1000GAF":
330
+ if key in THOUSAND_GENOMES_CSQ_KEYS:
324
331
  transcript["thousand_g_maf"] = float(value)
325
332
  continue
326
333
 
327
- if key == "GNOMAD_AF":
328
- transcript["gnomad_maf"] = float(value)
329
- continue
334
+ for gnomad_ordered_key in GNOMAD_CSQ_KEYS:
335
+ if key == gnomad_ordered_key:
336
+ transcript["gnomad_maf"] = float(value)
337
+ break
330
338
 
331
339
  if key == "EXAC_MAX_AF":
332
340
  transcript["exac_max"] = float(value)
333
341
  transcript["exac_maf"] = float(value)
334
342
  continue
335
343
 
344
+ # remaining gnomAD or 1000G subpopulation frequencies and/or popmax values
336
345
  if "GNOMAD" in key:
337
346
  gnomad_freqs.append(float(value))
338
-
339
347
  else:
340
348
  thousandg_freqs.append(float(value))
341
349
 
@@ -354,4 +362,4 @@ def set_variant_frequencies(transcript, entry):
354
362
  )
355
363
  LOG.debug("Exception details", exc_info=True)
356
364
  LOG.debug("Current entry: %s", entry)
357
- LOG.warning("Only splitted and normalised VEP v90+ frequencies are supported")
365
+ LOG.warning("Only decomposed/split and normalised VEP v90+ frequencies are supported")
@@ -393,6 +393,18 @@ def set_str_info(variant: Variant, parsed_variant: Dict[str, Any]):
393
393
  # repeat id generally corresponds to gene symbol
394
394
  parsed_variant["str_repid"] = call_safe(str, variant.INFO.get("REPID"))
395
395
 
396
+ # repeat id from trgt - generally corresponds to gene symbol and/or disease
397
+ parsed_variant["str_trid"] = call_safe(str, variant.INFO.get("TRID"))
398
+
399
+ # repeat unit - used e g in PanelApp naming of STRs
400
+ parsed_variant["str_struc"] = call_safe(str, variant.INFO.get("STRUC"))
401
+
402
+ # repeat motif(s) - used e g in TRGT MC motif splits
403
+ parsed_variant["str_motifs"] = call_safe(str, variant.INFO.get("MOTIFS"))
404
+
405
+ # repeat pathologic motifs structure - list of indices of pathologic motifs counting towards MC
406
+ parsed_variant["str_pathologic_struc"] = call_safe(str, variant.INFO.get("PathologicStruc"))
407
+
396
408
  # repeat unit - used e g in PanelApp naming of STRs
397
409
  parsed_variant["str_ru"] = call_safe(str, variant.INFO.get("RU"))
398
410
 
scout/server/app.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code for flask app"""
2
+
2
3
  import logging
3
4
  import os
4
5
  from datetime import timedelta
@@ -10,7 +11,7 @@ from flask import Flask, current_app, redirect, request, url_for
10
11
  from flask_babel import Babel
11
12
  from flask_cors import CORS
12
13
  from flask_login import current_user
13
- from flaskext.markdown import Markdown
14
+ from markdown import markdown as python_markdown
14
15
  from markupsafe import Markup
15
16
 
16
17
  from . import extensions
@@ -109,8 +110,6 @@ def configure_extensions(app):
109
110
  extensions.login_manager.init_app(app)
110
111
  extensions.mail.init_app(app)
111
112
 
112
- Markdown(app)
113
-
114
113
  if app.config.get("SQLALCHEMY_DATABASE_URI"):
115
114
  LOG.info("Chanjo extension enabled")
116
115
  configure_coverage(app)
@@ -227,6 +226,10 @@ def register_filters(app):
227
226
  # round all other numbers
228
227
  return round(number, ndigits)
229
228
 
229
+ @app.template_filter()
230
+ def markdown(text: str) -> Markup:
231
+ return Markup(python_markdown(text))
232
+
230
233
  @app.template_filter()
231
234
  def tuple_list_to_dict(tuple_list, key_elem, value_elem):
232
235
  """Accepts a list of tuples and returns a dictionary with tuple element = key_elem as keys and tuple element = value_elem as values"""
@@ -1,5 +1,5 @@
1
1
  {% macro igv_script() %}
2
2
  <link rel="shortcut icon" href="//igv.org/web/img/favicon.ico">
3
3
  <!-- IGV JS-->
4
- <script src="https://cdn.jsdelivr.net/npm/igv@2.15.8/dist/igv.min.js"></script>
4
+ <script src="https://cdn.jsdelivr.net/npm/igv@2.15.11/dist/igv.min.js"></script>
5
5
  {% endmacro %}