scout-browser 4.93.1__py3-none-any.whl → 4.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. scout/adapter/mongo/base.py +0 -0
  2. scout/adapter/mongo/hgnc.py +5 -1
  3. scout/adapter/mongo/managed_variant.py +4 -2
  4. scout/adapter/mongo/query.py +91 -54
  5. scout/adapter/mongo/variant.py +13 -8
  6. scout/build/panel.py +1 -1
  7. scout/commands/export/export_command.py +0 -0
  8. scout/commands/load/base.py +0 -0
  9. scout/commands/load/user.py +0 -0
  10. scout/commands/update/disease.py +0 -0
  11. scout/commands/update/genes.py +0 -0
  12. scout/commands/wipe_database.py +0 -0
  13. scout/constants/gene_tags.py +22 -12
  14. scout/demo/643594.research.mei.vcf.gz +0 -0
  15. scout/demo/643594.research.mei.vcf.gz.tbi +0 -0
  16. scout/load/panelapp.py +8 -12
  17. scout/parse/omim.py +5 -6
  18. scout/parse/panelapp.py +16 -42
  19. scout/parse/variant/compound.py +20 -21
  20. scout/parse/variant/gene.py +0 -0
  21. scout/parse/variant/genotype.py +0 -0
  22. scout/resources/custom_igv_tracks/mane.bb +0 -0
  23. scout/server/blueprints/cases/controllers.py +48 -0
  24. scout/server/blueprints/cases/templates/cases/case_report.html +17 -2
  25. scout/server/blueprints/cases/views.py +5 -5
  26. scout/server/blueprints/clinvar/controllers.py +4 -5
  27. scout/server/blueprints/institutes/controllers.py +129 -67
  28. scout/server/blueprints/institutes/forms.py +5 -2
  29. scout/server/blueprints/institutes/templates/overview/cases.html +6 -0
  30. scout/server/blueprints/institutes/templates/overview/utils.html +6 -5
  31. scout/server/blueprints/managed_variants/forms.py +17 -2
  32. scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +2 -2
  33. scout/server/blueprints/variant/templates/variant/components.html +27 -4
  34. scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
  35. scout/server/blueprints/variant/templates/variant/tx_overview.html +3 -3
  36. scout/server/blueprints/variant/views.py +1 -2
  37. scout/server/blueprints/variants/forms.py +33 -5
  38. scout/server/blueprints/variants/templates/variants/cancer-sv-variants.html +4 -18
  39. scout/server/blueprints/variants/templates/variants/cancer-variants.html +2 -12
  40. scout/server/blueprints/variants/templates/variants/components.html +15 -1
  41. scout/server/blueprints/variants/templates/variants/sv-variants.html +2 -2
  42. scout/server/links.py +1 -1
  43. scout/utils/acmg.py +0 -1
  44. scout/utils/ccv.py +1 -9
  45. scout/utils/link.py +4 -3
  46. scout/utils/md5.py +0 -0
  47. {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/METADATA +66 -45
  48. {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/RECORD +41 -42
  49. {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/WHEEL +1 -2
  50. scout/__version__.py +0 -1
  51. scout_browser-4.93.1.dist-info/top_level.txt +0 -1
  52. {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info}/entry_points.txt +0 -0
  53. {scout_browser-4.93.1.dist-info → scout_browser-4.95.0.dist-info/licenses}/LICENSE +0 -0
File without changes
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Dict
2
+ from typing import Dict, Set
3
3
 
4
4
  import intervaltree
5
5
  from pymongo.errors import BulkWriteError, DuplicateKeyError
@@ -126,6 +126,10 @@ class GeneHandler(object):
126
126
 
127
127
  return None
128
128
 
129
+ def hgnc_ids(self) -> Set[int]:
130
+ """Returns all HGNC IDs present in the hgnc_gene collection."""
131
+ return set(self.hgnc_collection.distinct("hgnc_id"))
132
+
129
133
  def hgnc_genes(self, hgnc_symbol, build="37", search=False):
130
134
  """Fetch all hgnc genes that match a hgnc symbol
131
135
 
@@ -180,10 +180,12 @@ class ManagedVariantHandler(object):
180
180
  }
181
181
 
182
182
  if "position" in query_options:
183
- query["end"] = {"$gte": int(query_options["position"])}
183
+ position = max(int(query_options["position"]), 1)
184
+ query["end"] = {"$gte": position}
184
185
 
185
186
  if "end" in query_options:
186
- query["position"] = {"$lte": int(query_options["end"])}
187
+ end = max(int(query_options["end"]), 1)
188
+ query["position"] = {"$lte": end}
187
189
 
188
190
  if "sub_category" in query_options:
189
191
  query["sub_category"] = {"$in": query_options["sub_category"]}
@@ -469,68 +469,105 @@ class QueryHandler(object):
469
469
  mongo_query(dict): returned object contains coordinate filters
470
470
 
471
471
  """
472
- mongo_query["position"] = {"$lte": int(query["end"])}
473
- mongo_query["end"] = {"$gte": int(query["start"])}
472
+ start_pos = max(int(query["start"]), 1)
473
+ end_pos = max(int(query["end"]), 1)
474
+ mongo_query["position"] = {"$lte": end_pos}
475
+ mongo_query["end"] = {"$gte": start_pos}
474
476
 
475
477
  return mongo_query
476
478
 
477
- def sv_coordinate_query(self, query):
478
- """Adds genomic coordinated-related filters to the query object
479
- This method is called to buid coordinate query for sv variants
479
+ def get_position_query(self, chrom: str, start: int, end: int) -> dict:
480
+ """Helper function that returns a dictionary containing start and stop coordinates.
480
481
 
481
- Args:
482
- query(dict): a dictionary of query filters specified by the users
483
- mongo_query(dict): the query that is going to be submitted to the database
482
+ The position query consists of 3 parts, each of them elements of the $or
483
+ First part applies to searches when chromosome and end_chrom are the same.
484
+ Here are the possible overlapping search scenarios:
485
+ # Case 1
486
+ # filter xxxxxxxxx
487
+ # Variant xxxxxxxx
484
488
 
485
- Returns:
486
- coordinate_query(dict): returned object contains coordinate filters for sv variant
489
+ # Case 2
490
+ # filter xxxxxxxxx
491
+ # Variant xxxxxxxx
492
+
493
+ # Case 3
494
+ # filter xxxxxxxxx
495
+ # Variant xx
487
496
 
497
+ # Case 4
498
+ # filter xxxxxxxxx
499
+ # Variant xxxxxxxxxxxxxx
500
+
501
+ Second and third elements of the $or cover queries for variants where chromosome != end_chrom.
502
+ In this situation there are the following scenarios:
503
+ - Case chromosome != end_chrom, position matching 'chromosome'
504
+ - Case chromosome != end_chrom, position matching 'end_chrom'
488
505
  """
489
- coordinate_query = None
490
- chromosome_query = {"$or": [{"chromosome": query["chrom"]}, {"end_chrom": query["chrom"]}]}
491
- if query.get("start") and query.get("end"):
492
- # Query for overlapping intervals. Taking into account these cases:
493
- # 1
494
- # filter xxxxxxxxx
495
- # Variant xxxxxxxx
496
-
497
- # 2
498
- # filter xxxxxxxxx
499
- # Variant xxxxxxxx
500
-
501
- # 3
502
- # filter xxxxxxxxx
503
- # Variant xx
504
-
505
- # 4
506
- # filter xxxxxxxxx
507
- # Variant xxxxxxxxxxxxxx
508
- position_query = {
509
- "$or": [
510
- {"end": {"$gte": int(query["start"]), "$lte": int(query["end"])}}, # 1
511
- {
512
- "position": {
513
- "$lte": int(query["end"]),
514
- "$gte": int(query["start"]),
515
- }
516
- }, # 2
517
- {
518
- "$and": [
519
- {"position": {"$gte": int(query["start"])}},
520
- {"end": {"$lte": int(query["end"])}},
521
- ]
522
- }, # 3
523
- {
524
- "$and": [
525
- {"position": {"$lte": int(query["start"])}},
526
- {"end": {"$gte": int(query["end"])}},
527
- ]
528
- }, # 4
529
- ]
506
+
507
+ return {
508
+ "$or": [
509
+ # Case chromosome == end_chrom
510
+ {
511
+ "$and": [
512
+ {"chromosome": chrom},
513
+ {"end_chrom": chrom},
514
+ {
515
+ "$or": [
516
+ # Overlapping cases 1-4 (chromosome == end_chrom)
517
+ {"end": {"$gte": start, "$lte": end}}, # Case 1
518
+ {"position": {"$gte": start, "$lte": end}}, # Case 2
519
+ {
520
+ "$and": [
521
+ {"position": {"$lte": start}},
522
+ {"end": {"$gte": end}},
523
+ ]
524
+ }, # Case 3
525
+ {
526
+ "$and": [
527
+ {"position": {"$gte": start}},
528
+ {"end": {"$lte": end}},
529
+ ]
530
+ }, # Case 4
531
+ ]
532
+ },
533
+ ]
534
+ },
535
+ # Case chromosome != end_chrom, position matching 'chromosome'
536
+ {
537
+ "$and": [
538
+ {"chromosome": chrom},
539
+ {"end_chrom": {"$ne": chrom}},
540
+ {"position": {"$gte": start}},
541
+ {"position": {"$lte": end}},
542
+ ]
543
+ },
544
+ # Case chromosome != end_chrom, position matching 'end_chrom'
545
+ {
546
+ "$and": [
547
+ {"chromosome": {"$ne": chrom}},
548
+ {"end_chrom": chrom},
549
+ {"end": {"$gte": start}},
550
+ {"end": {"$lte": end}},
551
+ ]
552
+ },
553
+ ]
554
+ }
555
+
556
+ def sv_coordinate_query(self, query: dict) -> dict:
557
+ """Adds genomic coordinated-related filters to the query object
558
+ This method is called to build coordinate query for sv variants
559
+ """
560
+ if (
561
+ query.get("start") is not None and query.get("end") is not None
562
+ ): # query contains full coordinates
563
+ chrom = query["chrom"]
564
+ start = max(int(query["start"]), 1)
565
+ end = max(int(query["end"]), 1)
566
+ coordinate_query = self.get_position_query(chrom=chrom, start=start, end=end)
567
+ else: # query contains only chromosome info
568
+ coordinate_query = {
569
+ "$or": [{"chromosome": query["chrom"]}, {"end_chrom": query["chrom"]}]
530
570
  }
531
- coordinate_query = {"$and": [chromosome_query, position_query]}
532
- else:
533
- coordinate_query = chromosome_query
534
571
  return coordinate_query
535
572
 
536
573
  def gene_filter(self, query, build="37"):
@@ -311,14 +311,14 @@ class VariantHandler(VariantLoader):
311
311
  "category": variant_obj["category"], # sv
312
312
  "variant_type": variant_obj["variant_type"], # clinical or research
313
313
  "sub_category": variant_obj["sub_category"], # example -> "del"
314
- "$and": coordinate_query["$and"], # query for overlapping SV variants
314
+ "$or": coordinate_query["$or"], # query for overlapping SV variants
315
315
  }
316
-
317
316
  overlapping_svs = list(
318
317
  self.variant_collection.find(
319
318
  query,
320
319
  )
321
320
  )
321
+
322
322
  if not overlapping_svs:
323
323
  return None
324
324
  if len(overlapping_svs) == 1:
@@ -491,12 +491,17 @@ class VariantHandler(VariantLoader):
491
491
  if len(affected_ids) == 0:
492
492
  return []
493
493
  filters["case_id"] = case_obj["_id"]
494
- filters["samples"] = {
495
- "$elemMatch": {
496
- "sample_id": {"$in": affected_ids},
497
- "genotype_call": {"$regex": CARRIER},
498
- }
499
- }
494
+ filters["$or"] = [
495
+ {"samples": {"$size": 1}}, # Condition for samples with exactly one element
496
+ {
497
+ "samples": {
498
+ "$elemMatch": { # Condition for samples with more than one element: individual/sample should be carrier
499
+ "sample_id": {"$in": affected_ids},
500
+ "genotype_call": {"$regex": CARRIER},
501
+ }
502
+ }
503
+ },
504
+ ]
500
505
 
501
506
  if limit_genes:
502
507
  filters["genes.hgnc_id"] = {"$in": limit_genes}
scout/build/panel.py CHANGED
@@ -42,7 +42,7 @@ def build_gene(gene_info: dict, adapter) -> dict:
42
42
 
43
43
  # Add boolean flags
44
44
  gene_obj.update(
45
- {key: True for key in ["reduced_penetrance", "mosaicism"] if gene_info.get(key)}
45
+ {key: gene_info.get(key) for key in ["reduced_penetrance", "mosaicism"] if key in gene_info}
46
46
  )
47
47
 
48
48
  # Handle inheritance models
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -33,20 +33,30 @@ INHERITANCE_PALETTE = {
33
33
  "other": {"bgcolor": "bg-light", "text_color": "text-dark"},
34
34
  }
35
35
 
36
- INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "Complete": None, "Incomplete": True}
36
+ INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "None": None, "Complete": False, "Incomplete": True}
37
37
 
38
38
  MODELS_MAP = {
39
- "monoallelic_not_imprinted": ["AD"],
40
- "monoallelic_maternally_imprinted": ["AD"],
41
- "monoallelic_paternally_imprinted": ["AD"],
42
- "monoallelic": ["AD"],
43
- "biallelic": ["AR"],
44
- "monoallelic_and_biallelic": ["AD", "AR"],
45
- "monoallelic_and_more_severe_biallelic": ["AD", "AR"],
46
- "xlinked_biallelic": ["XR"],
47
- "xlinked_monoallelic": ["XD"],
48
- "mitochondrial": ["MT"],
49
- "unknown": [],
39
+ "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted": ["AD"],
40
+ "MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown": ["AD"],
41
+ "MONOALLELIC, autosomal or pseudoautosomal, maternally imprinted (paternal allele expressed)": [
42
+ "AD"
43
+ ],
44
+ "MONOALLELIC, autosomal or pseudoautosomal, paternally imprinted (maternal allele expressed)": [
45
+ "AD"
46
+ ],
47
+ "BIALLELIC, autosomal or pseudoautosomal": ["AR"],
48
+ "BOTH monoallelic and biallelic, autosomal or pseudoautosomal": ["AD", "AR"],
49
+ "BOTH monoallelic and biallelic (but BIALLELIC mutations cause a more SEVERE disease form), autosomal or pseudoautosomal": [
50
+ "AD",
51
+ "AR",
52
+ ],
53
+ "X-LINKED: hemizygous mutation in males, biallelic mutations in females": ["XR"],
54
+ "X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)": [
55
+ "XD"
56
+ ],
57
+ "MITOCHONDRIAL": ["MT"],
58
+ "Other": [],
59
+ "Other - please specifiy in evaluation comments": [],
50
60
  }
51
61
 
52
62
  PANEL_GENE_INFO_TRANSCRIPTS = [
Binary file
Binary file
scout/load/panelapp.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import logging
2
2
  import math
3
3
  from datetime import datetime
4
- from typing import Dict, List, Set
4
+ from typing import List, Set
5
5
 
6
- from click import Abort, progressbar
6
+ from click import progressbar
7
7
 
8
8
  from scout.adapter import MongoAdapter
9
9
  from scout.constants.panels import PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
@@ -28,15 +28,11 @@ def load_panelapp_panel(
28
28
  LOG.info("Fetching all panel app panels")
29
29
  panel_ids: List[str] = panelapp.get_panel_ids(signed_off=False)
30
30
 
31
- ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
32
- hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
33
-
34
- for _ in panel_ids:
31
+ for panel_id in panel_ids:
35
32
  panel_info: dict = panelapp.get_panel(panel_id)
36
33
  parsed_panel = parse_panelapp_panel(
34
+ hgnc_gene_ids=adapter.hgnc_ids(),
37
35
  panel_info=panel_info,
38
- ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
39
- hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
40
36
  institute=institute,
41
37
  confidence=confidence,
42
38
  )
@@ -54,8 +50,6 @@ def get_panelapp_genes(
54
50
  """Parse and collect genes from one or more panelApp panels."""
55
51
 
56
52
  genes = set()
57
- ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
58
- hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
59
53
 
60
54
  with progressbar(panel_ids, label="Parsing panels", length=len(panel_ids)) as panel_ids:
61
55
  for panel_id in panel_ids:
@@ -66,9 +60,8 @@ def get_panelapp_genes(
66
60
  continue
67
61
 
68
62
  parsed_panel = parse_panelapp_panel(
63
+ hgnc_gene_ids=adapter.hgnc_ids(),
69
64
  panel_info=panel_dict,
70
- ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
71
- hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
72
65
  institute=institute,
73
66
  confidence="green",
74
67
  )
@@ -86,6 +79,8 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
86
79
  """Translate panel type input from users to panel type slugs."""
87
80
  if not types_filter:
88
81
  return PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
82
+ if "all" in types_filter:
83
+ return available_types
89
84
  index_list = [int(typeint) - 1 for typeint in types_filter.replace(" ", "").split(",")]
90
85
  return [available_types[i] for i in index_list]
91
86
 
@@ -107,6 +102,7 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
107
102
  available_types: List[str] = panelapp.get_panel_types()
108
103
  for number, type in enumerate(available_types, 1):
109
104
  LOG.info(f"{number}: {type}")
105
+ LOG.info("all: all types above")
110
106
  preselected_options_idx: List[str] = [
111
107
  str(available_types.index(presel) + 1)
112
108
  for presel in PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
scout/parse/omim.py CHANGED
@@ -311,7 +311,6 @@ def get_mim_genes(genemap_lines, mim2gene_lines):
311
311
  mim_number = entry["mim_number"]
312
312
  inheritance = entry["inheritance"]
313
313
  phenotype_info = entry["phenotypes"]
314
- hgnc_symbol = entry["hgnc_symbol"]
315
314
  hgnc_symbols = entry["hgnc_symbols"]
316
315
  if mim_number in genes:
317
316
  genes[mim_number]["inheritance"] = inheritance
@@ -354,11 +353,11 @@ def get_mim_disease(genemap_lines: Iterable[str]) -> Dict[str, Any]:
354
353
  """
355
354
  diseases_found = {}
356
355
 
357
- # Genemap is a file with one entry per gene.
358
- # Each line hold a lot of information and in specific it
359
- # has information about the phenotypes that a gene is associated with
360
- # From this source we collect inheritane patterns and what hgnc symbols
361
- # a disease is associated with
356
+ # Genemap2 is a file with one entry per gene.
357
+ # Each line hold a lot of information and in particular it
358
+ # has information about the phenotypes that a gene is associated with.
359
+ # From this source we collect inheritance patterns and what hgnc symbols
360
+ # a disease is associated with.
362
361
  for entry in parse_genemap2(genemap_lines):
363
362
  hgnc_symbol = entry["hgnc_symbol"]
364
363
  for disease in entry["phenotypes"]:
scout/parse/panelapp.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Code to parse panel information"""
2
2
 
3
3
  import logging
4
- from typing import Dict, Optional
4
+ from typing import Optional, Set
5
5
 
6
6
  from scout.constants import INCOMPLETE_PENETRANCE_MAP, MODELS_MAP, PANELAPP_CONFIDENCE_EXCLUDE
7
7
  from scout.utils.date import get_date
@@ -11,9 +11,8 @@ PANELAPP_PANELS_URL = "https://panelapp.genomicsengland.co.uk/panels/"
11
11
 
12
12
 
13
13
  def parse_panel_app_gene(
14
+ hgnc_gene_ids: Set[int],
14
15
  panelapp_gene: dict,
15
- ensembl_gene_hgnc_id_map: Dict[str, int],
16
- hgnc_symbol_ensembl_gene_map: Dict[str, str],
17
16
  confidence: str,
18
17
  ) -> dict:
19
18
  """Parse a panel app-formatted gene."""
@@ -23,55 +22,30 @@ def parse_panel_app_gene(
23
22
  if confidence_level in PANELAPP_CONFIDENCE_EXCLUDE[confidence]:
24
23
  return gene_info
25
24
 
26
- hgnc_symbol = panelapp_gene["gene_data"]["gene_symbol"]
27
- ensembl_ids = [
28
- version["ensembl_id"]
29
- for genome in panelapp_gene["gene_data"]["ensembl_genes"].values()
30
- for version in genome.values()
31
- ]
32
-
33
- if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
34
- if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
35
- LOG.warning(
36
- f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
37
- )
38
- ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
39
- else:
40
- LOG.warning(
41
- f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
42
- )
43
-
44
- hgnc_ids = set(
45
- ensembl_gene_hgnc_id_map.get(ensembl_id)
46
- for ensembl_id in ensembl_ids
47
- if ensembl_gene_hgnc_id_map.get(ensembl_id)
48
- )
49
- if not hgnc_ids:
50
- LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
25
+ gene_symbol = panelapp_gene["gene_data"]["gene_symbol"]
26
+ hgnc_id = int(panelapp_gene["gene_data"]["hgnc_id"].split(":")[1])
27
+ if hgnc_id not in hgnc_gene_ids:
28
+ LOG.warning("Gene %s does not exist in database. Skipping gene...", gene_symbol)
51
29
  return gene_info
52
30
 
53
- if len(hgnc_ids) > 1:
54
- LOG.warning("Gene %s has unclear identifier. Choose random id", hgnc_symbol)
31
+ gene_info["hgnc_id"] = hgnc_id
32
+ gene_info["hgnc_symbol"] = gene_symbol
55
33
 
56
- gene_info["hgnc_symbol"] = hgnc_symbol
57
- for hgnc_id in hgnc_ids:
58
- gene_info["hgnc_id"] = hgnc_id
34
+ if panelapp_gene["penetrance"] in ["Complete", "Incomplete"]:
35
+ gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
59
36
 
60
- gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
37
+ mode_of_inheritance = panelapp_gene.get("mode_of_inheritance")
38
+ if mode_of_inheritance not in MODELS_MAP:
39
+ LOG.warning(f"Mode of inheritance '{mode_of_inheritance}' not found in MODELS_MAP.")
61
40
 
62
- inheritance_models = []
63
- for model in MODELS_MAP.get(panelapp_gene["mode_of_inheritance"], []):
64
- inheritance_models.append(model)
65
-
66
- gene_info["inheritance_models"] = inheritance_models
41
+ gene_info["inheritance_models"] = MODELS_MAP.get(mode_of_inheritance, [])
67
42
 
68
43
  return gene_info
69
44
 
70
45
 
71
46
  def parse_panelapp_panel(
47
+ hgnc_gene_ids: Set[int],
72
48
  panel_info: dict,
73
- ensembl_id_to_hgnc_id_map: Dict[str, int],
74
- hgnc_symbol_to_ensembl_id_map: Dict[str, str],
75
49
  institute: Optional[str] = "cust000",
76
50
  confidence: Optional[str] = "green",
77
51
  ) -> dict:
@@ -101,7 +75,7 @@ def parse_panelapp_panel(
101
75
  nr_genes = 0
102
76
  for nr_genes, gene in enumerate(panel_info["genes"], 1):
103
77
  gene_info = parse_panel_app_gene(
104
- gene, ensembl_id_to_hgnc_id_map, hgnc_symbol_to_ensembl_id_map, confidence
78
+ hgnc_gene_ids=hgnc_gene_ids, panelapp_gene=gene, confidence=confidence
105
79
  )
106
80
  if not gene_info:
107
81
  nr_excluded += 1
@@ -1,43 +1,42 @@
1
1
  import logging
2
+ from typing import List
2
3
 
3
4
  from scout.utils.md5 import generate_md5_key
4
5
 
5
6
  LOG = logging.getLogger(__name__)
6
7
 
7
8
 
8
- def parse_compounds(compound_info, case_id, variant_type):
9
- """Get a list with compounds objects for this variant.
9
+ def parse_compounds(compound_info: str, case_id: str, variant_type: str) -> List[dict]:
10
+ """Get a list with compounds objects(dicts) for this variant.
10
11
 
11
- Arguments:
12
- compound_info(str): A Variant dictionary
13
- case_id (str): unique family id
14
- variant_type(str): 'research' or 'clinical'
12
+ Scout IDs do not have "chr" prefixed chromosome names, hence we lstrip that from
13
+ any compound names.
14
+
15
+ We need the case id to construct the correct id, as well as the variant type (clinical or research).
15
16
 
16
- Returns:
17
- compounds(list(dict)): A list of compounds
18
17
  """
19
- # We need the case to construct the correct id
18
+
20
19
  compounds = []
21
20
  if compound_info:
22
21
  for family_info in compound_info.split(","):
23
- splitted_entry = family_info.split(":")
22
+ split_entry = family_info.split(":")
24
23
  # This is the family id
25
- if splitted_entry[0] == case_id:
26
- for compound in splitted_entry[1].split("|"):
27
- splitted_compound = compound.split(">")
28
- compound_obj = {}
29
- compound_name = splitted_compound[0]
30
- compound_obj["variant"] = generate_md5_key(
31
- compound_name.split("_") + [variant_type, case_id]
32
- )
24
+ if split_entry[0] == case_id:
25
+ for compound in split_entry[1].split("|"):
26
+ split_compound = compound.split(">")
27
+ compound_name = split_compound[0].lstrip("chr")
28
+ compound_obj = {
29
+ "display_name": compound_name,
30
+ "variant": generate_md5_key(
31
+ compound_name.split("_") + [variant_type, case_id]
32
+ ),
33
+ }
33
34
 
34
35
  try:
35
- compound_score = float(splitted_compound[1])
36
+ compound_score = float(split_compound[1])
36
37
  except (TypeError, IndexError):
37
38
  compound_score = 0.0
38
-
39
39
  compound_obj["score"] = compound_score
40
- compound_obj["display_name"] = compound_name
41
40
 
42
41
  compounds.append(compound_obj)
43
42
 
File without changes
File without changes
@@ -62,6 +62,8 @@ from scout.server.utils import (
62
62
  case_has_rna_tracks,
63
63
  institute_and_case,
64
64
  )
65
+ from scout.utils.acmg import get_acmg_temperature
66
+ from scout.utils.ccv import get_ccv_temperature
65
67
 
66
68
  LOG = logging.getLogger(__name__)
67
69
 
@@ -611,6 +613,46 @@ def check_outdated_gene_panel(panel_obj, latest_panel):
611
613
  return extra_genes, missing_genes
612
614
 
613
615
 
616
+ def add_bayesian_acmg_classification(variant_obj: dict):
617
+ """Append info to display the ACMG VUS Bayesian score / temperature.
618
+ Criteria have a term and a modifier field on the db document
619
+ that are joined together in a string to conform to a regular
620
+ ACMG term format. A set of such terms are passed on for evaluation
621
+ to the same function as the ACMG classification form uses.
622
+ """
623
+ variant_acmg_classifications = list(
624
+ store.get_evaluations_case_specific(document_id=variant_obj["_id"])
625
+ )
626
+ if variant_acmg_classifications:
627
+ terms = set()
628
+ for criterium in variant_acmg_classifications[0].get("criteria", []):
629
+ term = criterium.get("term")
630
+ if criterium.get("modifier"):
631
+ term += f"_{criterium.get('modifier')}"
632
+ terms.add(term)
633
+ variant_obj["bayesian_acmg"] = get_acmg_temperature(terms)
634
+
635
+
636
+ def add_bayesian_ccv_classification(variant_obj: dict):
637
+ """Append info to display the CCV VUS Bayesian score / temperature.
638
+ Criteria have a term and a modifier field on the db document
639
+ that are joined together in a string to conform to a regular
640
+ CCV term format. A set of such terms are passed on for evaluation
641
+ to the same function as the CCV classification form uses.
642
+ """
643
+ variant_ccv_classifications = list(
644
+ store.get_ccv_evaluations_case_specific(document_id=variant_obj["_id"])
645
+ )
646
+ if variant_ccv_classifications:
647
+ terms = set()
648
+ for criterium in variant_ccv_classifications[0].get("ccv_criteria", []):
649
+ term = criterium.get("term")
650
+ if criterium.get("modifier"):
651
+ term += f"_{criterium.get('modifier')}"
652
+ terms.add(term)
653
+ variant_obj["bayesian_ccv"] = get_ccv_temperature(terms)
654
+
655
+
614
656
  def case_report_variants(store: MongoAdapter, case_obj: dict, institute_obj: dict, data: dict):
615
657
  """Gather evaluated variants info to include in case report."""
616
658
 
@@ -624,6 +666,8 @@ def case_report_variants(store: MongoAdapter, case_obj: dict, institute_obj: dic
624
666
  continue
625
667
  if case_key == "partial_causatives":
626
668
  var_obj["phenotypes"] = case_obj["partial_causatives"][var_id]
669
+ add_bayesian_acmg_classification(var_obj)
670
+ add_bayesian_ccv_classification(var_obj)
627
671
  evaluated_variants_by_type[eval_category].append(
628
672
  _get_decorated_var(var_obj=var_obj, institute_obj=institute_obj, case_obj=case_obj)
629
673
  )
@@ -663,6 +707,10 @@ def _append_evaluated_variant_by_type(
663
707
  """
664
708
  for eval_category, variant_key in CASE_REPORT_VARIANT_TYPES.items():
665
709
  if variant_key in var_obj and var_obj[variant_key] is not None:
710
+
711
+ add_bayesian_acmg_classification(var_obj)
712
+ add_bayesian_ccv_classification(var_obj)
713
+
666
714
  evaluated_variants_by_type[eval_category].append(
667
715
  _get_decorated_var(var_obj=var_obj, institute_obj=institute_obj, case_obj=case_obj)
668
716
  )