scout-browser 4.94.1__py3-none-any.whl → 4.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. scout/adapter/mongo/case.py +26 -25
  2. scout/adapter/mongo/hgnc.py +5 -1
  3. scout/adapter/mongo/managed_variant.py +4 -2
  4. scout/adapter/mongo/query.py +6 -4
  5. scout/adapter/mongo/variant.py +11 -6
  6. scout/build/case.py +3 -1
  7. scout/build/panel.py +1 -1
  8. scout/constants/acmg.py +25 -18
  9. scout/constants/gene_tags.py +22 -12
  10. scout/demo/643594.research.mei.vcf.gz +0 -0
  11. scout/demo/643594.research.mei.vcf.gz.tbi +0 -0
  12. scout/demo/cancer.load_config.yaml +1 -3
  13. scout/demo/rnafusion.load_config.yaml +1 -0
  14. scout/load/panelapp.py +8 -12
  15. scout/models/case/case_loading_models.py +3 -0
  16. scout/parse/case.py +1 -0
  17. scout/parse/omim.py +5 -6
  18. scout/parse/panelapp.py +16 -42
  19. scout/parse/variant/compound.py +20 -21
  20. scout/server/app.py +12 -0
  21. scout/server/blueprints/cases/controllers.py +38 -9
  22. scout/server/blueprints/cases/templates/cases/case_report.html +37 -4
  23. scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +12 -0
  24. scout/server/blueprints/clinvar/controllers.py +1 -0
  25. scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +10 -14
  26. scout/server/blueprints/clinvar/views.py +18 -31
  27. scout/server/blueprints/managed_variants/forms.py +17 -2
  28. scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +2 -2
  29. scout/server/blueprints/variant/templates/variant/cancer-variant.html +2 -2
  30. scout/server/blueprints/variant/templates/variant/components.html +27 -4
  31. scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
  32. scout/server/blueprints/variant/templates/variant/variant_details.html +1 -1
  33. scout/server/blueprints/variant/views.py +11 -5
  34. scout/server/blueprints/variants/forms.py +33 -5
  35. scout/server/blueprints/variants/templates/variants/cancer-variants.html +5 -4
  36. scout/server/blueprints/variants/templates/variants/str-variants.html +13 -9
  37. scout/server/extensions/clinvar_extension.py +56 -2
  38. scout/server/links.py +0 -14
  39. scout/utils/acmg.py +5 -5
  40. scout/utils/ccv.py +1 -9
  41. scout/utils/link.py +4 -3
  42. {scout_browser-4.94.1.dist-info → scout_browser-4.96.0.dist-info}/METADATA +1 -1
  43. {scout_browser-4.94.1.dist-info → scout_browser-4.96.0.dist-info}/RECORD +46 -46
  44. {scout_browser-4.94.1.dist-info → scout_browser-4.96.0.dist-info}/WHEEL +0 -0
  45. {scout_browser-4.94.1.dist-info → scout_browser-4.96.0.dist-info}/entry_points.txt +0 -0
  46. {scout_browser-4.94.1.dist-info → scout_browser-4.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -982,7 +982,6 @@ class CaseHandler(object):
982
982
  variant_type=variant_type,
983
983
  category=category,
984
984
  )
985
- # add variants
986
985
  self.load_variants(
987
986
  case_obj=case_obj,
988
987
  variant_type=variant_type,
@@ -994,33 +993,34 @@ class CaseHandler(object):
994
993
  ),
995
994
  )
996
995
 
997
- except (IntegrityError, ValueError, ConfigError, KeyError) as error:
998
- LOG.warning(error)
999
-
1000
- self._load_omics_variants(case_obj, build=genome_build, update=update)
1001
-
1002
- if existing_case:
1003
- self.update_case_data_sharing(old_case=existing_case, new_case=case_obj)
1004
- case_obj["rerun_requested"] = False
1005
- if case_obj["status"] in ["active", "archived"]:
1006
- case_obj["status"] = "inactive"
1007
-
1008
- case_obj["variants_stats"] = self.case_variants_count(
1009
- case_id=case_obj["_id"],
1010
- institute_id=institute_obj["_id"],
1011
- force_update_case=True,
1012
- )
996
+ self._load_omics_variants(case_obj, build=genome_build, update=update)
1013
997
 
1014
- self.update_case_cli(case_obj, institute_obj)
1015
- # update Sanger status for the new inserted variants
1016
- self.update_case_sanger_variants(institute_obj, case_obj, old_sanger_variants)
998
+ except (IntegrityError, ValueError, ConfigError, KeyError) as error:
999
+ LOG.exception(error)
1000
+ raise error
1001
+ else:
1002
+ if not existing_case:
1003
+ LOG.info("Loading case %s into database", case_obj["display_name"])
1004
+ self.add_case(case_obj, institute_obj)
1005
+ finally:
1006
+ if existing_case:
1007
+ self.update_case_data_sharing(old_case=existing_case, new_case=case_obj)
1008
+ case_obj["rerun_requested"] = False
1009
+ if case_obj["status"] in ["active", "archived"]:
1010
+ case_obj["status"] = "inactive"
1011
+
1012
+ case_obj["variants_stats"] = self.case_variants_count(
1013
+ case_id=case_obj["_id"],
1014
+ institute_id=institute_obj["_id"],
1015
+ force_update_case=True,
1016
+ )
1017
1017
 
1018
- if keep_actions and old_evaluated_variants:
1019
- self.update_variant_actions(institute_obj, case_obj, old_evaluated_variants)
1018
+ self.update_case_cli(case_obj, institute_obj)
1019
+ # update Sanger status for the new inserted variants
1020
+ self.update_case_sanger_variants(institute_obj, case_obj, old_sanger_variants)
1020
1021
 
1021
- else:
1022
- LOG.info("Loading case %s into database", case_obj["display_name"])
1023
- self.add_case(case_obj, institute_obj)
1022
+ if keep_actions and old_evaluated_variants:
1023
+ self.update_variant_actions(institute_obj, case_obj, old_evaluated_variants)
1024
1024
 
1025
1025
  return case_obj
1026
1026
 
@@ -1146,6 +1146,7 @@ class CaseHandler(object):
1146
1146
  "RNAfusion_report": case_obj.get("RNAfusion_report"),
1147
1147
  "RNAfusion_report_research": case_obj.get("RNAfusion_report_research"),
1148
1148
  "rna_delivery_report": case_obj.get("rna_delivery_report"),
1149
+ "scout_load_version": case_obj.get("scout_load_version"),
1149
1150
  "smn_tsv": case_obj.get("smn_tsv"),
1150
1151
  "status": case_obj.get("status"),
1151
1152
  "sv_rank_model_version": case_obj.get("sv_rank_model_version"),
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Dict
2
+ from typing import Dict, Set
3
3
 
4
4
  import intervaltree
5
5
  from pymongo.errors import BulkWriteError, DuplicateKeyError
@@ -126,6 +126,10 @@ class GeneHandler(object):
126
126
 
127
127
  return None
128
128
 
129
+ def hgnc_ids(self) -> Set[int]:
130
+ """Returns all HGNC IDs present in the hgnc_gene collection."""
131
+ return set(self.hgnc_collection.distinct("hgnc_id"))
132
+
129
133
  def hgnc_genes(self, hgnc_symbol, build="37", search=False):
130
134
  """Fetch all hgnc genes that match a hgnc symbol
131
135
 
@@ -180,10 +180,12 @@ class ManagedVariantHandler(object):
180
180
  }
181
181
 
182
182
  if "position" in query_options:
183
- query["end"] = {"$gte": int(query_options["position"])}
183
+ position = max(int(query_options["position"]), 1)
184
+ query["end"] = {"$gte": position}
184
185
 
185
186
  if "end" in query_options:
186
- query["position"] = {"$lte": int(query_options["end"])}
187
+ end = max(int(query_options["end"]), 1)
188
+ query["position"] = {"$lte": end}
187
189
 
188
190
  if "sub_category" in query_options:
189
191
  query["sub_category"] = {"$in": query_options["sub_category"]}
@@ -469,8 +469,10 @@ class QueryHandler(object):
469
469
  mongo_query(dict): returned object contains coordinate filters
470
470
 
471
471
  """
472
- mongo_query["position"] = {"$lte": int(query["end"])}
473
- mongo_query["end"] = {"$gte": int(query["start"])}
472
+ start_pos = max(int(query["start"]), 1)
473
+ end_pos = max(int(query["end"]), 1)
474
+ mongo_query["position"] = {"$lte": end_pos}
475
+ mongo_query["end"] = {"$gte": start_pos}
474
476
 
475
477
  return mongo_query
476
478
 
@@ -559,8 +561,8 @@ class QueryHandler(object):
559
561
  query.get("start") is not None and query.get("end") is not None
560
562
  ): # query contains full coordinates
561
563
  chrom = query["chrom"]
562
- start = int(query["start"])
563
- end = int(query["end"])
564
+ start = max(int(query["start"]), 1)
565
+ end = max(int(query["end"]), 1)
564
566
  coordinate_query = self.get_position_query(chrom=chrom, start=start, end=end)
565
567
  else: # query contains only chromosome info
566
568
  coordinate_query = {
@@ -491,12 +491,17 @@ class VariantHandler(VariantLoader):
491
491
  if len(affected_ids) == 0:
492
492
  return []
493
493
  filters["case_id"] = case_obj["_id"]
494
- filters["samples"] = {
495
- "$elemMatch": {
496
- "sample_id": {"$in": affected_ids},
497
- "genotype_call": {"$regex": CARRIER},
498
- }
499
- }
494
+ filters["$or"] = [
495
+ {"samples": {"$size": 1}}, # Condition for samples with exactly one element
496
+ {
497
+ "samples": {
498
+ "$elemMatch": { # Condition for samples with more than one element: individual/sample should be carrier
499
+ "sample_id": {"$in": affected_ids},
500
+ "genotype_call": {"$regex": CARRIER},
501
+ }
502
+ }
503
+ },
504
+ ]
500
505
 
501
506
  if limit_genes:
502
507
  filters["genes.hgnc_id"] = {"$in": limit_genes}
scout/build/case.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  from datetime import datetime
3
3
  from typing import Dict
4
+ from scout import __version__
4
5
 
5
6
  from scout.constants import CUSTOM_CASE_REPORTS, PHENOTYPE_GROUPS
6
7
  from scout.exceptions import ConfigError, IntegrityError
@@ -154,6 +155,7 @@ def build_case(case_data, adapter):
154
155
  now = datetime.now()
155
156
  case_obj["created_at"] = now
156
157
  case_obj["updated_at"] = now
158
+ case_obj["scout_load_version"] = __version__
157
159
 
158
160
  if case_data.get("suspects"):
159
161
  case_obj["suspects"] = case_data["suspects"]
@@ -162,7 +164,7 @@ def build_case(case_data, adapter):
162
164
 
163
165
  case_obj["synopsis"] = case_data.get("synopsis", "")
164
166
 
165
- case_obj["status"] = "inactive"
167
+ case_obj["status"] = case_data.get("status") or "inactive"
166
168
  case_obj["is_research"] = False
167
169
  case_obj["research_requested"] = False
168
170
  case_obj["rerun_requested"] = False
scout/build/panel.py CHANGED
@@ -42,7 +42,7 @@ def build_gene(gene_info: dict, adapter) -> dict:
42
42
 
43
43
  # Add boolean flags
44
44
  gene_obj.update(
45
- {key: True for key in ["reduced_penetrance", "mosaicism"] if gene_info.get(key)}
45
+ {key: gene_info.get(key) for key in ["reduced_penetrance", "mosaicism"] if key in gene_info}
46
46
  )
47
47
 
48
48
  # Handle inheritance models
scout/constants/acmg.py CHANGED
@@ -309,48 +309,55 @@ ACMG_CRITERIA["benign impact"] = OrderedDict(
309
309
 
310
310
  ACMG_POTENTIAL_CONFLICTS = [
311
311
  (
312
- "PVS1",
313
- "PM4",
312
+ {"PVS1", "PM4"},
314
313
  "Use of PVS1 and PM4 together risks double-counting evidence (Tayoun et al 2019).",
315
314
  ),
316
315
  (
317
- "PVS1",
318
- "PM1",
316
+ {"PVS1", "PM1"},
319
317
  "Use of PVS1 and PM1 together is not recommended (Durkie et al 2024).",
320
318
  ),
321
319
  (
322
- "PVS1",
323
- "PP2",
320
+ {"PVS1", "PP2"},
324
321
  "Use of PVS1 and PP2 together is not recommended (Durkie et al 2024).",
325
322
  ),
326
323
  (
327
- "PVS1",
328
- "PS3",
324
+ {"PVS1", "PS3"},
329
325
  "Note that for RNA PS3 should only be taken with PVS1 for well established functional assays, not splicing alone (Walker 2023).",
330
326
  ),
331
327
  (
332
- "PS1",
333
- "PM4",
328
+ {"PS1", "PM4"},
334
329
  "Use of PS1 and PM4 together is not recommended (Durkie et al 2024).",
335
330
  ),
336
331
  (
337
- "PS1",
338
- "PM5",
332
+ {"PS1", "PM5"},
339
333
  "Use of PS1 and PM5 together conflicts with original definition (Richards et al 2015).",
340
334
  ),
341
335
  (
342
- "PS1",
343
- "PP3",
336
+ {"PS1", "PP3"},
344
337
  "Use of PS1 and PP3 together risks double-counting evidence (Tayoun et al 2019).",
345
338
  ),
346
339
  (
347
- "PS2",
348
- "PM6",
340
+ {"PS2", "PM6"},
349
341
  "Use of PS2 and PM6 together conflicts with original definition (Richards et al 2015).",
350
342
  ),
351
343
  (
352
- "PM1",
353
- "PP2",
344
+ {"PM1", "PP2"},
354
345
  "Avoid double-counting evidence for constraints in both PM1 and PP2 (Durkie et al 2024).",
355
346
  ),
347
+ (
348
+ {"PP1", "PP4"},
349
+ "When applying phenotype specificity and segregation data together, a point-system is available from ClinGen SVI (Biesecker et al 2024).",
350
+ ),
351
+ (
352
+ {"BS4", "PP4"},
353
+ "When applying phenotype specificity and segregation data together, a point-system is available from ClinGen SVI (Biesecker et al 2024).",
354
+ ),
355
+ (
356
+ {"PS2", "PP4"},
357
+ "Consider using PS2 without the PP4 criterion, based on the SVI Recommendation for de novo Criteria (PS2 & PM6).",
358
+ ),
359
+ (
360
+ {"PM6", "PP4"},
361
+ "Consider using PM6 without the PP4 criterion, based on the SVI Recommendation for de novo Criteria (PS2 & PM6).",
362
+ ),
356
363
  ]
@@ -33,20 +33,30 @@ INHERITANCE_PALETTE = {
33
33
  "other": {"bgcolor": "bg-light", "text_color": "text-dark"},
34
34
  }
35
35
 
36
- INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "Complete": None, "Incomplete": True}
36
+ INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "None": None, "Complete": False, "Incomplete": True}
37
37
 
38
38
  MODELS_MAP = {
39
- "monoallelic_not_imprinted": ["AD"],
40
- "monoallelic_maternally_imprinted": ["AD"],
41
- "monoallelic_paternally_imprinted": ["AD"],
42
- "monoallelic": ["AD"],
43
- "biallelic": ["AR"],
44
- "monoallelic_and_biallelic": ["AD", "AR"],
45
- "monoallelic_and_more_severe_biallelic": ["AD", "AR"],
46
- "xlinked_biallelic": ["XR"],
47
- "xlinked_monoallelic": ["XD"],
48
- "mitochondrial": ["MT"],
49
- "unknown": [],
39
+ "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted": ["AD"],
40
+ "MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown": ["AD"],
41
+ "MONOALLELIC, autosomal or pseudoautosomal, maternally imprinted (paternal allele expressed)": [
42
+ "AD"
43
+ ],
44
+ "MONOALLELIC, autosomal or pseudoautosomal, paternally imprinted (maternal allele expressed)": [
45
+ "AD"
46
+ ],
47
+ "BIALLELIC, autosomal or pseudoautosomal": ["AR"],
48
+ "BOTH monoallelic and biallelic, autosomal or pseudoautosomal": ["AD", "AR"],
49
+ "BOTH monoallelic and biallelic (but BIALLELIC mutations cause a more SEVERE disease form), autosomal or pseudoautosomal": [
50
+ "AD",
51
+ "AR",
52
+ ],
53
+ "X-LINKED: hemizygous mutation in males, biallelic mutations in females": ["XR"],
54
+ "X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)": [
55
+ "XD"
56
+ ],
57
+ "MITOCHONDRIAL": ["MT"],
58
+ "Other": [],
59
+ "Other - please specifiy in evaluation comments": [],
50
60
  }
51
61
 
52
62
  PANEL_GENE_INFO_TRANSCRIPTS = [
Binary file
Binary file
@@ -44,9 +44,7 @@ delivery_report: scout/demo/delivery_report.html
44
44
  cnv_report: scout/demo/cancer_cnv_report.pdf
45
45
  coverage_qc_report: scout/demo/cancer_coverage_qc_report.html
46
46
 
47
-
48
- # meta data
49
- rank_model_version: '1.1'
47
+ # metadata
50
48
  rank_score_threshold: -100
51
49
  analysis_date: 2018-10-12 14:00:46
52
50
  human_genome_build: '37'
@@ -22,3 +22,4 @@ RNAfusion_inspector_research: scout/demo/rnafusion_inspector_example.html
22
22
  analysis_date: 2022-11-02 14:00:46
23
23
  human_genome_build: '38'
24
24
  track: cancer
25
+ status: 'prioritized'
scout/load/panelapp.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import logging
2
2
  import math
3
3
  from datetime import datetime
4
- from typing import Dict, List, Set
4
+ from typing import List, Set
5
5
 
6
- from click import Abort, progressbar
6
+ from click import progressbar
7
7
 
8
8
  from scout.adapter import MongoAdapter
9
9
  from scout.constants.panels import PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
@@ -28,15 +28,11 @@ def load_panelapp_panel(
28
28
  LOG.info("Fetching all panel app panels")
29
29
  panel_ids: List[str] = panelapp.get_panel_ids(signed_off=False)
30
30
 
31
- ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
32
- hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
33
-
34
- for _ in panel_ids:
31
+ for panel_id in panel_ids:
35
32
  panel_info: dict = panelapp.get_panel(panel_id)
36
33
  parsed_panel = parse_panelapp_panel(
34
+ hgnc_gene_ids=adapter.hgnc_ids(),
37
35
  panel_info=panel_info,
38
- ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
39
- hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
40
36
  institute=institute,
41
37
  confidence=confidence,
42
38
  )
@@ -54,8 +50,6 @@ def get_panelapp_genes(
54
50
  """Parse and collect genes from one or more panelApp panels."""
55
51
 
56
52
  genes = set()
57
- ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
58
- hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
59
53
 
60
54
  with progressbar(panel_ids, label="Parsing panels", length=len(panel_ids)) as panel_ids:
61
55
  for panel_id in panel_ids:
@@ -66,9 +60,8 @@ def get_panelapp_genes(
66
60
  continue
67
61
 
68
62
  parsed_panel = parse_panelapp_panel(
63
+ hgnc_gene_ids=adapter.hgnc_ids(),
69
64
  panel_info=panel_dict,
70
- ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
71
- hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
72
65
  institute=institute,
73
66
  confidence="green",
74
67
  )
@@ -86,6 +79,8 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
86
79
  """Translate panel type input from users to panel type slugs."""
87
80
  if not types_filter:
88
81
  return PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
82
+ if "all" in types_filter:
83
+ return available_types
89
84
  index_list = [int(typeint) - 1 for typeint in types_filter.replace(" ", "").split(",")]
90
85
  return [available_types[i] for i in index_list]
91
86
 
@@ -107,6 +102,7 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
107
102
  available_types: List[str] = panelapp.get_panel_types()
108
103
  for number, type in enumerate(available_types, 1):
109
104
  LOG.info(f"{number}: {type}")
105
+ LOG.info("all: all types above")
110
106
  preselected_options_idx: List[str] = [
111
107
  str(available_types.index(presel) + 1)
112
108
  for presel in PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
@@ -8,6 +8,8 @@ from os.path import abspath, dirname, exists, isabs
8
8
  from pathlib import Path
9
9
  from typing import Any, Dict, List, Optional, Tuple, Union
10
10
 
11
+ from scout.constants import CASE_STATUSES
12
+
11
13
  try:
12
14
  from typing import Literal
13
15
  except ImportError:
@@ -436,6 +438,7 @@ class CaseLoader(BaseModel):
436
438
  smn_tsv: Optional[str] = None
437
439
  sv_rank_model_version: Optional[str] = None
438
440
  synopsis: Optional[Union[List[str], str]] = None
441
+ status: Optional[Literal[tuple(CASE_STATUSES)]] = None
439
442
  track: Literal["rare", "cancer"] = "rare"
440
443
  vcf_files: Optional[VcfFiles]
441
444
 
scout/parse/case.py CHANGED
@@ -34,6 +34,7 @@ def parse_case_data(**kwargs):
34
34
  RNAfusion_report: Path to the RNA fusion report
35
35
  RNAfusion_report_research: Path to the research RNA fusion report
36
36
  smn_tsv(str): Path to an SMN tsv file
37
+ status(str): Optional case status ("prioritized", "inactive", "ignored", "active", "solved", "archived")
37
38
  vcf_cancer(str): Path to a vcf file
38
39
  vcf_cancer_sv(str): Path to a vcf file
39
40
  vcf_fusion(str): Path to a vcf file
scout/parse/omim.py CHANGED
@@ -311,7 +311,6 @@ def get_mim_genes(genemap_lines, mim2gene_lines):
311
311
  mim_number = entry["mim_number"]
312
312
  inheritance = entry["inheritance"]
313
313
  phenotype_info = entry["phenotypes"]
314
- hgnc_symbol = entry["hgnc_symbol"]
315
314
  hgnc_symbols = entry["hgnc_symbols"]
316
315
  if mim_number in genes:
317
316
  genes[mim_number]["inheritance"] = inheritance
@@ -354,11 +353,11 @@ def get_mim_disease(genemap_lines: Iterable[str]) -> Dict[str, Any]:
354
353
  """
355
354
  diseases_found = {}
356
355
 
357
- # Genemap is a file with one entry per gene.
358
- # Each line hold a lot of information and in specific it
359
- # has information about the phenotypes that a gene is associated with
360
- # From this source we collect inheritane patterns and what hgnc symbols
361
- # a disease is associated with
356
+ # Genemap2 is a file with one entry per gene.
357
+ # Each line hold a lot of information and in particular it
358
+ # has information about the phenotypes that a gene is associated with.
359
+ # From this source we collect inheritance patterns and what hgnc symbols
360
+ # a disease is associated with.
362
361
  for entry in parse_genemap2(genemap_lines):
363
362
  hgnc_symbol = entry["hgnc_symbol"]
364
363
  for disease in entry["phenotypes"]:
scout/parse/panelapp.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Code to parse panel information"""
2
2
 
3
3
  import logging
4
- from typing import Dict, Optional
4
+ from typing import Optional, Set
5
5
 
6
6
  from scout.constants import INCOMPLETE_PENETRANCE_MAP, MODELS_MAP, PANELAPP_CONFIDENCE_EXCLUDE
7
7
  from scout.utils.date import get_date
@@ -11,9 +11,8 @@ PANELAPP_PANELS_URL = "https://panelapp.genomicsengland.co.uk/panels/"
11
11
 
12
12
 
13
13
  def parse_panel_app_gene(
14
+ hgnc_gene_ids: Set[int],
14
15
  panelapp_gene: dict,
15
- ensembl_gene_hgnc_id_map: Dict[str, int],
16
- hgnc_symbol_ensembl_gene_map: Dict[str, str],
17
16
  confidence: str,
18
17
  ) -> dict:
19
18
  """Parse a panel app-formatted gene."""
@@ -23,55 +22,30 @@ def parse_panel_app_gene(
23
22
  if confidence_level in PANELAPP_CONFIDENCE_EXCLUDE[confidence]:
24
23
  return gene_info
25
24
 
26
- hgnc_symbol = panelapp_gene["gene_data"]["gene_symbol"]
27
- ensembl_ids = [
28
- version["ensembl_id"]
29
- for genome in panelapp_gene["gene_data"]["ensembl_genes"].values()
30
- for version in genome.values()
31
- ]
32
-
33
- if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
34
- if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
35
- LOG.warning(
36
- f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
37
- )
38
- ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
39
- else:
40
- LOG.warning(
41
- f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
42
- )
43
-
44
- hgnc_ids = set(
45
- ensembl_gene_hgnc_id_map.get(ensembl_id)
46
- for ensembl_id in ensembl_ids
47
- if ensembl_gene_hgnc_id_map.get(ensembl_id)
48
- )
49
- if not hgnc_ids:
50
- LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
25
+ gene_symbol = panelapp_gene["gene_data"]["gene_symbol"]
26
+ hgnc_id = int(panelapp_gene["gene_data"]["hgnc_id"].split(":")[1])
27
+ if hgnc_id not in hgnc_gene_ids:
28
+ LOG.warning("Gene %s does not exist in database. Skipping gene...", gene_symbol)
51
29
  return gene_info
52
30
 
53
- if len(hgnc_ids) > 1:
54
- LOG.warning("Gene %s has unclear identifier. Choose random id", hgnc_symbol)
31
+ gene_info["hgnc_id"] = hgnc_id
32
+ gene_info["hgnc_symbol"] = gene_symbol
55
33
 
56
- gene_info["hgnc_symbol"] = hgnc_symbol
57
- for hgnc_id in hgnc_ids:
58
- gene_info["hgnc_id"] = hgnc_id
34
+ if panelapp_gene["penetrance"] in ["Complete", "Incomplete"]:
35
+ gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
59
36
 
60
- gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
37
+ mode_of_inheritance = panelapp_gene.get("mode_of_inheritance")
38
+ if mode_of_inheritance not in MODELS_MAP:
39
+ LOG.warning(f"Mode of inheritance '{mode_of_inheritance}' not found in MODELS_MAP.")
61
40
 
62
- inheritance_models = []
63
- for model in MODELS_MAP.get(panelapp_gene["mode_of_inheritance"], []):
64
- inheritance_models.append(model)
65
-
66
- gene_info["inheritance_models"] = inheritance_models
41
+ gene_info["inheritance_models"] = MODELS_MAP.get(mode_of_inheritance, [])
67
42
 
68
43
  return gene_info
69
44
 
70
45
 
71
46
  def parse_panelapp_panel(
47
+ hgnc_gene_ids: Set[int],
72
48
  panel_info: dict,
73
- ensembl_id_to_hgnc_id_map: Dict[str, int],
74
- hgnc_symbol_to_ensembl_id_map: Dict[str, str],
75
49
  institute: Optional[str] = "cust000",
76
50
  confidence: Optional[str] = "green",
77
51
  ) -> dict:
@@ -101,7 +75,7 @@ def parse_panelapp_panel(
101
75
  nr_genes = 0
102
76
  for nr_genes, gene in enumerate(panel_info["genes"], 1):
103
77
  gene_info = parse_panel_app_gene(
104
- gene, ensembl_id_to_hgnc_id_map, hgnc_symbol_to_ensembl_id_map, confidence
78
+ hgnc_gene_ids=hgnc_gene_ids, panelapp_gene=gene, confidence=confidence
105
79
  )
106
80
  if not gene_info:
107
81
  nr_excluded += 1
@@ -1,43 +1,42 @@
1
1
  import logging
2
+ from typing import List
2
3
 
3
4
  from scout.utils.md5 import generate_md5_key
4
5
 
5
6
  LOG = logging.getLogger(__name__)
6
7
 
7
8
 
8
- def parse_compounds(compound_info, case_id, variant_type):
9
- """Get a list with compounds objects for this variant.
9
+ def parse_compounds(compound_info: str, case_id: str, variant_type: str) -> List[dict]:
10
+ """Get a list with compounds objects(dicts) for this variant.
10
11
 
11
- Arguments:
12
- compound_info(str): A Variant dictionary
13
- case_id (str): unique family id
14
- variant_type(str): 'research' or 'clinical'
12
+ Scout IDs do not have "chr" prefixed chromosome names, hence we lstrip that from
13
+ any compound names.
14
+
15
+ We need the case id to construct the correct id, as well as the variant type (clinical or research).
15
16
 
16
- Returns:
17
- compounds(list(dict)): A list of compounds
18
17
  """
19
- # We need the case to construct the correct id
18
+
20
19
  compounds = []
21
20
  if compound_info:
22
21
  for family_info in compound_info.split(","):
23
- splitted_entry = family_info.split(":")
22
+ split_entry = family_info.split(":")
24
23
  # This is the family id
25
- if splitted_entry[0] == case_id:
26
- for compound in splitted_entry[1].split("|"):
27
- splitted_compound = compound.split(">")
28
- compound_obj = {}
29
- compound_name = splitted_compound[0]
30
- compound_obj["variant"] = generate_md5_key(
31
- compound_name.split("_") + [variant_type, case_id]
32
- )
24
+ if split_entry[0] == case_id:
25
+ for compound in split_entry[1].split("|"):
26
+ split_compound = compound.split(">")
27
+ compound_name = split_compound[0].lstrip("chr")
28
+ compound_obj = {
29
+ "display_name": compound_name,
30
+ "variant": generate_md5_key(
31
+ compound_name.split("_") + [variant_type, case_id]
32
+ ),
33
+ }
33
34
 
34
35
  try:
35
- compound_score = float(splitted_compound[1])
36
+ compound_score = float(split_compound[1])
36
37
  except (TypeError, IndexError):
37
38
  compound_score = 0.0
38
-
39
39
  compound_obj["score"] = compound_score
40
- compound_obj["display_name"] = compound_name
41
40
 
42
41
  compounds.append(compound_obj)
43
42
 
scout/server/app.py CHANGED
@@ -13,6 +13,7 @@ from flask_login import current_user
13
13
  from markdown import markdown as python_markdown
14
14
  from markupsafe import Markup
15
15
 
16
+ from scout.constants import SPIDEX_HUMAN
16
17
  from scout.log import init_log
17
18
 
18
19
  from . import extensions
@@ -199,6 +200,17 @@ def register_filters(app):
199
200
  return "{:,}".format(int(value)).replace(",", " ")
200
201
  return value
201
202
 
203
+ @app.template_filter()
204
+ def spidex_human(spidex):
205
+ """Translate SPIDEX annotation to human readable string."""
206
+ if spidex is None:
207
+ return "not_reported"
208
+ if abs(spidex) < SPIDEX_HUMAN["low"]["pos"][1]:
209
+ return "low"
210
+ if abs(spidex) < SPIDEX_HUMAN["medium"]["pos"][1]:
211
+ return "medium"
212
+ return "high"
213
+
202
214
  @app.template_filter()
203
215
  def human_decimal(number, ndigits=4):
204
216
  """Return a standard representation of a decimal number.