scout-browser 4.82.2__py3-none-any.whl → 4.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. scout/__version__.py +1 -1
  2. scout/adapter/client.py +1 -0
  3. scout/adapter/mongo/base.py +0 -1
  4. scout/adapter/mongo/case.py +19 -37
  5. scout/adapter/mongo/case_events.py +98 -2
  6. scout/adapter/mongo/hgnc.py +39 -22
  7. scout/adapter/mongo/institute.py +3 -9
  8. scout/adapter/mongo/panel.py +2 -1
  9. scout/adapter/mongo/variant.py +12 -2
  10. scout/adapter/mongo/variant_loader.py +156 -141
  11. scout/build/genes/hgnc_gene.py +5 -134
  12. scout/commands/base.py +1 -0
  13. scout/commands/download/ensembl.py +1 -0
  14. scout/commands/download/everything.py +1 -0
  15. scout/commands/download/exac.py +1 -0
  16. scout/commands/download/hgnc.py +1 -0
  17. scout/commands/download/hpo.py +1 -0
  18. scout/commands/download/omim.py +1 -0
  19. scout/commands/export/database.py +1 -0
  20. scout/commands/load/panel.py +1 -0
  21. scout/commands/load/report.py +1 -0
  22. scout/commands/update/case.py +10 -10
  23. scout/commands/update/individual.py +6 -1
  24. scout/commands/update/omim.py +1 -0
  25. scout/commands/update/panelapp.py +1 -0
  26. scout/constants/file_types.py +86 -13
  27. scout/export/exon.py +1 -0
  28. scout/load/__init__.py +0 -1
  29. scout/load/all.py +8 -5
  30. scout/load/hgnc_gene.py +1 -1
  31. scout/load/panel.py +8 -4
  32. scout/load/setup.py +1 -0
  33. scout/models/case/case_loading_models.py +6 -16
  34. scout/models/hgnc_map.py +50 -87
  35. scout/models/phenotype_term.py +3 -3
  36. scout/parse/case.py +0 -1
  37. scout/parse/disease_terms.py +1 -0
  38. scout/parse/omim.py +1 -0
  39. scout/parse/orpha.py +1 -0
  40. scout/parse/panel.py +40 -15
  41. scout/parse/variant/conservation.py +1 -0
  42. scout/resources/__init__.py +3 -0
  43. scout/server/app.py +4 -50
  44. scout/server/blueprints/alignviewers/controllers.py +15 -17
  45. scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +13 -3
  46. scout/server/blueprints/alignviewers/views.py +10 -15
  47. scout/server/blueprints/cases/controllers.py +70 -73
  48. scout/server/blueprints/cases/templates/cases/case.html +94 -71
  49. scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
  50. scout/server/blueprints/cases/templates/cases/phenotype.html +8 -6
  51. scout/server/blueprints/cases/templates/cases/utils.html +3 -3
  52. scout/server/blueprints/cases/views.py +8 -6
  53. scout/server/blueprints/panels/forms.py +1 -0
  54. scout/server/blueprints/variant/controllers.py +14 -19
  55. scout/server/blueprints/variant/templates/variant/acmg.html +25 -16
  56. scout/server/blueprints/variant/templates/variant/components.html +11 -6
  57. scout/server/blueprints/variant/views.py +5 -2
  58. scout/server/blueprints/variants/controllers.py +12 -28
  59. scout/server/blueprints/variants/views.py +1 -1
  60. scout/server/config.py +16 -4
  61. scout/server/extensions/__init__.py +4 -2
  62. scout/server/extensions/beacon_extension.py +1 -0
  63. scout/server/extensions/bionano_extension.py +1 -0
  64. scout/server/extensions/chanjo_extension.py +59 -0
  65. scout/server/extensions/gens_extension.py +1 -0
  66. scout/server/extensions/ldap_extension.py +5 -3
  67. scout/server/extensions/loqus_extension.py +16 -14
  68. scout/server/extensions/matchmaker_extension.py +1 -0
  69. scout/server/extensions/mongo_extension.py +1 -0
  70. scout/server/extensions/phenopacket_extension.py +1 -0
  71. scout/server/extensions/rerunner_extension.py +1 -0
  72. scout/server/links.py +4 -4
  73. scout/server/static/bs_styles.css +20 -2
  74. scout/server/utils.py +16 -2
  75. scout/utils/acmg.py +33 -20
  76. scout/utils/ensembl_rest_clients.py +1 -0
  77. scout/utils/scout_requests.py +1 -0
  78. scout/utils/sort.py +21 -0
  79. scout/utils/track_resources.py +70 -0
  80. {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/METADATA +2 -5
  81. {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/RECORD +85 -84
  82. {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/WHEEL +1 -1
  83. {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/entry_points.txt +0 -1
  84. scout/load/case.py +0 -36
  85. scout/utils/cloud_resources.py +0 -61
  86. {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/LICENSE +0 -0
  87. {scout_browser-4.82.2.dist-info → scout_browser-4.84.dist-info}/top_level.txt +0 -0
scout/commands/update/individual.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """Code for updating information on individuals
2
2
  """
3
+
3
4
  from pathlib import Path
4
5
 
5
6
  import click
@@ -95,4 +96,8 @@ def individual(case_id, ind, key, value):
95
96
 
96
97
  ind_obj[key] = value
97
98
 
98
- store.update_case(case_obj)
99
+ link = f"/{case_obj['owner']}/{case_obj['display_name']}"
100
+ institute_obj = store.institute(case_obj["owner"])
101
+ store.update_case_individual(
102
+ case_obj, user_obj=None, institute_obj=institute_obj, link=link, keep_date=False
103
+ )
scout/commands/update/omim.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code to handle updates of the OMIM-AUTO gene panel via scout CLI"""
2
+
2
3
  import logging
3
4
 
4
5
  import click
scout/commands/update/panelapp.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code to handle updates of the PANELAPP-GREEN gene panel via scout CLI"""
2
+
2
3
  import logging
3
4
 
4
5
  import click
scout/constants/file_types.py CHANGED
@@ -1,17 +1,90 @@
1
1
  # Collect general information about the file types used in Scout
2
+ # Load priority determines load order, with lowest value loaded first.
2
3
 
3
4
  FILE_TYPE_MAP = {
4
- "vcf_cancer": {"category": "cancer", "variant_type": "clinical"},
5
- "vcf_cancer_sv": {"category": "cancer_sv", "variant_type": "clinical"},
6
- "vcf_cancer_research": {"category": "cancer", "variant_type": "research"},
7
- "vcf_cancer_sv_research": {"category": "cancer_sv", "variant_type": "research"},
8
- "vcf_fusion": {"category": "fusion", "variant_type": "clinical"},
9
- "vcf_fusion_research": {"category": "fusion", "variant_type": "research"},
10
- "vcf_snv": {"category": "snv", "variant_type": "clinical"},
11
- "vcf_snv_research": {"category": "snv", "variant_type": "research"},
12
- "vcf_sv": {"category": "sv", "variant_type": "clinical"},
13
- "vcf_sv_research": {"category": "sv", "variant_type": "research"},
14
- "vcf_str": {"category": "str", "variant_type": "clinical"},
15
- "vcf_mei": {"category": "mei", "variant_type": "clinical"},
16
- "vcf_mei_research": {"category": "mei", "variant_type": "research"},
5
+ "vcf_cancer": {
6
+ "category": "cancer",
7
+ "variant_type": "clinical",
8
+ "load_priority": 10,
9
+ },
10
+ "vcf_cancer_research": {
11
+ "category": "cancer",
12
+ "variant_type": "research",
13
+ "load_priority": 110,
14
+ },
15
+ "vcf_cancer_sv": {
16
+ "category": "cancer_sv",
17
+ "variant_type": "clinical",
18
+ "load_priority": 20,
19
+ },
20
+ "vcf_cancer_sv_research": {
21
+ "category": "cancer_sv",
22
+ "variant_type": "research",
23
+ "load_priority": 120,
24
+ },
25
+ "vcf_fusion": {
26
+ "category": "fusion",
27
+ "variant_type": "clinical",
28
+ "load_priority": 70,
29
+ },
30
+ "vcf_fusion_research": {
31
+ "category": "fusion",
32
+ "variant_type": "research",
33
+ "load_priority": 170,
34
+ },
35
+ "vcf_mei": {
36
+ "category": "mei",
37
+ "variant_type": "clinical",
38
+ "load_priority": 60,
39
+ },
40
+ "vcf_mei_research": {
41
+ "category": "mei",
42
+ "variant_type": "research",
43
+ "load_priority": 160,
44
+ },
45
+ "vcf_snv": {
46
+ "category": "snv",
47
+ "variant_type": "clinical",
48
+ "load_priority": 35,
49
+ },
50
+ "vcf_snv_mt": {
51
+ "category": "snv",
52
+ "variant_type": "clinical",
53
+ "load_priority": 30,
54
+ },
55
+ "vcf_snv_research": {
56
+ "category": "snv",
57
+ "variant_type": "research",
58
+ "load_priority": 135,
59
+ },
60
+ "vcf_snv_research_mt": {
61
+ "category": "snv",
62
+ "variant_type": "research",
63
+ "load_priority": 130,
64
+ },
65
+ "vcf_sv": {
66
+ "category": "sv",
67
+ "variant_type": "clinical",
68
+ "load_priority": 45,
69
+ },
70
+ "vcf_sv_mt": {
71
+ "category": "sv",
72
+ "variant_type": "clinical",
73
+ "load_priority": 40,
74
+ },
75
+ "vcf_sv_research": {
76
+ "category": "sv",
77
+ "variant_type": "research",
78
+ "load_priority": 145,
79
+ },
80
+ "vcf_sv_research_mt": {
81
+ "category": "sv",
82
+ "variant_type": "research",
83
+ "load_priority": 140,
84
+ },
85
+ "vcf_str": {
86
+ "category": "str",
87
+ "variant_type": "clinical",
88
+ "load_priority": 50,
89
+ },
17
90
  }
scout/export/exon.py CHANGED
@@ -11,6 +11,7 @@ head develop/mip_references/grch37_scout_exons_-2017-01-.bed
11
11
  7 65413656 65413769 7-65413658-65413767 NM_173517 21492 VKORC1L1
12
12
  5 159776172 159776790 5-159776174-159776788 NM_031908 14325 C1QTNF2
13
13
  """
14
+
14
15
  import logging
15
16
 
16
17
  LOG = logging.getLogger(__name__)
scout/load/__init__.py CHANGED
@@ -1,5 +1,4 @@
1
1
  from .all import load_scout
2
- from .case import load_case
3
2
  from .cytoband import load_cytobands
4
3
  from .exon import load_exons
5
4
  from .hgnc_gene import load_hgnc_genes
scout/load/all.py CHANGED
@@ -3,6 +3,7 @@ import logging
3
3
 
4
4
  from scout.constants import FILE_TYPE_MAP
5
5
  from scout.exceptions.config import ConfigError
6
+ from scout.utils.sort import get_load_priority
6
7
 
7
8
  LOG = logging.getLogger(__name__)
8
9
 
@@ -54,15 +55,18 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
54
55
  start = gene_caption["start"]
55
56
  end = gene_caption["end"]
56
57
 
57
- case_file_types = []
58
+ case_file_types = set()
58
59
 
59
60
  for file_type in FILE_TYPE_MAP:
60
61
  if case_obj.get("vcf_files", {}).get(file_type):
61
- case_file_types.append(
62
+ case_file_types.add(
62
63
  (FILE_TYPE_MAP[file_type]["variant_type"], FILE_TYPE_MAP[file_type]["category"])
63
64
  )
64
65
 
65
- for variant_type, category in case_file_types:
66
+ for variant_type, category in sorted(
67
+ case_file_types,
68
+ key=lambda tup: get_load_priority(variant_type=tup[0], category=tup[1]),
69
+ ):
66
70
  if variant_type == "research" and not case_obj["is_research"]:
67
71
  continue
68
72
 
@@ -84,13 +88,12 @@ def load_region(adapter, case_id, hgnc_id=None, chrom=None, start=None, end=None
84
88
  adapter.case_variants_count(case_obj["_id"], case_obj["owner"], force_update_case=True)
85
89
 
86
90
 
87
- def load_scout(adapter, config, ped=None, update=False):
91
+ def load_scout(adapter, config, update=False):
88
92
  """Load a new case from a Scout config.
89
93
 
90
94
  Args:
91
95
  adapter(MongoAdapter)
92
96
  config(dict): loading info
93
- ped(Iterable(str)): Pedigree ingformation
94
97
  update(bool): If existing case should be updated
95
98
 
96
99
  DEPRECATED method, historically used by the CG monolith, which has since switched to call the Scout CLI instead.
scout/load/hgnc_gene.py CHANGED
@@ -91,7 +91,7 @@ def load_hgnc_genes(
91
91
  gene_objects.append(gene_obj)
92
92
 
93
93
  LOG.info("Nr of genes without coordinates in build %s: %s", build, non_existing)
94
- LOG.info(f"Loading {len(gene_objects)} genes to database")
94
+ LOG.info(f"Loading {len(gene_objects)} genes into the database")
95
95
  adapter.load_hgnc_bulk(gene_objects)
96
96
 
97
97
  LOG.info("Loading done. %s genes loaded", len(gene_objects))
scout/load/panel.py CHANGED
@@ -7,6 +7,7 @@ functions to load panels into the database
7
7
  import logging
8
8
  import math
9
9
  from datetime import datetime
10
+ from typing import Dict, List
10
11
 
11
12
  from click import Abort
12
13
  from flask.cli import current_app
@@ -110,7 +111,7 @@ def load_panel(panel_path, adapter, **kwargs):
110
111
  raise err
111
112
 
112
113
 
113
- def _panelapp_panel_ids():
114
+ def _panelapp_panel_ids() -> List[str]:
114
115
  """Fetch all PanelApp panel IDs"""
115
116
  json_lines = fetch_resource(PANELAPP_BASE_URL.format("list_panels"), json=True)
116
117
  return [panel_info["Panel_Id"] for panel_info in json_lines.get("result", [])]
@@ -129,11 +130,14 @@ def _parse_panelapp_panel(adapter, panel_id, institute, confidence):
129
130
  {'version': 3.3, 'date': datetime.datetime(2023, 1, 31, 16, 43, 37, 521719), 'display_name': 'Diabetes - neonatal onset - [GREEN]', 'institute': 'cust000', 'panel_type': 'clinical', 'genes': [list of genes], 'panel_id': '55a9041e22c1fc6711b0c6c0'}
130
131
 
131
132
  """
132
- hgnc_map = adapter.ensembl_to_hgnc_mapping()
133
+ ensembl_gene_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
134
+ hgnc_symbol_ensembl_gene_map: Dict[str, str] = adapter.hgnc_symbol_ensembl_id_mapping()
135
+
133
136
  json_lines = fetch_resource(PANELAPP_BASE_URL.format("get_panel") + panel_id, json=True)
134
137
  parsed_panel = parse_panel_app_panel(
135
138
  panel_info=json_lines["result"],
136
- hgnc_map=hgnc_map,
139
+ ensembl_gene_hgnc_id_map=ensembl_gene_hgnc_id_map,
140
+ hgnc_symbol_ensembl_gene_map=hgnc_symbol_ensembl_gene_map,
137
141
  institute=institute,
138
142
  confidence=confidence,
139
143
  )
@@ -160,7 +164,7 @@ def load_panelapp_panel(adapter, panel_id=None, institute="cust000", confidence=
160
164
 
161
165
  if not panel_id:
162
166
  LOG.info("Fetching all panel app panels")
163
- panel_ids = _panelapp_panel_ids()
167
+ panel_ids: List[str] = _panelapp_panel_ids()
164
168
 
165
169
  for _ in panel_ids:
166
170
  parsed_panel = _parse_panelapp_panel(adapter, _, institute, confidence)
scout/load/setup.py CHANGED
@@ -5,6 +5,7 @@ This means add a default institute, a user and the internal definitions such as
5
5
  transcripts, hpo terms etc
6
6
 
7
7
  """
8
+
8
9
  import logging
9
10
 
10
11
  import yaml
scout/models/case/case_loading_models.py CHANGED
@@ -15,7 +15,7 @@ except ImportError:
15
15
 
16
16
  from pydantic import BaseModel, Field, field_validator, model_validator
17
17
 
18
- from scout.constants import ANALYSIS_TYPES
18
+ from scout.constants import ANALYSIS_TYPES, FILE_TYPE_MAP
19
19
  from scout.exceptions import PedigreeError
20
20
  from scout.utils.date import get_date
21
21
 
@@ -58,21 +58,7 @@ CASE_FILE_PATH_CHECKS = [
58
58
  "RNAfusion_report_research",
59
59
  ]
60
60
 
61
- VCF_FILE_PATH_CHECKS = [
62
- "vcf_cancer",
63
- "vcf_cancer_research",
64
- "vcf_cancer_sv",
65
- "vcf_cancer_sv_research",
66
- "vcf_fusion",
67
- "vcf_fusion_research",
68
- "vcf_snv",
69
- "vcf_snv_research",
70
- "vcf_mei",
71
- "vcf_mei_research",
72
- "vcf_str",
73
- "vcf_sv",
74
- "vcf_sv_research",
75
- ]
61
+ VCF_FILE_PATH_CHECKS = FILE_TYPE_MAP.keys()
76
62
 
77
63
  GENOME_BUILDS = ["37", "38"]
78
64
  TRACKS = ["rare", "cancer"]
@@ -110,12 +96,16 @@ class VcfFiles(BaseModel):
110
96
  vcf_cancer_sv: Optional[str] = None
111
97
  vcf_cancer_sv_research: Optional[str] = None
112
98
  vcf_snv: Optional[str] = None
99
+ vcf_snv_mt: Optional[str] = None
113
100
  vcf_snv_research: Optional[str] = None
101
+ vcf_snv_research_mt: Optional[str] = None
114
102
  vcf_mei: Optional[str] = None
115
103
  vcf_mei_research: Optional[str] = None
116
104
  vcf_str: Optional[str] = None
117
105
  vcf_sv: Optional[str] = None
106
+ vcf_sv_mt: Optional[str] = None
118
107
  vcf_sv_research: Optional[str] = None
108
+ vcf_sv_research_mt: Optional[str] = None
119
109
  vcf_fusion: Optional[str] = None
120
110
  vcf_fusion_research: Optional[str] = None
121
111
 
scout/models/hgnc_map.py CHANGED
@@ -2,6 +2,8 @@ from __future__ import unicode_literals
2
2
 
3
3
  from typing import List, Optional
4
4
 
5
+ from pydantic import BaseModel, Field, field_validator, model_validator
6
+
5
7
 
6
8
  class Exon(dict):
7
9
  """Exon dictionary
@@ -66,90 +68,51 @@ class HgncTranscript(dict):
66
68
  self["mane_plus_clinical"] = mane_plus_clinical
67
69
 
68
70
 
69
- class HgncGene(dict):
70
- """HgncGene dictionary
71
-
72
- 'hgnc_id': int, # This is the hgnc id, required:
73
- 'hgnc_symbol': str, # The primary symbol, required
74
- 'ensembl_id': str, # required
75
- 'build': str, # '37' or '38', defaults to '37', required
76
-
77
- 'chromosome': str, # required
78
- 'start': int, # required
79
- 'end': int, # required
80
-
81
- 'description': str, # Gene description
82
- 'aliases': list(), # Gene symbol aliases, includes hgnc_symbol, str
83
- 'entrez_id': int,
84
- 'omim_id': int,
85
- 'pli_score': float,
86
- 'primary_transcripts': list(), # List of refseq transcripts (str)
87
- 'ucsc_id': str,
88
- 'uniprot_ids': list(), # List of str
89
- 'vega_id': str,
90
-
91
- # Inheritance information
92
- 'inheritance_models': list(), # List of model names
93
- 'incomplete_penetrance': bool, # Acquired from HPO
94
-
95
- # Phenotype information
96
- 'phenotypes': list(), # List of dictionaries with phenotype information
97
- """
98
-
99
- def __init__(
100
- self,
101
- hgnc_id,
102
- hgnc_symbol,
103
- ensembl_id,
104
- chrom,
105
- start,
106
- end,
107
- description=None,
108
- aliases=None,
109
- entrez_id=None,
110
- omim_id=None,
111
- pli_score=None,
112
- primary_transcripts=None,
113
- ucsc_id=None,
114
- uniprot_ids=None,
115
- vega_id=None,
116
- inheritance_models=None,
117
- incomplete_penetrance=False,
118
- phenotypes=None,
119
- build="37",
120
- ):
121
- super(HgncGene, self).__init__()
122
- self["hgnc_id"] = int(hgnc_id)
123
- self["hgnc_symbol"] = hgnc_symbol
124
- self["ensembl_id"] = ensembl_id
125
-
126
- self["chromosome"] = chrom
127
- self["start"] = int(start)
128
- self["end"] = int(end)
129
- self["length"] = self["end"] - self["start"]
130
-
131
- self["description"] = description
132
- self["aliases"] = aliases
133
- self["primary_transcripts"] = primary_transcripts
134
- self["inheritance_models"] = inheritance_models
135
- self["phenotypes"] = phenotypes
136
-
137
- self["entrez_id"] = entrez_id
138
- if entrez_id:
139
- self["entrez_id"] = int(entrez_id)
140
-
141
- self["omim_id"] = omim_id
142
- if omim_id:
143
- self["omim_id"] = int(omim_id)
144
-
145
- self["ucsc_id"] = ucsc_id
146
- self["uniprot_ids"] = uniprot_ids
147
- self["vega_id"] = vega_id
148
-
149
- self["pli_score"] = pli_score
150
- if pli_score:
151
- self["pli_score"] = float(pli_score)
152
-
153
- self["incomplete_penetrance"] = incomplete_penetrance
154
-
155
- self["build"] = build
71
+ class HgncGene(BaseModel):
72
+ hgnc_id: int
73
+ hgnc_symbol: str
74
+ build: str
75
+ chromosome: str
76
+ start: int
77
+ end: int
78
+ length: int
79
+ description: Optional[str] = None
80
+ ensembl_id: Optional[str] = Field(None, alias="ensembl_gene_id")
81
+ aliases: Optional[List[str]] = Field(None, alias="previous_symbols")
82
+ entrez_id: Optional[int] = None
83
+ omim_id: Optional[int] = None
84
+ primary_transcripts: Optional[List[str]] = Field(None, alias="ref_seq")
85
+ ucsc_id: Optional[str] = None
86
+ uniprot_ids: Optional[List[str]] = None
87
+ vega_id: Optional[str] = None
88
+ inheritance_models: Optional[List[str]] = None
89
+ incomplete_penetrance: Optional[bool] = False
90
+ phenotypes: Optional[List[dict]] = None
91
+ pli_score: Optional[float] = None
92
+ constraint_lof_oe: Optional[float] = None
93
+ constraint_lof_oe_ci_lower: Optional[float] = None
94
+ constraint_lof_oe_ci_upper: Optional[float] = None
95
+ constraint_lof_z: Optional[float] = None
96
+ constraint_mis_oe: Optional[float] = None
97
+ constraint_mis_oe_ci_lower: Optional[float] = None
98
+ constraint_mis_oe_ci_upper: Optional[float] = None
99
+ constraint_mis_z: Optional[float] = None
100
+
101
+ @model_validator(mode="before")
102
+ def set_gene_length(cls, values) -> "HgncGene":
103
+ """Set gene length."""
104
+ if None in [values.get("end"), values.get("start")]:
105
+ values.update({"length": None})
106
+ else:
107
+ values.update({"length": values.get("end") - values.get("start")})
108
+ return values
109
+
110
+ @field_validator("phenotypes", mode="before")
111
+ @classmethod
112
+ def set_phenotypes_inheritance(cls, phenotypes) -> Optional[List[dict]]:
113
+ """Convert field 'inheritance' of each phenotype in phenotypes from set to list."""
114
+ for phenotype in phenotypes:
115
+ phenotype["inheritance_models"] = list(phenotype.get("inheritance", {}))
116
+ phenotype.pop("inheritance", None)
117
+
118
+ return phenotypes
scout/models/phenotype_term.py CHANGED
@@ -14,9 +14,9 @@ class HpoTerm(BaseModel):
14
14
  """
15
15
 
16
16
  hpo_id: str # id field in the hpo.obo file
17
- hpo_number: Optional[
18
- int
19
- ] = None # id field in the hpo.obo file, stripped of the 'HP:' part and the zeroes
17
+ hpo_number: Optional[int] = (
18
+ None # id field in the hpo.obo file, stripped of the 'HP:' part and the zeroes
19
+ )
20
20
  description: str # name field in the hpo.obo file
21
21
  ancestors: List = []
22
22
  all_ancestors: List = []
scout/parse/case.py CHANGED
@@ -86,7 +86,6 @@ def parse_case_data(**kwargs):
86
86
  config_dict["case_id"] = config_dict["family"]
87
87
 
88
88
  if config_dict.get("smn_tsv"):
89
- LOG.info("Adding SMN info from {}.".format(config_dict["smn_tsv"]))
90
89
  add_smn_info_case(config_dict)
91
90
 
92
91
  return remove_none_recursive(config_dict)
scout/parse/disease_terms.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code for parsing disease terms from OMIM and ORPHA data"""
2
+
2
3
  import logging
3
4
  from typing import Dict, List
4
5
 
scout/parse/omim.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code for parsing OMIM formatted files"""
2
+
2
3
  import logging
3
4
  from typing import Any, Dict, Iterable
4
5
 
scout/parse/orpha.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code for parsing ORPHA formatted files"""
2
+
2
3
  import logging
3
4
  from typing import Any, Dict, List
4
5
  from xml.etree.ElementTree import Element
scout/parse/panel.py CHANGED
@@ -1,6 +1,8 @@
1
1
  """Code to parse panel information"""
2
+
2
3
  import logging
3
4
  from datetime import datetime
5
+ from typing import Dict, List, Optional
4
6
 
5
7
  from scout.constants import (
6
8
  INCOMPLETE_PENETRANCE_MAP,
@@ -233,7 +235,12 @@ def parse_genes(gene_lines):
233
235
 
234
236
 
235
237
  def parse_gene_panel(
236
- path, institute="cust000", panel_id="test", panel_type="clinical", genes=None, **kwargs
238
+ path,
239
+ institute="cust000",
240
+ panel_id="test",
241
+ panel_type="clinical",
242
+ genes=None,
243
+ **kwargs,
237
244
  ):
238
245
  """Parse the panel info and return a gene panel
239
246
 
@@ -268,17 +275,14 @@ def parse_gene_panel(
268
275
  return gene_panel
269
276
 
270
277
 
271
- def parse_panel_app_gene(app_gene, hgnc_map, confidence):
272
- """Parse a panel app formatted gene.
278
+ def parse_panel_app_gene(
279
+ app_gene: dict,
280
+ ensembl_gene_hgnc_id_map: Dict[str, int],
281
+ hgnc_symbol_ensembl_gene_map: Dict[str, str],
282
+ confidence: str,
283
+ ) -> dict:
284
+ """Parse a panel app-formatted gene."""
273
285
 
274
- Args:
275
- app_gene(dict): dict with panel app info, where Ensembl ids are present as a loist with key "EnsembleGeneIds"
276
- hgnc_map(dict): a dictionary with Ensembl IDs as keys and HGNC ids as values
277
- confidence(str): enum green|amber|red
278
-
279
- Returns:
280
- gene_info(dict): Scout infromation
281
- """
282
286
  gene_info = {}
283
287
  confidence_level = app_gene["LevelOfConfidence"]
284
288
  # Return empty gene if not confident gene
@@ -288,8 +292,22 @@ def parse_panel_app_gene(app_gene, hgnc_map, confidence):
288
292
  hgnc_symbol = app_gene["GeneSymbol"]
289
293
 
290
294
  ensembl_ids = app_gene["EnsembleGeneIds"]
295
+
296
+ if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
297
+ if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
298
+ LOG.warning(
299
+ f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
300
+ )
301
+ ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
302
+ else:
303
+ LOG.warning(
304
+ f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
305
+ )
306
+
291
307
  hgnc_ids = set(
292
- hgnc_map.get(ensembl_id) for ensembl_id in ensembl_ids if hgnc_map.get(ensembl_id)
308
+ ensembl_gene_hgnc_id_map.get(ensembl_id)
309
+ for ensembl_id in ensembl_ids
310
+ if ensembl_gene_hgnc_id_map.get(ensembl_id)
293
311
  )
294
312
  if not hgnc_ids:
295
313
  LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
@@ -314,8 +332,13 @@ def parse_panel_app_gene(app_gene, hgnc_map, confidence):
314
332
 
315
333
 
316
334
  def parse_panel_app_panel(
317
- panel_info, hgnc_map, institute="cust000", panel_type="clinical", confidence="green"
318
- ):
335
+ panel_info: dict,
336
+ ensembl_gene_hgnc_id_map: Dict[str, int],
337
+ hgnc_symbol_ensembl_gene_map: Dict[str, str],
338
+ institute: Optional[str] = "cust000",
339
+ panel_type: Optional[str] = "clinical",
340
+ confidence: Optional[str] = "green",
341
+ ) -> dict:
319
342
  """Parse a PanelApp panel
320
343
 
321
344
  Args:
@@ -346,7 +369,9 @@ def parse_panel_app_panel(
346
369
  nr_excluded = 0
347
370
  nr_genes = 0
348
371
  for nr_genes, gene in enumerate(panel_info["Genes"], 1):
349
- gene_info = parse_panel_app_gene(gene, hgnc_map, confidence)
372
+ gene_info = parse_panel_app_gene(
373
+ gene, ensembl_gene_hgnc_id_map, hgnc_symbol_ensembl_gene_map, confidence
374
+ )
350
375
  if not gene_info:
351
376
  nr_excluded += 1
352
377
  continue
scout/parse/variant/conservation.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Code for parsing conservation"""
2
+
2
3
  import logging
3
4
  import numbers
4
5
 
scout/resources/__init__.py CHANGED
@@ -12,3 +12,6 @@ cytoband_files = {
12
12
  "37": cytobands_37_path,
13
13
  "38": cytobands_38_path,
14
14
  }
15
+
16
+ # Custom IGV tracks
17
+ mane_igv_track_path = str(files(BASE_PATH).joinpath("custom_igv_tracks", "mane.bb"))