scout-browser 4.92__py3-none-any.whl → 4.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. scout/adapter/mongo/base.py +3 -0
  2. scout/adapter/mongo/case.py +27 -2
  3. scout/adapter/mongo/ccv.py +131 -0
  4. scout/adapter/mongo/hgnc.py +5 -1
  5. scout/adapter/mongo/managed_variant.py +4 -2
  6. scout/adapter/mongo/query.py +91 -54
  7. scout/adapter/mongo/variant.py +17 -11
  8. scout/adapter/mongo/variant_events.py +45 -1
  9. scout/build/ccv.py +59 -0
  10. scout/build/panel.py +1 -1
  11. scout/commands/export/export_command.py +0 -0
  12. scout/commands/load/base.py +0 -0
  13. scout/commands/load/user.py +0 -0
  14. scout/commands/serve.py +2 -1
  15. scout/commands/update/disease.py +0 -0
  16. scout/commands/update/genes.py +0 -0
  17. scout/commands/wipe_database.py +0 -0
  18. scout/constants/__init__.py +2 -0
  19. scout/constants/case_tags.py +2 -0
  20. scout/constants/ccv.py +244 -0
  21. scout/constants/gene_tags.py +22 -12
  22. scout/demo/643594.config.yaml +2 -2
  23. scout/demo/643594.research.mei.vcf.gz +0 -0
  24. scout/demo/643594.research.mei.vcf.gz.tbi +0 -0
  25. scout/demo/images/custom_images/1300x1000.jpg +0 -0
  26. scout/load/panelapp.py +8 -12
  27. scout/models/ccv_evaluation.py +26 -0
  28. scout/models/variant/variant.py +1 -0
  29. scout/parse/omim.py +5 -6
  30. scout/parse/panelapp.py +16 -42
  31. scout/parse/variant/compound.py +20 -21
  32. scout/parse/variant/gene.py +0 -0
  33. scout/parse/variant/genotype.py +0 -0
  34. scout/resources/custom_igv_tracks/mane.bb +0 -0
  35. scout/server/blueprints/cases/controllers.py +48 -0
  36. scout/server/blueprints/cases/templates/cases/case_report.html +61 -1
  37. scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +2 -2
  38. scout/server/blueprints/cases/templates/cases/index.html +0 -2
  39. scout/server/blueprints/cases/views.py +5 -5
  40. scout/server/blueprints/clinvar/controllers.py +4 -5
  41. scout/server/blueprints/institutes/controllers.py +129 -67
  42. scout/server/blueprints/institutes/forms.py +5 -2
  43. scout/server/blueprints/institutes/templates/overview/cases.html +6 -0
  44. scout/server/blueprints/institutes/templates/overview/causatives.html +1 -1
  45. scout/server/blueprints/institutes/templates/overview/utils.html +18 -6
  46. scout/server/blueprints/institutes/templates/overview/verified.html +1 -1
  47. scout/server/blueprints/institutes/views.py +4 -0
  48. scout/server/blueprints/managed_variants/forms.py +17 -2
  49. scout/server/blueprints/managed_variants/templates/managed_variants/managed_variants.html +2 -2
  50. scout/server/blueprints/panels/controllers.py +5 -6
  51. scout/server/blueprints/panels/templates/panels/panel.html +5 -5
  52. scout/server/blueprints/variant/controllers.py +148 -1
  53. scout/server/blueprints/variant/templates/variant/cancer-variant.html +1 -1
  54. scout/server/blueprints/variant/templates/variant/ccv.html +183 -0
  55. scout/server/blueprints/variant/templates/variant/components.html +86 -5
  56. scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
  57. scout/server/blueprints/variant/templates/variant/tx_overview.html +3 -3
  58. scout/server/blueprints/variant/templates/variant/variant.html +1 -1
  59. scout/server/blueprints/variant/templates/variant/variant_details.html +29 -11
  60. scout/server/blueprints/variant/utils.py +21 -1
  61. scout/server/blueprints/variant/views.py +115 -5
  62. scout/server/blueprints/variants/controllers.py +31 -0
  63. scout/server/blueprints/variants/forms.py +33 -5
  64. scout/server/blueprints/variants/templates/variants/cancer-sv-variants.html +4 -18
  65. scout/server/blueprints/variants/templates/variants/cancer-variants.html +4 -13
  66. scout/server/blueprints/variants/templates/variants/components.html +77 -73
  67. scout/server/blueprints/variants/templates/variants/indicators.html +11 -0
  68. scout/server/blueprints/variants/templates/variants/sv-variants.html +2 -2
  69. scout/server/links.py +1 -1
  70. scout/server/static/custom_images.js +19 -2
  71. scout/utils/acmg.py +0 -1
  72. scout/utils/ccv.py +193 -0
  73. scout/utils/link.py +4 -3
  74. scout/utils/md5.py +0 -0
  75. {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/METADATA +67 -45
  76. {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/RECORD +70 -65
  77. {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/WHEEL +1 -2
  78. scout/__version__.py +0 -1
  79. scout/demo/images/custom_images/640x480_two.jpg +0 -0
  80. scout_browser-4.92.dist-info/top_level.txt +0 -1
  81. {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info}/entry_points.txt +0 -0
  82. {scout_browser-4.92.dist-info → scout_browser-4.95.0.dist-info/licenses}/LICENSE +0 -0
@@ -23,6 +23,7 @@ from .case_tags import (
23
23
  VERBS_ICONS_MAP,
24
24
  VERBS_MAP,
25
25
  )
26
+ from .ccv import CCV_COMPLETE_MAP, CCV_CRITERIA, CCV_MAP, CCV_OPTIONS, REV_CCV_MAP
26
27
  from .clinvar import (
27
28
  AFFECTED_STATUS,
28
29
  ALLELE_OF_ORIGIN,
@@ -126,6 +127,7 @@ COLLECTIONS = [
126
127
  "disease_term",
127
128
  "variant",
128
129
  "acmg",
130
+ "ccv",
129
131
  ]
130
132
 
131
133
  BUILDS = ["37", "38", "GRCh38"]
@@ -43,6 +43,7 @@ CASE_REPORT_VARIANT_TYPES = {
43
43
  "partial_causatives_detailed": "partial_causatives",
44
44
  "suspects_detailed": "suspects",
45
45
  "classified_detailed": "acmg_classification",
46
+ "ccv_classified_detailed": "ccv_classification",
46
47
  "tagged_detailed": "manual_rank",
47
48
  "tier_detailed": "cancer_tier",
48
49
  "dismissed_detailed": "dismiss_variant",
@@ -105,6 +106,7 @@ CASE_TAGS = {
105
106
 
106
107
  VERBS_MAP = {
107
108
  "acmg": "updated ACMG classification for",
109
+ "ccv": "updated ClinGen-CGC-VIGG classification for",
108
110
  "add_case": "added case",
109
111
  "add_cohort": "updated cohort for",
110
112
  "add_phenotype": "added HPO term for",
scout/constants/ccv.py ADDED
@@ -0,0 +1,244 @@
1
+ # -*- coding: utf-8 -*-
2
+ from collections import OrderedDict
3
+
4
+ # ordered from worst (oncogenic) to most certainly benign
5
+ CCV_MAP = OrderedDict(
6
+ [
7
+ (4, "oncogenic"),
8
+ (3, "likely_oncogenic"),
9
+ (0, "uncertain_significance"),
10
+ (2, "likely_benign"),
11
+ (1, "benign"),
12
+ ]
13
+ )
14
+ # <a href="https://cancerhotspots.org" target="_blank">cancerhotspots.org</a>
15
+ REV_CCV_MAP = OrderedDict([(value, key) for key, value in CCV_MAP.items()])
16
+
17
+ CCV_OPTIONS = [
18
+ {"code": "oncogenic", "short": "O", "label": "Oncogenic", "color": "danger"},
19
+ {
20
+ "code": "likely_oncogenic",
21
+ "short": "LO",
22
+ "label": "Likely Oncogenic",
23
+ "color": "warning",
24
+ },
25
+ {
26
+ "code": "uncertain_significance",
27
+ "short": "VUS",
28
+ "label": "Uncertain Significance",
29
+ "color": "primary",
30
+ },
31
+ {"code": "likely_benign", "short": "LB", "label": "Likely Benign", "color": "info"},
32
+ {"code": "benign", "short": "B", "label": "Benign", "color": "success"},
33
+ ]
34
+
35
+ CCV_COMPLETE_MAP = OrderedDict([(option["code"], option) for option in CCV_OPTIONS])
36
+
37
+ CCV_CRITERIA = OrderedDict()
38
+
39
+ CCV_CRITERIA["oncogenicity"] = OrderedDict(
40
+ [
41
+ (
42
+ "Very Strong",
43
+ OrderedDict(
44
+ [
45
+ (
46
+ "OVS1",
47
+ {
48
+ "short": "Null variant in tumor supressor",
49
+ "description": "Null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single-exon or multiexon deletion) in a bona fide tumor suppressor gene.",
50
+ "documentation": 'Strength can be modified based on <a href="https://pubmed.ncbi.nlm.nih.gov/30192042/" target="blank">ClinGen’s recommendations for PVS1</a>',
51
+ },
52
+ )
53
+ ]
54
+ ),
55
+ ),
56
+ (
57
+ "Strong",
58
+ OrderedDict(
59
+ [
60
+ (
61
+ "OS1",
62
+ {
63
+ "short": "Same aa change as known oncogenic variant",
64
+ "description": "Same amino acid change as a previously established oncogenic variant (using this standard) regardless of nucleotide change.",
65
+ },
66
+ ),
67
+ (
68
+ "OS2",
69
+ {
70
+ "short": "Well-established functional studies",
71
+ "description": "Well-established in vitro or in vivo functional studies, supportive of an oncogenic effect of the variant.",
72
+ },
73
+ ),
74
+ (
75
+ "OS3",
76
+ {
77
+ "short": "Cancer hotspot: high frequency",
78
+ "description": "Located in one of the hotspots in cancerhotspots.org with at least 50 samples with a somatic variant at the same amino acid position, and the same amino acid change count in cancerhotspots.org in at least 10 samples.",
79
+ },
80
+ ),
81
+ ]
82
+ ),
83
+ ),
84
+ (
85
+ "Moderate",
86
+ OrderedDict(
87
+ [
88
+ (
89
+ "OM1",
90
+ {
91
+ "short": "Functional domain",
92
+ "description": "Located in a critical and well-established part of a functional domain (eg, active site of an enzyme).",
93
+ },
94
+ ),
95
+ (
96
+ "OM2",
97
+ {
98
+ "short": "Protein length change",
99
+ "description": "Protein length changes as a result of in-frame deletions/insertions in a known oncogene or tumor suppressor gene or stop-loss variants in a known tumor suppressor gene.",
100
+ },
101
+ ),
102
+ (
103
+ "OM3",
104
+ {
105
+ "short": "Cancer hotspot: moderate frequency",
106
+ "description": "Located in one of the hotspots in cancerhotspots.org with <50 samples with a somatic variant at the same amino acid position, and the same amino acid change count in cancerhotspots.org is at least 10.",
107
+ },
108
+ ),
109
+ (
110
+ "OM4",
111
+ {
112
+ "short": "Missense variant at aa with other oncogenic missense variant",
113
+ "description": "Missense variant at an amino acid residue where a different missense variant determined to be oncogenic (using this standard) has been documented. Amino acid difference from reference amino acid should be greater or at least approximately the same as for missense change determined to be oncogenic.",
114
+ },
115
+ ),
116
+ ]
117
+ ),
118
+ ),
119
+ (
120
+ "Supporting",
121
+ OrderedDict(
122
+ [
123
+ (
124
+ "OP1",
125
+ {
126
+ "short": "Computatinal evidence",
127
+ "description": "All used lines of computational evidence support an oncogenic effect of a variant (conservation/evolutionary, splicing effect, etc.).",
128
+ },
129
+ ),
130
+ (
131
+ "OP2",
132
+ {
133
+ "short": "Gene in a malignancy with a single genetic etiology",
134
+ "description": "Somatic variant in a gene in a malignancy with a single genetic etiology. Example: retinoblastoma is caused by bi-allelic RB1 inactivation.",
135
+ },
136
+ ),
137
+ (
138
+ "OP3",
139
+ {
140
+ "short": "Cancer hotspots: low frequency",
141
+ "description": "Located in one of the hotspots in cancerhotspots.org and the particular amino acid change count in cancerhotspots.org is below 10",
142
+ },
143
+ ),
144
+ (
145
+ "OP4",
146
+ {
147
+ "short": "Absent in population databases",
148
+ "description": "Absent from controls (or at an extremely low frequency) in gnomAD.",
149
+ },
150
+ ),
151
+ ]
152
+ ),
153
+ ),
154
+ ]
155
+ )
156
+
157
+ CCV_CRITERIA["benign impact"] = OrderedDict(
158
+ [
159
+ (
160
+ "Very Strong",
161
+ OrderedDict(
162
+ [
163
+ (
164
+ "SBVS1",
165
+ {
166
+ "short": "MAF is >0.05",
167
+ "description": "Minor allele frequency is >5%% in gnomAD in any 5 general continental populations: African, East Asian, European (non-Finnish), Latino, and South Asian.",
168
+ },
169
+ )
170
+ ]
171
+ ),
172
+ ),
173
+ (
174
+ "Strong",
175
+ OrderedDict(
176
+ [
177
+ (
178
+ "SBS1",
179
+ {
180
+ "short": "MAF is >0.01",
181
+ "description": "Minor allele frequency is >1%% in gnomAD in any 5 general continental populations: African, East Asian, European (non-Finnish), Latino, and South Asian. ",
182
+ },
183
+ ),
184
+ (
185
+ "SBS2",
186
+ {
187
+ "short": "Well-established functional studies",
188
+ "description": "Well-established in vitro or in vivo functional studies show no oncogenic effects.",
189
+ },
190
+ ),
191
+ ]
192
+ ),
193
+ ),
194
+ (
195
+ "Supporting",
196
+ OrderedDict(
197
+ [
198
+ (
199
+ "SBP1",
200
+ {
201
+ "short": "Computational evidence",
202
+ "description": "All used lines of computational evidence suggest no effect of a variant (conservation/evolutionary, splicing effect, etc.).",
203
+ },
204
+ ),
205
+ (
206
+ "SBP2",
207
+ {
208
+ "short": "Silent mutation (no predicted impact on splicing)",
209
+ "description": "A synonymous (silent) variant for which splicing prediction algorithms predict no effect on the splice consensus sequence nor the creation of a new splice site and the nucleotide is not highly conserved.",
210
+ },
211
+ ),
212
+ ]
213
+ ),
214
+ ),
215
+ ]
216
+ )
217
+
218
+ CCV_POTENTIAL_CONFLICTS = [
219
+ (
220
+ "OS2",
221
+ "OS1",
222
+ "If OS1 is applicable, OS2 can be used only if functional studies are based on the particular nucleotide change of the variant.",
223
+ ),
224
+ (
225
+ "OS3",
226
+ "OS1",
227
+ "OS3 cannot be used if OS1 is applicable, unless it is possible to observe hotspots on the basis of the particular nucleotide change.",
228
+ ),
229
+ (
230
+ "OM1",
231
+ "OVS1",
232
+ "OM1 cannot be used if OVS1 is applicable.",
233
+ ),
234
+ (
235
+ "OM3",
236
+ "OM1",
237
+ "OM3 cannot be used if OM1 is applicable.",
238
+ ),
239
+ (
240
+ "OM3",
241
+ "OM4",
242
+ "OM3 cannot be used if OM4 is applicable.",
243
+ ),
244
+ ]
@@ -33,20 +33,30 @@ INHERITANCE_PALETTE = {
33
33
  "other": {"bgcolor": "bg-light", "text_color": "text-dark"},
34
34
  }
35
35
 
36
- INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "Complete": None, "Incomplete": True}
36
+ INCOMPLETE_PENETRANCE_MAP = {"unknown": None, "None": None, "Complete": False, "Incomplete": True}
37
37
 
38
38
  MODELS_MAP = {
39
- "monoallelic_not_imprinted": ["AD"],
40
- "monoallelic_maternally_imprinted": ["AD"],
41
- "monoallelic_paternally_imprinted": ["AD"],
42
- "monoallelic": ["AD"],
43
- "biallelic": ["AR"],
44
- "monoallelic_and_biallelic": ["AD", "AR"],
45
- "monoallelic_and_more_severe_biallelic": ["AD", "AR"],
46
- "xlinked_biallelic": ["XR"],
47
- "xlinked_monoallelic": ["XD"],
48
- "mitochondrial": ["MT"],
49
- "unknown": [],
39
+ "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted": ["AD"],
40
+ "MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown": ["AD"],
41
+ "MONOALLELIC, autosomal or pseudoautosomal, maternally imprinted (paternal allele expressed)": [
42
+ "AD"
43
+ ],
44
+ "MONOALLELIC, autosomal or pseudoautosomal, paternally imprinted (maternal allele expressed)": [
45
+ "AD"
46
+ ],
47
+ "BIALLELIC, autosomal or pseudoautosomal": ["AR"],
48
+ "BOTH monoallelic and biallelic, autosomal or pseudoautosomal": ["AD", "AR"],
49
+ "BOTH monoallelic and biallelic (but BIALLELIC mutations cause a more SEVERE disease form), autosomal or pseudoautosomal": [
50
+ "AD",
51
+ "AR",
52
+ ],
53
+ "X-LINKED: hemizygous mutation in males, biallelic mutations in females": ["XR"],
54
+ "X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)": [
55
+ "XD"
56
+ ],
57
+ "MITOCHONDRIAL": ["MT"],
58
+ "Other": [],
59
+ "Other - please specifiy in evaluation comments": [],
50
60
  }
51
61
 
52
62
  PANEL_GENE_INFO_TRANSCRIPTS = [
@@ -113,8 +113,8 @@ custom_images:
113
113
  path: scout/demo/images/custom_images/640x480_one.png
114
114
  - title: A jpg image
115
115
  description: A very good description
116
- width: 500
117
- path: scout/demo/images/custom_images/640x480_two.jpg
116
+ width: 1300
117
+ path: scout/demo/images/custom_images/1300x1000.jpg
118
118
  section_two:
119
119
  - title: An SVG image
120
120
  description: Another very good description
Binary file
Binary file
scout/load/panelapp.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import logging
2
2
  import math
3
3
  from datetime import datetime
4
- from typing import Dict, List, Set
4
+ from typing import List, Set
5
5
 
6
- from click import Abort, progressbar
6
+ from click import progressbar
7
7
 
8
8
  from scout.adapter import MongoAdapter
9
9
  from scout.constants.panels import PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
@@ -28,15 +28,11 @@ def load_panelapp_panel(
28
28
  LOG.info("Fetching all panel app panels")
29
29
  panel_ids: List[str] = panelapp.get_panel_ids(signed_off=False)
30
30
 
31
- ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
32
- hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
33
-
34
- for _ in panel_ids:
31
+ for panel_id in panel_ids:
35
32
  panel_info: dict = panelapp.get_panel(panel_id)
36
33
  parsed_panel = parse_panelapp_panel(
34
+ hgnc_gene_ids=adapter.hgnc_ids(),
37
35
  panel_info=panel_info,
38
- ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
39
- hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
40
36
  institute=institute,
41
37
  confidence=confidence,
42
38
  )
@@ -54,8 +50,6 @@ def get_panelapp_genes(
54
50
  """Parse and collect genes from one or more panelApp panels."""
55
51
 
56
52
  genes = set()
57
- ensembl_id_to_hgnc_id_map: Dict[str, int] = adapter.ensembl_to_hgnc_id_mapping()
58
- hgnc_symbol_to_ensembl_id_map: Dict[int, str] = adapter.hgnc_symbol_ensembl_id_mapping()
59
53
 
60
54
  with progressbar(panel_ids, label="Parsing panels", length=len(panel_ids)) as panel_ids:
61
55
  for panel_id in panel_ids:
@@ -66,9 +60,8 @@ def get_panelapp_genes(
66
60
  continue
67
61
 
68
62
  parsed_panel = parse_panelapp_panel(
63
+ hgnc_gene_ids=adapter.hgnc_ids(),
69
64
  panel_info=panel_dict,
70
- ensembl_id_to_hgnc_id_map=ensembl_id_to_hgnc_id_map,
71
- hgnc_symbol_to_ensembl_id_map=hgnc_symbol_to_ensembl_id_map,
72
65
  institute=institute,
73
66
  confidence="green",
74
67
  )
@@ -86,6 +79,8 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
86
79
  """Translate panel type input from users to panel type slugs."""
87
80
  if not types_filter:
88
81
  return PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
82
+ if "all" in types_filter:
83
+ return available_types
89
84
  index_list = [int(typeint) - 1 for typeint in types_filter.replace(" ", "").split(",")]
90
85
  return [available_types[i] for i in index_list]
91
86
 
@@ -107,6 +102,7 @@ def load_panelapp_green_panel(adapter: MongoAdapter, institute: str, force: bool
107
102
  available_types: List[str] = panelapp.get_panel_types()
108
103
  for number, type in enumerate(available_types, 1):
109
104
  LOG.info(f"{number}: {type}")
105
+ LOG.info("all: all types above")
110
106
  preselected_options_idx: List[str] = [
111
107
  str(available_types.index(presel) + 1)
112
108
  for presel in PRESELECTED_PANELAPP_PANEL_TYPE_SLUGS
@@ -0,0 +1,26 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ scout.models.ccv_evaluation
4
+ ~~~~~~~~~~~~~~~~~~
5
+
6
+ Define a document to describe a ClinGen-CGC-VICC evaluation
7
+
8
+ Evaluations are stored in their own collection
9
+
10
+ """
11
+
12
+ from datetime import datetime
13
+
14
+ ccv_evaluation = dict(
15
+ variant_specific=str, # md5 document id
16
+ variant_id=str, # md5 variant id
17
+ institute_id=str, # Institute _id, required
18
+ case_id=str, # case_id, required
19
+ classification=str, # What did the evaluation end up in?
20
+ # All evaluations will have an author
21
+ user_id=str, # user email, required
22
+ user_name=str, # user name
23
+ criteria=list, # List of dictionaries with criterias
24
+ # timestamps
25
+ created_at=datetime,
26
+ )
@@ -94,6 +94,7 @@ variant = dict(
94
94
  manual_rank=int, # choices=[0, 1, 2, 3, 4, 5]
95
95
  dismiss_variant=list,
96
96
  acmg_classification=str, # choices=ACMG_TERMS
97
+ ccv_classification=str, # choices=CCV_TERMS
97
98
  )
98
99
 
99
100
  compound = dict(
scout/parse/omim.py CHANGED
@@ -311,7 +311,6 @@ def get_mim_genes(genemap_lines, mim2gene_lines):
311
311
  mim_number = entry["mim_number"]
312
312
  inheritance = entry["inheritance"]
313
313
  phenotype_info = entry["phenotypes"]
314
- hgnc_symbol = entry["hgnc_symbol"]
315
314
  hgnc_symbols = entry["hgnc_symbols"]
316
315
  if mim_number in genes:
317
316
  genes[mim_number]["inheritance"] = inheritance
@@ -354,11 +353,11 @@ def get_mim_disease(genemap_lines: Iterable[str]) -> Dict[str, Any]:
354
353
  """
355
354
  diseases_found = {}
356
355
 
357
- # Genemap is a file with one entry per gene.
358
- # Each line hold a lot of information and in specific it
359
- # has information about the phenotypes that a gene is associated with
360
- # From this source we collect inheritane patterns and what hgnc symbols
361
- # a disease is associated with
356
+ # Genemap2 is a file with one entry per gene.
357
+ # Each line holds a lot of information and in particular it
358
+ # has information about the phenotypes that a gene is associated with.
359
+ # From this source we collect inheritance patterns and what hgnc symbols
360
+ # a disease is associated with.
362
361
  for entry in parse_genemap2(genemap_lines):
363
362
  hgnc_symbol = entry["hgnc_symbol"]
364
363
  for disease in entry["phenotypes"]:
scout/parse/panelapp.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Code to parse panel information"""
2
2
 
3
3
  import logging
4
- from typing import Dict, Optional
4
+ from typing import Optional, Set
5
5
 
6
6
  from scout.constants import INCOMPLETE_PENETRANCE_MAP, MODELS_MAP, PANELAPP_CONFIDENCE_EXCLUDE
7
7
  from scout.utils.date import get_date
@@ -11,9 +11,8 @@ PANELAPP_PANELS_URL = "https://panelapp.genomicsengland.co.uk/panels/"
11
11
 
12
12
 
13
13
  def parse_panel_app_gene(
14
+ hgnc_gene_ids: Set[int],
14
15
  panelapp_gene: dict,
15
- ensembl_gene_hgnc_id_map: Dict[str, int],
16
- hgnc_symbol_ensembl_gene_map: Dict[str, str],
17
16
  confidence: str,
18
17
  ) -> dict:
19
18
  """Parse a panel app-formatted gene."""
@@ -23,55 +22,30 @@ def parse_panel_app_gene(
23
22
  if confidence_level in PANELAPP_CONFIDENCE_EXCLUDE[confidence]:
24
23
  return gene_info
25
24
 
26
- hgnc_symbol = panelapp_gene["gene_data"]["gene_symbol"]
27
- ensembl_ids = [
28
- version["ensembl_id"]
29
- for genome in panelapp_gene["gene_data"]["ensembl_genes"].values()
30
- for version in genome.values()
31
- ]
32
-
33
- if not ensembl_ids: # This gene is probably tagged as ensembl_ids_known_missing on PanelApp
34
- if hgnc_symbol in hgnc_symbol_ensembl_gene_map:
35
- LOG.warning(
36
- f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs. Using Ensembl IDs from internal gene collection instead."
37
- )
38
- ensembl_ids = [hgnc_symbol_ensembl_gene_map[hgnc_symbol]]
39
- else:
40
- LOG.warning(
41
- f"PanelApp gene {hgnc_symbol} does not contain Ensembl IDs and gene symbol does not correspond to a gene in scout."
42
- )
43
-
44
- hgnc_ids = set(
45
- ensembl_gene_hgnc_id_map.get(ensembl_id)
46
- for ensembl_id in ensembl_ids
47
- if ensembl_gene_hgnc_id_map.get(ensembl_id)
48
- )
49
- if not hgnc_ids:
50
- LOG.warning("Gene %s does not exist in database. Skipping gene...", hgnc_symbol)
25
+ gene_symbol = panelapp_gene["gene_data"]["gene_symbol"]
26
+ hgnc_id = int(panelapp_gene["gene_data"]["hgnc_id"].split(":")[1])
27
+ if hgnc_id not in hgnc_gene_ids:
28
+ LOG.warning("Gene %s does not exist in database. Skipping gene...", gene_symbol)
51
29
  return gene_info
52
30
 
53
- if len(hgnc_ids) > 1:
54
- LOG.warning("Gene %s has unclear identifier. Choose random id", hgnc_symbol)
31
+ gene_info["hgnc_id"] = hgnc_id
32
+ gene_info["hgnc_symbol"] = gene_symbol
55
33
 
56
- gene_info["hgnc_symbol"] = hgnc_symbol
57
- for hgnc_id in hgnc_ids:
58
- gene_info["hgnc_id"] = hgnc_id
34
+ if panelapp_gene["penetrance"] in ["Complete", "Incomplete"]:
35
+ gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
59
36
 
60
- gene_info["reduced_penetrance"] = INCOMPLETE_PENETRANCE_MAP.get(panelapp_gene["penetrance"])
37
+ mode_of_inheritance = panelapp_gene.get("mode_of_inheritance")
38
+ if mode_of_inheritance not in MODELS_MAP:
39
+ LOG.warning(f"Mode of inheritance '{mode_of_inheritance}' not found in MODELS_MAP.")
61
40
 
62
- inheritance_models = []
63
- for model in MODELS_MAP.get(panelapp_gene["mode_of_inheritance"], []):
64
- inheritance_models.append(model)
65
-
66
- gene_info["inheritance_models"] = inheritance_models
41
+ gene_info["inheritance_models"] = MODELS_MAP.get(mode_of_inheritance, [])
67
42
 
68
43
  return gene_info
69
44
 
70
45
 
71
46
  def parse_panelapp_panel(
47
+ hgnc_gene_ids: Set[int],
72
48
  panel_info: dict,
73
- ensembl_id_to_hgnc_id_map: Dict[str, int],
74
- hgnc_symbol_to_ensembl_id_map: Dict[str, str],
75
49
  institute: Optional[str] = "cust000",
76
50
  confidence: Optional[str] = "green",
77
51
  ) -> dict:
@@ -101,7 +75,7 @@ def parse_panelapp_panel(
101
75
  nr_genes = 0
102
76
  for nr_genes, gene in enumerate(panel_info["genes"], 1):
103
77
  gene_info = parse_panel_app_gene(
104
- gene, ensembl_id_to_hgnc_id_map, hgnc_symbol_to_ensembl_id_map, confidence
78
+ hgnc_gene_ids=hgnc_gene_ids, panelapp_gene=gene, confidence=confidence
105
79
  )
106
80
  if not gene_info:
107
81
  nr_excluded += 1
@@ -1,43 +1,42 @@
1
1
  import logging
2
+ from typing import List
2
3
 
3
4
  from scout.utils.md5 import generate_md5_key
4
5
 
5
6
  LOG = logging.getLogger(__name__)
6
7
 
7
8
 
8
- def parse_compounds(compound_info, case_id, variant_type):
9
- """Get a list with compounds objects for this variant.
9
+ def parse_compounds(compound_info: str, case_id: str, variant_type: str) -> List[dict]:
10
+ """Get a list of compound objects (dicts) for this variant.
10
11
 
11
- Arguments:
12
- compound_info(str): A Variant dictionary
13
- case_id (str): unique family id
14
- variant_type(str): 'research' or 'clinical'
12
+ Scout IDs do not have "chr" prefixed chromosome names, hence we lstrip that from
13
+ any compound names.
14
+
15
+ We need the case id to construct the correct id, as well as the variant type (clinical or research).
15
16
 
16
- Returns:
17
- compounds(list(dict)): A list of compounds
18
17
  """
19
- # We need the case to construct the correct id
18
+
20
19
  compounds = []
21
20
  if compound_info:
22
21
  for family_info in compound_info.split(","):
23
- splitted_entry = family_info.split(":")
22
+ split_entry = family_info.split(":")
24
23
  # This is the family id
25
- if splitted_entry[0] == case_id:
26
- for compound in splitted_entry[1].split("|"):
27
- splitted_compound = compound.split(">")
28
- compound_obj = {}
29
- compound_name = splitted_compound[0]
30
- compound_obj["variant"] = generate_md5_key(
31
- compound_name.split("_") + [variant_type, case_id]
32
- )
24
+ if split_entry[0] == case_id:
25
+ for compound in split_entry[1].split("|"):
26
+ split_compound = compound.split(">")
27
+ compound_name = split_compound[0].lstrip("chr")
28
+ compound_obj = {
29
+ "display_name": compound_name,
30
+ "variant": generate_md5_key(
31
+ compound_name.split("_") + [variant_type, case_id]
32
+ ),
33
+ }
33
34
 
34
35
  try:
35
- compound_score = float(splitted_compound[1])
36
+ compound_score = float(split_compound[1])
36
37
  except (TypeError, IndexError):
37
38
  compound_score = 0.0
38
-
39
39
  compound_obj["score"] = compound_score
40
- compound_obj["display_name"] = compound_name
41
40
 
42
41
  compounds.append(compound_obj)
43
42
 
File without changes
File without changes