gsMap3D 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. gsMap/__init__.py +13 -0
  2. gsMap/__main__.py +4 -0
  3. gsMap/cauchy_combination_test.py +342 -0
  4. gsMap/cli.py +355 -0
  5. gsMap/config/__init__.py +72 -0
  6. gsMap/config/base.py +296 -0
  7. gsMap/config/cauchy_config.py +79 -0
  8. gsMap/config/dataclasses.py +235 -0
  9. gsMap/config/decorators.py +302 -0
  10. gsMap/config/find_latent_config.py +276 -0
  11. gsMap/config/format_sumstats_config.py +54 -0
  12. gsMap/config/latent2gene_config.py +461 -0
  13. gsMap/config/ldscore_config.py +261 -0
  14. gsMap/config/quick_mode_config.py +242 -0
  15. gsMap/config/report_config.py +81 -0
  16. gsMap/config/spatial_ldsc_config.py +334 -0
  17. gsMap/config/utils.py +286 -0
  18. gsMap/find_latent/__init__.py +3 -0
  19. gsMap/find_latent/find_latent_representation.py +312 -0
  20. gsMap/find_latent/gnn/distribution.py +498 -0
  21. gsMap/find_latent/gnn/encoder_decoder.py +186 -0
  22. gsMap/find_latent/gnn/gcn.py +85 -0
  23. gsMap/find_latent/gnn/gene_former.py +164 -0
  24. gsMap/find_latent/gnn/loss.py +18 -0
  25. gsMap/find_latent/gnn/st_model.py +125 -0
  26. gsMap/find_latent/gnn/train_step.py +177 -0
  27. gsMap/find_latent/st_process.py +781 -0
  28. gsMap/format_sumstats.py +446 -0
  29. gsMap/generate_ldscore.py +1018 -0
  30. gsMap/latent2gene/__init__.py +18 -0
  31. gsMap/latent2gene/connectivity.py +781 -0
  32. gsMap/latent2gene/entry_point.py +141 -0
  33. gsMap/latent2gene/marker_scores.py +1265 -0
  34. gsMap/latent2gene/memmap_io.py +766 -0
  35. gsMap/latent2gene/rank_calculator.py +590 -0
  36. gsMap/latent2gene/row_ordering.py +182 -0
  37. gsMap/latent2gene/row_ordering_jax.py +159 -0
  38. gsMap/ldscore/__init__.py +1 -0
  39. gsMap/ldscore/batch_construction.py +163 -0
  40. gsMap/ldscore/compute.py +126 -0
  41. gsMap/ldscore/constants.py +70 -0
  42. gsMap/ldscore/io.py +262 -0
  43. gsMap/ldscore/mapping.py +262 -0
  44. gsMap/ldscore/pipeline.py +615 -0
  45. gsMap/pipeline/quick_mode.py +134 -0
  46. gsMap/report/__init__.py +2 -0
  47. gsMap/report/diagnosis.py +375 -0
  48. gsMap/report/report.py +100 -0
  49. gsMap/report/report_data.py +1832 -0
  50. gsMap/report/static/js_lib/alpine.min.js +5 -0
  51. gsMap/report/static/js_lib/tailwindcss.js +83 -0
  52. gsMap/report/static/template.html +2242 -0
  53. gsMap/report/three_d_combine.py +312 -0
  54. gsMap/report/three_d_plot/three_d_plot_decorate.py +246 -0
  55. gsMap/report/three_d_plot/three_d_plot_prepare.py +202 -0
  56. gsMap/report/three_d_plot/three_d_plots.py +425 -0
  57. gsMap/report/visualize.py +1409 -0
  58. gsMap/setup.py +5 -0
  59. gsMap/spatial_ldsc/__init__.py +0 -0
  60. gsMap/spatial_ldsc/io.py +656 -0
  61. gsMap/spatial_ldsc/ldscore_quick_mode.py +912 -0
  62. gsMap/spatial_ldsc/spatial_ldsc_jax.py +382 -0
  63. gsMap/spatial_ldsc/spatial_ldsc_multiple_sumstats.py +439 -0
  64. gsMap/utils/__init__.py +0 -0
  65. gsMap/utils/generate_r2_matrix.py +610 -0
  66. gsMap/utils/jackknife.py +518 -0
  67. gsMap/utils/manhattan_plot.py +643 -0
  68. gsMap/utils/regression_read.py +177 -0
  69. gsMap/utils/torch_utils.py +23 -0
  70. gsmap3d-0.1.0a1.dist-info/METADATA +168 -0
  71. gsmap3d-0.1.0a1.dist-info/RECORD +74 -0
  72. gsmap3d-0.1.0a1.dist-info/WHEEL +4 -0
  73. gsmap3d-0.1.0a1.dist-info/entry_points.txt +2 -0
  74. gsmap3d-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,643 @@
1
+ """
2
+ Modified from dash-bio ManhattanPlot (https://github.com/plotly/dash-bio/blob/master/dash_bio/component_factory/_manhattan.py)
3
+ """
4
+
5
+ import warnings
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import plotly.graph_objects as go
10
+ from pandas.api.types import is_numeric_dtype
11
+
12
+ # %%
13
# Values used as the `name` of the two horizontal threshold shapes
# (suggestive / genome-wide) that _ManhattanPlot.figure adds to the layout.
SUGGESTIVE_LINE_LABEL = "suggestive line"
GENOMEWIDE_LINE_LABEL = "genomewide line"
15
+
16
+
17
+ def _get_hover_text(df, snpname=None, genename=None, annotationname=None):
18
+ """Format the hover text used in Manhattan and Volcano plots.
19
+ :param (dataFrame) df: A pandas dataframe.
20
+ :param (string) snpname: A string denoting the column name for the SNP
21
+ names (e.g., rs number). More generally, this column could be anything
22
+ that identifies each point being plotted. For example,
23
+ in an Epigenomewide association study (EWAS), this could be the probe
24
+ name or cg number. This column should be a character. This argument is
25
+ optional, however it is necessary to specify it if you want to
26
+ highlight points on the plot using the highlight argument in the
27
+ figure method.
28
+ :param (string) genename: A string denoting the column name for the
29
+ GENE names.
30
+ :param (string) annotationname: A string denoting the column name for
31
+ annotations. This could be any annotation information that you
32
+ want to include in the plot (e.g., zscore, effect size, minor allele
33
+ frequency).
34
+ """
35
+ hover_text = ""
36
+ if snpname is not None and snpname in df.columns:
37
+ hover_text = "SNP: " + df[snpname].astype(str)
38
+
39
+ if genename is not None and genename in df.columns:
40
+ hover_text = hover_text + "<br>GENE: " + df[genename].astype(str)
41
+
42
+ if annotationname is not None and annotationname in df.columns:
43
+ hover_text = hover_text + "<br>" + df[annotationname].astype(str)
44
+
45
+ return hover_text
46
+
47
+
48
def ManhattanPlot(
    dataframe,
    chrm="CHR",
    bp="BP",
    p="P",
    snp="SNP",
    gene="GENE",
    annotation=None,
    logp=True,
    title="Manhattan Plot",
    showgrid=True,
    xlabel=None,
    ylabel="-log10(p)",
    point_size=5,
    showlegend=True,
    col=None,
    suggestiveline_value=-np.log10(1e-8),
    suggestiveline_color="#636efa",
    suggestiveline_width=1,
    genomewideline_value=-np.log10(5e-8),
    genomewideline_color="#EF553B",
    genomewideline_width=1,
    highlight_color="red",
    highlight_gene_list=None,
    highlight=True,
    **kwargs,
):
    """Returns a figure for a manhattan plot.

    Keyword arguments:
    - dataframe (dataframe; required): A pandas dataframe which must contain at
        least the following three columns:
                - the chromosome number
                - genomic base-pair position
                - a numeric quantity to plot such as a p-value or zscore
    - chrm (string; default 'CHR'): A string denoting the column name for
        the chromosome. This column must be float or integer. Minimum
        number of chromosomes required is 1. If you have X, Y, or MT
        chromosomes, be sure to renumber these 23, 24, 25, etc.
    - bp (string; default 'BP'): A string denoting the column name for the
        chromosomal position.
    - p (string; default 'P'): A string denoting the column name for the
        float quantity to be plotted on the y-axis. This column must be
        numeric. It does not have to be a p-value. It can be any numeric
        quantity such as peak heights, Bayes factors, test statistics. If
        it is not a p-value, make sure to set logp = False.
    - snp (string; default 'SNP'): A string denoting the column name for
        the SNP names (e.g., rs number). More generally, this column could
        be anything that identifies each point being plotted. Pass None
        if the dataframe has no such column.
    - gene (string; default 'GENE'): A string denoting the column name for
        the GENE names. This column is used to select the points to
        highlight (see highlight_gene_list). Pass None if the dataframe has
        no such column.
    - annotation (string; optional): A string denoting the column to use
        as annotations. It could be any annotation information that you
        want to include in the hover text (e.g., zscore, effect size,
        minor allele frequency).
    - logp (bool; default True): If True, the -log10 of the p-value is
        plotted. Plotting the raw value could be useful for other
        genome-wide plots (e.g., peak heights, Bayes factors, test
        statistics, other "scores", etc.)
    - title (string; default 'Manhattan Plot'): The title of the graph.
    - showgrid (bool; default True): Boolean indicating whether gridlines
        should be shown.
    - xlabel (string; optional): Label of the x axis.
    - ylabel (string; default '-log10(p)'): Label of the y axis.
    - point_size (number; default 5): Size of the points of the Scatter
        plot.
    - showlegend (bool; default True): Boolean indicating whether legends
        should be shown.
    - col (optional): Colors of the points of the scatter plot, indexed
        per plotted chromosome. Can be in any color format accepted by
        plotly.graph_objects.
    - suggestiveline_value (bool | float; default -log10(1e-8)): Either
        False to deactivate the option, or the y value at which the line
        should be drawn. The line has no influence on the data points.
    - suggestiveline_color (string; default '#636efa'): Color of the
        suggestive line.
    - suggestiveline_width (number; default 1): Width of the suggestive
        line.
    - genomewideline_value (bool | float; default -log10(5e-8)): Either
        False to deactivate the option, or the y value at which the
        genome-wide line should be drawn.
    - genomewideline_color (string; default '#EF553B'): Color of the
        genome-wide line. Can be in any color format accepted by
        plotly.graph_objects.
    - genomewideline_width (number; default 1): Width of the genome-wide
        line.
    - highlight_color (string; default 'red'): Color of the highlighted
        data points when no gradient column is used.
    - highlight_gene_list (list; optional): Gene names whose points are
        drawn as a separate, larger trace. Required whenever highlight is
        True.
    - highlight (bool; default True): Turns the highlighting of the points
        belonging to highlight_gene_list on or off.
    - highlight_color_by (string; optional, passed through **kwargs): A
        column of the dataframe used to color the highlighted points with a
        gradient instead of highlight_color.

    Any other keyword argument is accepted and ignored.

    Example: Random Manhattan Plot
        '''
        dataframe = pd.DataFrame({
            'CHR': np.repeat([1, 2], 50),
            'BP': np.tile(np.arange(1, 51), 2),
            'P': np.random.uniform(1e-9, 1, size=100),
        })
        fig = ManhattanPlot(
            dataframe, snp=None, gene=None, highlight=False,
            title='XYZ Manhattan plot')
        fig.show()
        '''

    """
    highlight_color_by = kwargs.get("highlight_color_by")

    mh = _ManhattanPlot(
        dataframe,
        chrm=chrm,
        bp=bp,
        p=p,
        snp=snp,
        gene=gene,
        annotation=annotation,
        logp=logp,
        color_by=highlight_color_by,
    )

    return mh.figure(
        title=title,
        showgrid=showgrid,
        xlabel=xlabel,
        ylabel=ylabel,
        point_size=point_size,
        showlegend=showlegend,
        col=col,
        suggestiveline_value=suggestiveline_value,
        suggestiveline_color=suggestiveline_color,
        suggestiveline_width=suggestiveline_width,
        genomewideline_value=genomewideline_value,
        genomewideline_color=genomewideline_color,
        genomewideline_width=genomewideline_width,
        highlight=highlight,
        highlight_color=highlight_color,
        highlight_gene_list=highlight_gene_list,
        # The column is copied into the plot data by the constructor, but the
        # gradient is only applied when figure() is told which column to use.
        highlight_color_by=highlight_color_by,
    )
184
+
185
+
186
+ class _ManhattanPlot:
187
+ def __init__(
188
+ self, x, chrm="CHR", bp="BP", p="P", snp="SNP", gene="GENE", annotation=None, logp=True, color_by=None
189
+ ):
190
+ """
191
+ Keyword arguments:
192
+ - dataframe (dataframe; required): A pandas dataframe which
193
+ must contain at least the following three columns:
194
+ - the chromosome number
195
+ - genomic base-pair position
196
+ - a numeric quantity to plot such as a p-value or zscore
197
+ - chrm (string; default 'CHR'): A string denoting the column name for the
198
+ chromosome. This column must be float or integer. Minimum number
199
+ of chromosomes required is 1. If you have X, Y, or MT chromosomes,
200
+ be sure to renumber these 23, 24, 25, etc.
201
+ - bp (string; default 'BP'): A string denoting the column name for the
202
+ chromosomal position.
203
+ - p (string; default 'P'): A string denoting the column name for the
204
+ float quantity to be plotted on the y-axis. This column must be
205
+ numeric. This does not have to be a p-value. It can be any
206
+ numeric quantity such as peak heights, bayes factors, test
207
+ statistics. If it is not a p-value, make sure to set logp = FALSE.
208
+ - snp (string; default 'SNP'): A string denoting the column name for the
209
+ SNP names (e.g. rs number). More generally, this column could be
210
+ anything that identifies each point being plotted. For example, in
211
+ an Epigenomewide association study (EWAS) this could be the probe
212
+ name or cg number. This column should be a character. This
213
+ argument is optional, however it is necessary to specify if you
214
+ want to highlight points on the plot using the highlight argument
215
+ in the figure method.
216
+ - gene (string; default 'GENE'): A string denoting the column name for the
217
+ GENE names. This column could be a string or a float. More
218
+ generally, it could be any annotation information that you want
219
+ to include in the plot.
220
+ - annotation (string; optional): A string denoting the column name for
221
+ an annotation. This column could be a string or a float. This
222
+ could be any annotation information that you want to include in
223
+ the plot (e.g. zscore, effect size, minor allele frequency).
224
+ - logp (bool; default True): If True, the -log10 of the p-value is
225
+ plotted. It isn't very useful to plot raw p-values; however,
226
+ plotting the raw value could be useful for other genome-wide plots
227
+ (e.g., peak heights, Bayes factors, test statistics, other
228
+ "scores", etc.).
229
+
230
+ Returns
231
+ -------
232
+ - A ManhattanPlot object.
233
+ """
234
+ # checking the validity of the arguments
235
+
236
+ # Make sure you have chrm, bp and p columns and that they are of
237
+ # numeric type
238
+ if chrm not in x.columns.values:
239
+ raise KeyError("Column %s not found in 'x' data.frame" % chrm)
240
+ else:
241
+ if not is_numeric_dtype(x[chrm].dtype):
242
+ raise TypeError(
243
+ "%s column should be numeric. Do you have "
244
+ "'X', 'Y', 'MT', etc? If so change to "
245
+ "numbers and try again." % chrm
246
+ )
247
+
248
+ if bp not in x.columns.values:
249
+ raise KeyError("Column %s not found in 'x' data.frame" % bp)
250
+ else:
251
+ if not is_numeric_dtype(x[bp].dtype):
252
+ raise TypeError("%s column should be numeric type" % bp)
253
+
254
+ if p not in x.columns.values:
255
+ raise KeyError("Column %s not found in 'x' data.frame" % p)
256
+ else:
257
+ if not is_numeric_dtype(x[p].dtype):
258
+ raise TypeError("%s column should be numeric type" % p)
259
+
260
+ # Create a new DataFrame with columns named after chrm, bp, and p.
261
+ self.data = pd.DataFrame(data=x[[chrm, bp, p]])
262
+
263
+ if snp is not None:
264
+ if snp not in x.columns.values:
265
+ # Warn if you don't have a snp column
266
+ raise KeyError(
267
+ "snp argument specified as %s but column not found in 'x' data.frame" % snp
268
+ )
269
+ else:
270
+ # If the input DataFrame has a snp column, add it to the new
271
+ # DataFrame
272
+ self.data[snp] = x[snp]
273
+
274
+ if gene is not None:
275
+ if gene not in x.columns.values:
276
+ # Warn if you don't have a gene column
277
+ raise KeyError(
278
+ "gene argument specified as %s but column not found in 'x' data.frame" % gene
279
+ )
280
+ else:
281
+ # If the input DataFrame has a gene column, add it to the new
282
+ # DataFrame
283
+ self.data[gene] = x[gene]
284
+
285
+ if annotation is not None:
286
+ if annotation not in x.columns.values:
287
+ # Warn if you don't have an annotation column
288
+ raise KeyError(
289
+ "annotation argument specified as %s but column not "
290
+ "found in 'x' data.frame" % annotation
291
+ )
292
+ else:
293
+ # If the input DataFrame has a gene column, add it to the new
294
+ # DataFrame
295
+ self.data[annotation] = x[annotation]
296
+
297
+ if color_by is not None:
298
+ if color_by in x.columns:
299
+ self.data[color_by] = x[color_by]
300
+ else:
301
+ import logging
302
+ logging.getLogger(__name__).warning(f"color_by column {color_by} not found in input.")
303
+
304
+ self.xlabel = ""
305
+ self.ticks = []
306
+ self.ticksLabels = []
307
+ self.nChr = len(x[chrm].unique())
308
+ self.chrName = chrm
309
+ self.pName = p
310
+ self.snpName = snp
311
+ self.geneName = gene
312
+ self.annotationName = annotation
313
+ self.logp = logp
314
+
315
+ # Set positions, ticks, and labels for plotting
316
+
317
+ self.index = "INDEX"
318
+ self.pos = "POSITION"
319
+
320
+ self.data[self.index] = 0 # Initialize with zeros as default value
321
+
322
+ if not self.data.empty and len(self.data[chrm].unique()) > 0:
323
+ idx = 0
324
+ for i in self.data[chrm].unique():
325
+ idx = idx + 1
326
+ self.data.loc[self.data[chrm] == i, self.index] = int(idx)
327
+ else:
328
+ import logging
329
+
330
+ logger = logging.getLogger("gsMap.utils.manhattan_plot")
331
+ logger.warning(
332
+ "No chromosome data found or empty dataframe when creating Manhattan plot"
333
+ )
334
+
335
+ self.data[self.index] = self.data[self.index].astype(self.data[chrm].dtype)
336
+
337
+ # This section sets up positions and ticks. Ticks should be placed in
338
+ # the middle of a chromosome. The new pos column is added that keeps
339
+ # a running sum of the positions of each successive chromosome.
340
+ # For example:
341
+ # chrm bp pos
342
+ # 1 1 1
343
+ # 1 2 2
344
+ # 2 1 3
345
+ # 2 2 4
346
+ # 3 1 5
347
+
348
+ if self.nChr == 1:
349
+ # For a single chromosome
350
+ self.data[self.pos] = self.data[bp]
351
+ self.ticks.append(int(len(self.data[self.pos]) / 2.0) + 1)
352
+ self.xlabel = "Chromosome %s position" % (self.data[chrm].unique())
353
+ self.ticksLabels = self.ticks
354
+ else:
355
+ # For multiple chromosomes
356
+ lastbase = 0
357
+ for i in self.data[self.index].unique():
358
+ if i == 1:
359
+ self.data.loc[self.data[self.index] == i, self.pos] = self.data.loc[
360
+ self.data[self.index] == i, bp
361
+ ].values
362
+ else:
363
+ prevbp = self.data.loc[self.data[self.index] == i - 1, bp]
364
+ # Shift the basepair position by the largest bp of the
365
+ # current chromosome
366
+ lastbase = lastbase + prevbp.iat[-1]
367
+
368
+ self.data.loc[self.data[self.index] == i, self.pos] = (
369
+ self.data.loc[self.data[self.index] == i, bp].values + lastbase
370
+ )
371
+
372
+ tmin = min(self.data.loc[self.data[self.index] == i, self.pos])
373
+ tmax = max(self.data.loc[self.data[self.index] == i, self.pos])
374
+ self.ticks.append(int((tmin + tmax) / 2.0) + 1)
375
+
376
+ self.xlabel = "Chromosome"
377
+ self.data[self.pos] = self.data[self.pos].astype(self.data[bp].dtype)
378
+
379
+ if self.nChr > 10: # To avoid crowded labels
380
+ self.ticksLabels = [
381
+ t
382
+ if np.mod(int(t), 2) # Only every two ticks
383
+ else ""
384
+ for t in self.data[chrm].unique()
385
+ ]
386
+ else:
387
+ self.ticksLabels = self.data[chrm].unique() # All the ticks
388
+
389
+ def figure(
390
+ self,
391
+ title="Manhattan Plot",
392
+ showgrid=True,
393
+ xlabel=None,
394
+ ylabel="-log10(p)",
395
+ point_size=5,
396
+ showlegend=True,
397
+ col=None,
398
+ suggestiveline_value=-np.log10(1e-8),
399
+ suggestiveline_color="blue",
400
+ suggestiveline_width=1,
401
+ genomewideline_value=-np.log10(5e-8),
402
+ genomewideline_color="red",
403
+ genomewideline_width=1,
404
+ highlight=True,
405
+ highlight_color="red",
406
+ highlight_gene_list=None,
407
+ highlight_color_by=None,
408
+ ):
409
+ """Keyword arguments:
410
+ - title (string; default 'Manhattan Plot'): The title of the
411
+ graph.
412
+ - showgrid (bool; default True): Boolean indicating whether
413
+ gridlines should be shown.
414
+ - xlabel (string; optional): Label of the x axis.
415
+ - ylabel (string; default '-log10(p)'): Label of the y axis.
416
+ - point_size (number; default 5): Size of the points of the
417
+ scatter plot.
418
+ - showlegend (bool; default True): Boolean indicating whether
419
+ legends should be shown.
420
+ - col (string; optional): A string representing the color of the
421
+ points of the Scatter plot. Can be in any color format
422
+ accepted by plotly.graph_objects.
423
+ - suggestiveline_value (bool | float; default 8): A value which
424
+ must be either False to deactivate the option, or a numerical value
425
+ corresponding to the p-value at which the line should be
426
+ drawn. The line has no influence on the data points.
427
+ - suggestiveline_color (string; default 'grey'): Color of the
428
+ suggestive line.
429
+ - suggestiveline_width (number; default 2): Width of the
430
+ suggestive line.
431
+ - genomewideline_value (bool | float; default -log10(5e-8)): A
432
+ boolean which must be either False to deactivate the option, or a
433
+ numerical value corresponding to the p-value above which the
434
+ data points are considered significant.
435
+ - genomewideline_color (string; default 'red'): Color of the
436
+ genome-wide line. Can be in any color format accepted by
437
+ plotly.graph_objects.
438
+ - genomewideline_width (number; default 1): Width of the genome
439
+ wide line.
440
+ - highlight (bool; default True): Whether to turn on or off the
441
+ highlighting of data points considered significant.
442
+ - highlight_color (string; default 'red'): Color of the data
443
+ points highlighted because they are significant. Can be in any
444
+ color format accepted by plotly.graph_objects.
445
+ - highlight_color_by (string; optional): A column name in the
446
+ dataframe to use for coloring the highlighted points (gradient).
447
+
448
+ Returns
449
+ -------
450
+ - A figure formatted for plotly.graph_objects.
451
+
452
+ """
453
+ xmin = min(self.data[self.pos].values)
454
+ xmax = max(self.data[self.pos].values)
455
+
456
+ horizontallines = []
457
+
458
+ if suggestiveline_value:
459
+ suggestiveline = go.layout.Shape(
460
+ name=SUGGESTIVE_LINE_LABEL,
461
+ type="line",
462
+ fillcolor=suggestiveline_color,
463
+ line=dict(color=suggestiveline_color, width=suggestiveline_width),
464
+ x0=xmin,
465
+ x1=xmax,
466
+ xref="x",
467
+ y0=suggestiveline_value,
468
+ y1=suggestiveline_value,
469
+ yref="y",
470
+ )
471
+ horizontallines.append(suggestiveline)
472
+
473
+ if genomewideline_value:
474
+ genomewideline = go.layout.Shape(
475
+ name=GENOMEWIDE_LINE_LABEL,
476
+ type="line",
477
+ fillcolor=genomewideline_color,
478
+ line=dict(color=genomewideline_color, width=genomewideline_width),
479
+ x0=xmin,
480
+ x1=xmax,
481
+ xref="x",
482
+ y0=genomewideline_value,
483
+ y1=genomewideline_value,
484
+ yref="y",
485
+ )
486
+ horizontallines.append(genomewideline)
487
+
488
+ data_to_plot = [] # To contain the data traces
489
+ highlight_tmp = pd.DataFrame() # Empty DataFrame to contain the highlighted data
490
+
491
+ if highlight:
492
+ if not isinstance(highlight, bool):
493
+ if self.snpName not in self.data.columns.values:
494
+ raise KeyError(
495
+ "snp argument specified for highlight as %s but "
496
+ "column not found in the data.frame" % self.snpName
497
+ )
498
+ else:
499
+ if not highlight_gene_list:
500
+ raise KeyError("Please provide a list of genes to highlight")
501
+ common_genes = set(self.data[self.geneName].unique()).intersection(
502
+ highlight_gene_list
503
+ )
504
+ if len(common_genes) == 0:
505
+ # Don't raise error, just warn
506
+ import logging
507
+ logging.getLogger(__name__).warning("No common genes found in the data to highlight")
508
+ elif len(common_genes) < len(highlight_gene_list):
509
+ warnings.warn(
510
+ f"Some genes don't contain any SNP to highlight: "
511
+ f": {set(highlight_gene_list) - common_genes}",
512
+ stacklevel=2,
513
+ )
514
+
515
+ highlight_tmp = self.data[self.data[self.geneName].isin(common_genes)]
516
+
517
+ highlight_hover_text = _get_hover_text(
518
+ highlight_tmp,
519
+ snpname=self.snpName,
520
+ genename=self.geneName,
521
+ annotationname=self.annotationName,
522
+ )
523
+
524
+ # Remove the highlighted data from the DataFrame if not empty
525
+ if highlight_tmp.empty:
526
+ data = self.data
527
+ else:
528
+ data = self.data.drop(highlight_tmp.index)
529
+
530
+ if self.nChr == 1:
531
+ if col is None:
532
+ col = ["black"]
533
+
534
+ # If single chromosome, ticks and labels automatic.
535
+ layout = go.Layout(
536
+ title=title,
537
+ xaxis={
538
+ "title": self.xlabel if xlabel is None else xlabel,
539
+ "showgrid": showgrid,
540
+ "range": [xmin, xmax],
541
+ },
542
+ yaxis={"title": ylabel},
543
+ hovermode="closest",
544
+ )
545
+
546
+ hover_text = _get_hover_text(
547
+ data,
548
+ snpname=self.snpName,
549
+ genename=self.geneName,
550
+ annotationname=self.annotationName,
551
+ )
552
+
553
+ data_to_plot.append(
554
+ go.Scattergl(
555
+ x=data[self.pos].values,
556
+ y=-np.log10(data[self.pName].values) if self.logp else data[self.pName].values,
557
+ mode="markers",
558
+ showlegend=showlegend,
559
+ name="chr%i" % data[self.chrName].unique()[0],
560
+ marker={"color": col[0], "size": point_size},
561
+ text=hover_text,
562
+ )
563
+ )
564
+ else:
565
+ # if multiple chrms, use the ticks and labels you created above.
566
+ layout = go.Layout(
567
+ title=title,
568
+ xaxis={
569
+ "title": self.xlabel if xlabel is None else xlabel,
570
+ "showgrid": showgrid,
571
+ "range": [xmin, xmax],
572
+ "tickmode": "array",
573
+ "tickvals": self.ticks,
574
+ "ticktext": self.ticksLabels,
575
+ "ticks": "outside",
576
+ },
577
+ yaxis={"title": ylabel},
578
+ hovermode="closest",
579
+ )
580
+
581
+ icol = 0
582
+ if col is None:
583
+ col = ["black" if np.mod(i, 2) else "grey" for i in range(self.nChr)]
584
+
585
+ for i in data[self.index].unique():
586
+ tmp = data[data[self.index] == i]
587
+
588
+ chromo = tmp[self.chrName].unique() # Get chromosome name
589
+
590
+ hover_text = _get_hover_text(
591
+ tmp,
592
+ snpname=self.snpName,
593
+ genename=self.geneName,
594
+ annotationname=self.annotationName,
595
+ )
596
+
597
+ data_to_plot.append(
598
+ go.Scattergl(
599
+ x=tmp[self.pos].values,
600
+ y=-np.log10(tmp[self.pName].values)
601
+ if self.logp
602
+ else tmp[self.pName].values,
603
+ mode="markers",
604
+ showlegend=showlegend,
605
+ name="Chr%i" % chromo[0],
606
+ marker={"color": col[icol], "size": point_size},
607
+ text=hover_text,
608
+ )
609
+ )
610
+
611
+ icol = icol + 1
612
+
613
+ if not highlight_tmp.empty:
614
+ if highlight_color_by and highlight_color_by in highlight_tmp.columns:
615
+ marker_dict = dict(
616
+ color=highlight_tmp[highlight_color_by].values,
617
+ colorscale="Plasma",
618
+ showscale=True,
619
+ colorbar=dict(title=highlight_color_by, x=1.1, len=0.5),
620
+ size=point_size * 2,
621
+ )
622
+ else:
623
+ marker_dict = dict(
624
+ color=highlight_color,
625
+ size=point_size * 2,
626
+ )
627
+
628
+ data_to_plot.append(
629
+ go.Scattergl(
630
+ x=highlight_tmp[self.pos].values,
631
+ y=-np.log10(highlight_tmp[self.pName].values)
632
+ if self.logp
633
+ else highlight_tmp[self.pName].values,
634
+ mode="markers",
635
+ text=highlight_hover_text,
636
+ marker=marker_dict,
637
+ name="SNP-Gene Pairs of interest",
638
+ )
639
+ )
640
+
641
+ layout.shapes = horizontallines
642
+
643
+ return go.Figure(data=data_to_plot, layout=layout)