gsMap 1.62__py3-none-any.whl → 1.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@ import pandas as pd
11
11
  import pyranges as pr
12
12
  from progress.bar import IncrementalBar
13
13
 
14
- from gsMap.generate_r2_matrix import PlinkBEDFileWithR2Cache, getBlockLefts, ID_List_Factory
14
+ from gsMap.utils.generate_r2_matrix import PlinkBEDFileWithR2Cache, getBlockLefts, ID_List_Factory
15
15
 
16
16
 
17
17
  logger = logging.getLogger(__name__)
@@ -52,7 +52,6 @@ class MakeAnnotationConfig:
52
52
  raise ValueError(f"Invalid ld_wind_unit: {self.ld_wind_unit}. Choose from 'CM', 'BP', or 'SNP'.")
53
53
 
54
54
 
55
-
56
55
  class Snp_Annotator:
57
56
  """
58
57
  1. Annotate SNPs based on score of genes.
@@ -517,44 +516,3 @@ def run_make_annotation(args: MakeAnnotationConfig):
517
516
  args, const_max_size
518
517
  )
519
518
  ldscore_generate.compute_ldscore()
520
-
521
-
522
- if __name__ == '__main__':
523
- parser = argparse.ArgumentParser(description='make_annotations.py',
524
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
525
- add_make_annotation_args(parser)
526
-
527
- # Store the Params
528
- TEST = True
529
- if TEST:
530
- name = 'Cortex_151507'
531
- TASK_ID = 2
532
- test_dir = '/storage/yangjianLab/chenwenhao/projects/202312_gsMap/data/gsMap_test/Nature_Neuroscience_2021'
533
- config = MakeAnnotationConfig(
534
- input_feather_file=f'{test_dir}/{name}/gene_markers/{name}_rank.feather',
535
- sample_name=name,
536
- output_dir=f'{test_dir}/{name}/snp_annotation/new_run',
537
- gtf_file='/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf',
538
- bfile_root='/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC',
539
- baseline_annotation=None,
540
- keep_snp_root='/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm',
541
- chr=TASK_ID,
542
- window_size=50000,
543
- cells_per_chunk=500,
544
- ld_wind=1,
545
- ld_wind_unit='CM',
546
- r2_cache_dir='/storage/yangjianLab/chenwenhao/projects/202312_gsMap/data/gsMap_test/r2_matrix',
547
- use_gpu=True,
548
- snps_per_chunk=100_000
549
- )
550
-
551
- else:
552
- args = parser.parse_args()
553
- config=MakeAnnotationConfig(**vars(args))
554
-
555
- logger.info(f'Running make_annotation for {config.sample_name}')
556
- pprint.pprint(config)
557
- start_time = time.time()
558
- run_make_annotation(config)
559
- end_time = time.time()
560
- logger.info(f'Make SNP annotation for {config.sample_name} finished. Time spent: {(end_time - start_time) / 60:.2f} min.')
@@ -0,0 +1,639 @@
1
+ '''
2
+ Modified from dash-bio ManhattanPlot (https://github.com/plotly/dash-bio/blob/master/dash_bio/component_factory/_manhattan.py)
3
+ '''
4
+ import numpy as np
5
+ import pandas as pd
6
+ from pandas.api.types import is_numeric_dtype
7
+
8
+ import plotly.graph_objects as go
9
+ import warnings
10
+
11
+ # %%
12
+ SUGGESTIVE_LINE_LABEL = "suggestive line"
13
+ GENOMEWIDE_LINE_LABEL = "genomewide line"
14
+
15
+
16
def _get_hover_text(df, snpname=None, genename=None, annotationname=None):
    """Build the per-point hover text used by the Manhattan plot traces.

    Each requested column that actually exists in ``df`` contributes one
    line; lines are joined with the HTML line break ``<br>``. Columns that
    are ``None`` or absent from ``df`` are silently skipped, and no leading
    ``<br>`` is produced when the first column is skipped.

    :param (dataFrame) df: A pandas dataframe holding the points of a trace.
    :param (string) snpname: Column name of the point identifier (e.g. rs
        number). Rendered as ``SNP: <value>``.
    :param (string) genename: Column name of the gene names. Rendered as
        ``GENE: <value>``.
    :param (string) annotationname: Column name of any extra annotation
        (e.g. zscore, effect size). Rendered as the bare value.
    :return: A pandas Series of strings aligned with ``df`` when at least
        one column was found, otherwise the empty string ``''``.
    """
    # (column name, label prefix) in the order the lines are displayed.
    requested = (
        (snpname, 'SNP: '),
        (genename, 'GENE: '),
        (annotationname, ''),
    )

    pieces = []
    for column, prefix in requested:
        if column is None or column not in df.columns:
            continue
        values = df[column].astype(str)
        pieces.append(prefix + values if prefix else values)

    if not pieces:
        return ''

    # Join with '<br>' only *between* pieces so the text never starts with
    # an empty line when the SNP column is missing.
    hover_text = pieces[0]
    for piece in pieces[1:]:
        hover_text = hover_text + '<br>' + piece
    return hover_text
49
+
50
+
51
def ManhattanPlot(
        dataframe,
        chrm="CHR",
        bp="BP",
        p="P",
        snp="SNP",
        gene="GENE",
        annotation=None,
        logp=True,
        title="Manhattan Plot",
        showgrid=True,
        xlabel=None,
        ylabel='-log10(p)',
        point_size=5,
        showlegend=True,
        col=None,
        suggestiveline_value=-np.log10(1e-8),
        suggestiveline_color='#636efa',
        suggestiveline_width=1,
        genomewideline_value=-np.log10(5e-8),
        genomewideline_color='#EF553B',
        genomewideline_width=1,
        highlight=True,
        highlight_color="red",
        highlight_gene_list=None,
):
    """Return a plotly figure for a Manhattan plot.

    Convenience wrapper: the data arguments are handed to
    ``_ManhattanPlot`` and the styling arguments to its ``figure`` method.

    Data arguments:
    - dataframe (dataframe; required): A pandas dataframe containing at
        least the chromosome, base-pair position and plotted-quantity
        columns named by ``chrm``, ``bp`` and ``p``.
    - chrm (string; default 'CHR'): Column name of the chromosome. The
        column must be numeric (renumber X, Y, MT as 23, 24, 25, ...).
    - bp (string; default 'BP'): Column name of the chromosomal position;
        must be numeric.
    - p (string; default 'P'): Column name of the numeric quantity drawn
        on the y axis. If it is not a p-value, set ``logp=False``.
    - snp (string; default 'SNP'): Column name of the point identifier.
        Pass None to omit; a named column must exist in ``dataframe``.
    - gene (string; default 'GENE'): Column name of the gene names. Pass
        None to omit; a named column must exist in ``dataframe``.
    - annotation (string; optional): Column name of extra annotation shown
        in the hover text.
    - logp (bool; default True): Plot ``-log10`` of the ``p`` column
        instead of the raw values.

    Styling arguments:
    - title (string; default 'Manhattan Plot'): Title of the graph.
    - showgrid (bool; default True): Whether x-axis gridlines are shown.
    - xlabel (string; optional): Label of the x axis; a label derived from
        the data is used when None.
    - ylabel (string; default '-log10(p)'): Label of the y axis.
    - point_size (number; default 5): Marker size of the scatter points.
    - showlegend (bool; default True): Whether the chromosome traces
        appear in the legend.
    - col (optional): Colour(s) of the chromosome traces, in any format
        accepted by plotly.graph_objects.
    - suggestiveline_value (bool | float; default -log10(1e-8)): y value
        of the suggestive line, or False to disable it.
    - suggestiveline_color (string; default '#636efa'): Colour of the
        suggestive line.
    - suggestiveline_width (number; default 1): Width of the suggestive
        line.
    - genomewideline_value (bool | float; default -log10(5e-8)): y value
        of the genome-wide line, or False to disable it.
    - genomewideline_color (string; default '#EF553B'): Colour of the
        genome-wide line.
    - genomewideline_width (number; default 1): Width of the genome-wide
        line.
    - highlight (bool; default True): Whether points belonging to
        ``highlight_gene_list`` are drawn as a separate, larger trace.
    - highlight_color (string; default 'red'): Colour of the highlighted
        points.
    - highlight_gene_list (list; optional): Gene names to highlight. The
        ``figure`` method raises KeyError when ``highlight`` is True and
        this list is empty or None.

    Example:
        dataframe = pd.DataFrame({
            'CHR': [1, 1, 2], 'BP': [10, 20, 5], 'P': [0.5, 1e-9, 0.01],
            'SNP': ['rs1', 'rs2', 'rs3'], 'GENE': ['A', 'B', 'B'],
        })
        fig = ManhattanPlot(dataframe, highlight_gene_list=['B'])

    Returns:
    - A plotly.graph_objects Figure.
    """
    plot = _ManhattanPlot(
        dataframe,
        chrm=chrm,
        bp=bp,
        p=p,
        snp=snp,
        gene=gene,
        annotation=annotation,
        logp=logp,
    )

    # Everything that only affects rendering is forwarded to figure().
    figure_options = dict(
        title=title,
        showgrid=showgrid,
        xlabel=xlabel,
        ylabel=ylabel,
        point_size=point_size,
        showlegend=showlegend,
        col=col,
        suggestiveline_value=suggestiveline_value,
        suggestiveline_color=suggestiveline_color,
        suggestiveline_width=suggestiveline_width,
        genomewideline_value=genomewideline_value,
        genomewideline_color=genomewideline_color,
        genomewideline_width=genomewideline_width,
        highlight=highlight,
        highlight_color=highlight_color,
        highlight_gene_list=highlight_gene_list,
    )
    return plot.figure(**figure_options)
193
+
194
+
195
class _ManhattanPlot():
    """Prepare association results for a Manhattan plot and render them.

    ``__init__`` validates the input, copies the needed columns into
    ``self.data`` and adds two helper columns: ``INDEX`` (1-based order in
    which each chromosome first appears) and ``POSITION`` (x coordinate,
    i.e. the base-pair position shifted so chromosomes are laid out one
    after another). ``figure`` turns that into a plotly figure.
    """

    def __init__(
            self,
            x,
            chrm="CHR",
            bp="BP",
            p="P",
            snp="SNP",
            gene="GENE",
            annotation=None,
            logp=True
    ):
        """
        Keyword arguments:
        - x (dataframe; required): A pandas dataframe which must contain at
            least the chromosome, base-pair position and plotted-quantity
            columns named by ``chrm``, ``bp`` and ``p``.
        - chrm (string; default 'CHR'): Column name of the chromosome. The
            column must be numeric; renumber X, Y, MT as 23, 24, 25, etc.
        - bp (string; default 'BP'): Column name of the chromosomal
            position; must be numeric.
        - p (string; default 'P'): Column name of the numeric quantity to
            plot on the y axis. If it is not a p-value, set logp=False.
        - snp (string; default 'SNP'): Column name of the point identifier
            (e.g. rs number), or None to omit it.
        - gene (string; default 'GENE'): Column name of the gene names, or
            None to omit it (gene highlighting in ``figure`` needs it).
        - annotation (string; optional): Column name of extra annotation
            to show in the hover text.
        - logp (bool; default True): If True, ``figure`` plots -log10 of
            the ``p`` column instead of the raw values.

        Raises:
        - KeyError: a mandatory column, or a named snp/gene/annotation
            column, is missing from ``x``.
        - TypeError: the chrm, bp or p column is not numeric.
        """
        # Mandatory columns: each must exist and be numeric. The checks run
        # column by column so the first problem found is the one reported.
        for column in (chrm, bp, p):
            if column not in x.columns.values:
                raise KeyError(
                    "Column %s not found in 'x' data.frame" % column)
            if not is_numeric_dtype(x[column].dtype):
                if column == chrm:
                    raise TypeError("%s column should be numeric. Do you have "
                                    "'X', 'Y', 'MT', etc? If so change to "
                                    "numbers and try again." % chrm)
                raise TypeError("%s column should be numeric type" % column)

        # Work on an independent copy so the helper columns added below
        # never touch (or alias) the caller's dataframe.
        self.data = x[[chrm, bp, p]].copy()

        # Optional columns: copied over when named, an error when named but
        # missing.
        for role, column in (("snp", snp),
                             ("gene", gene),
                             ("annotation", annotation)):
            if column is None:
                continue
            if column not in x.columns.values:
                raise KeyError(
                    "%s argument specified as %s but column not found in "
                    "'x' data.frame" % (role, column))
            self.data[column] = x[column]

        self.xlabel = ""
        self.ticks = []
        self.ticksLabels = []
        self.nChr = len(x[chrm].unique())
        self.chrName = chrm
        self.pName = p
        self.snpName = snp
        self.geneName = gene
        self.annotationName = annotation
        self.logp = logp

        # Names of the helper columns added to self.data.
        self.index = 'INDEX'
        self.pos = 'POSITION'

        # Number the chromosomes 1..k in order of first appearance, so that
        # the layout below still works when chromosome numbers have gaps.
        for idx, chrom in enumerate(self.data[chrm].unique(), start=1):
            self.data.loc[self.data[chrm] == chrom, self.index] = idx
        # Set the type to be the same as provided for the chrm column.
        self.data[self.index] = \
            self.data[self.index].astype(self.data[chrm].dtype)

        # Positions and ticks. Each chromosome is shifted right by the sum
        # of the largest positions of all chromosomes before it; the tick of
        # a chromosome sits in the middle of its range. For example:
        # chrm bp pos
        # 1    1  1
        # 1    2  2
        # 2    1  3
        # 2    2  4
        # 3    1  5
        if self.nChr == 1:
            # Single chromosome: x is simply the base-pair position.
            self.data[self.pos] = self.data[bp]
            self.ticks.append(int(len(self.data[self.pos]) / 2.) + 1)
            # Use the scalar chromosome value; formatting the whole array
            # would render as e.g. "Chromosome [1] position".
            self.xlabel = "Chromosome %s position" % (
                self.data[chrm].unique()[0])
            self.ticksLabels = self.ticks
        else:
            offset = 0
            for i in self.data[self.index].unique():
                in_chrom = self.data[self.index] == i
                if i != 1:
                    previous_bp = self.data.loc[
                        self.data[self.index] == i - 1, bp]
                    # Shift by the *largest* bp of the previous chromosome.
                    # Taking the last row instead is only correct for input
                    # sorted by position and makes chromosomes overlap
                    # otherwise.
                    offset = offset + previous_bp.max()

                self.data.loc[in_chrom, self.pos] = \
                    self.data.loc[in_chrom, bp].values + offset

                placed = self.data.loc[in_chrom, self.pos]
                self.ticks.append(
                    int((placed.min() + placed.max()) / 2.) + 1)

            self.xlabel = 'Chromosome'
            self.data[self.pos] = self.data[self.pos].astype(
                self.data[bp].dtype)

            if self.nChr > 10:  # To avoid crowded labels
                self.ticksLabels = [
                    t if np.mod(int(t), 2)  # Only every second tick
                    else ''
                    for t in self.data[chrm].unique()
                ]
            else:
                self.ticksLabels = self.data[chrm].unique()  # All the ticks

    @staticmethod
    def _horizontal_line(name, y, color, width, xmin, xmax):
        """Return a horizontal plotly line shape at ``y`` from xmin to xmax."""
        return go.layout.Shape(
            name=name,
            type="line",
            fillcolor=color,
            line=dict(
                color=color,
                width=width
            ),
            x0=xmin, x1=xmax, xref="x",
            y0=y, y1=y, yref="y"
        )

    def _y_values(self, frame):
        """Return the y values of ``frame``: -log10(p) if logp, else raw."""
        values = frame[self.pName].values
        return -np.log10(values) if self.logp else values

    def _chromosome_trace(self, frame, name, color, point_size, showlegend):
        """Return the scatter trace for the (non-highlighted) rows in frame."""
        return go.Scattergl(
            x=frame[self.pos].values,
            y=self._y_values(frame),
            mode="markers",
            showlegend=showlegend,
            name=name,
            marker={
                'color': color,
                'size': point_size
            },
            text=_get_hover_text(
                frame,
                snpname=self.snpName,
                genename=self.geneName,
                annotationname=self.annotationName
            )
        )

    def figure(
            self,
            title="Manhattan Plot",
            showgrid=True,
            xlabel=None,
            ylabel='-log10(p)',
            point_size=5,
            showlegend=True,
            col=None,
            suggestiveline_value=-np.log10(1e-8),
            suggestiveline_color='blue',
            suggestiveline_width=1,
            genomewideline_value=-np.log10(5e-8),
            genomewideline_color='red',
            genomewideline_width=1,
            highlight=True,
            highlight_color="red",
            highlight_gene_list=None
    ):
        """Keyword arguments:
        - title (string; default 'Manhattan Plot'): The title of the
            graph.
        - showgrid (bool; default True): Boolean indicating whether
            gridlines should be shown.
        - xlabel (string; optional): Label of the x axis; the label
            computed in ``__init__`` is used when None.
        - ylabel (string; default '-log10(p)'): Label of the y axis.
        - point_size (number; default 5): Size of the points of the
            scatter plot. Highlighted points are drawn twice as large.
        - showlegend (bool; default True): Boolean indicating whether
            the chromosome traces appear in the legend.
        - col (string | list; optional): Colour(s) of the chromosome
            traces, in any format accepted by plotly.graph_objects. A
            single string colours every chromosome; a list is cycled
            through. Default is black for a single chromosome and
            alternating grey/black otherwise.
        - suggestiveline_value (bool | float; default -log10(1e-8)): y
            value at which the suggestive line is drawn, or False to
            disable it. Note the default (8) lies above the default
            genome-wide line.
        - suggestiveline_color (string; default 'blue'): Color of the
            suggestive line.
        - suggestiveline_width (number; default 1): Width of the
            suggestive line.
        - genomewideline_value (bool | float; default -log10(5e-8)): y
            value at which the genome-wide line is drawn, or False to
            disable it.
        - genomewideline_color (string; default 'red'): Color of the
            genome-wide line.
        - genomewideline_width (number; default 1): Width of the genome
            wide line.
        - highlight (bool; default True): When True, the points whose gene
            is in ``highlight_gene_list`` are drawn as a separate trace.
            A non-bool truthy value only checks that the snp column
            exists and highlights nothing.
        - highlight_color (string; default 'red'): Color of the
            highlighted points.
        - highlight_gene_list (list; optional): Gene names to highlight;
            required when ``highlight`` is True.

        Raises:
        - KeyError: ``highlight`` is True but no gene list was given or
            the object has no gene column; or ``highlight`` is a non-bool
            truthy value and the snp column is missing.
        - Warning: none of the requested genes occurs in the data (the
            Warning is raised as an exception, not merely emitted).

        Returns:
        - A figure formatted for plotly.graph_objects.

        """
        xmin = self.data[self.pos].min()
        xmax = self.data[self.pos].max()

        # Threshold lines; a falsy value (False, 0, None) disables a line.
        horizontallines = []
        if suggestiveline_value:
            horizontallines.append(self._horizontal_line(
                SUGGESTIVE_LINE_LABEL, suggestiveline_value,
                suggestiveline_color, suggestiveline_width, xmin, xmax))
        if genomewideline_value:
            horizontallines.append(self._horizontal_line(
                GENOMEWIDE_LINE_LABEL, genomewideline_value,
                genomewideline_color, genomewideline_width, xmin, xmax))

        data_to_plot = []  # To contain the data traces
        highlight_tmp = pd.DataFrame()  # Rows drawn in the highlight trace
        highlight_mask = None
        highlight_hover_text = None

        if highlight:
            if not isinstance(highlight, bool):
                if self.snpName not in self.data.columns.values:
                    raise KeyError(
                        "snp argument specified for highlight as %s but "
                        "column not found in the data.frame" % self.snpName
                    )
            else:
                if not highlight_gene_list:
                    raise KeyError(
                        "Please provide a list of genes to highlight"
                    )
                if self.geneName not in self.data.columns.values:
                    raise KeyError(
                        "gene argument specified for highlight as %s but "
                        "column not found in the data.frame" % self.geneName
                    )
                # Compare against the de-duplicated request so repeated
                # gene names do not trigger a spurious warning.
                requested_genes = set(highlight_gene_list)
                common_genes = requested_genes.intersection(
                    self.data[self.geneName].values)
                if len(common_genes) == 0:
                    raise Warning(
                        "No common genes found in the data to highlight"
                    )
                elif len(common_genes) < len(requested_genes):
                    warnings.warn(
                        f"Some genes don't contain any SNP to highlight: "
                        f"{requested_genes - common_genes}"
                    )

                highlight_mask = self.data[self.geneName].isin(common_genes)
                highlight_tmp = self.data[highlight_mask]

                highlight_hover_text = _get_hover_text(
                    highlight_tmp,
                    snpname=self.snpName,
                    genename=self.geneName,
                    annotationname=self.annotationName
                )

        # Keep the highlighted rows out of the regular traces. A boolean
        # mask is used because the index labels of self.data come from the
        # caller's dataframe and need not be 0..n-1 positions.
        if highlight_mask is None or highlight_tmp.empty:
            data = self.data
        else:
            data = self.data[~highlight_mask]

        # Normalise the colours to a non-empty list that can be cycled.
        if col is None or len(col) == 0:
            palette = ['black'] if self.nChr == 1 else ['grey', 'black']
        elif isinstance(col, str):
            palette = [col]
        else:
            palette = list(col)

        xaxis = {
            'title': self.xlabel if xlabel is None else xlabel,
            'showgrid': showgrid,
            'range': [xmin, xmax],
        }
        if self.nChr != 1:
            # With several chromosomes use the ticks and labels computed in
            # __init__; a single chromosome keeps plotly's automatic ticks.
            xaxis.update({
                'tickmode': "array",
                'tickvals': self.ticks,
                'ticktext': self.ticksLabels,
                'ticks': "outside"
            })
        layout = go.Layout(
            title=title,
            xaxis=xaxis,
            yaxis={'title': ylabel},
            hovermode='closest'
        )

        if self.nChr == 1:
            # Format the scalar chromosome value, not the array returned by
            # unique(); taken from self.data so it exists even when every
            # point ended up in the highlight trace.
            chromo = self.data[self.chrName].unique()[0]
            data_to_plot.append(self._chromosome_trace(
                data, "chr%i" % chromo, palette[0], point_size, showlegend))
        else:
            for icol, i in enumerate(data[self.index].unique()):
                tmp = data[data[self.index] == i]
                chromo = tmp[self.chrName].unique()[0]  # Chromosome name
                data_to_plot.append(self._chromosome_trace(
                    tmp, "Chr%i" % chromo,
                    palette[icol % len(palette)], point_size, showlegend))

        if not highlight_tmp.empty:
            data_to_plot.append(
                go.Scattergl(
                    x=highlight_tmp[self.pos].values,
                    y=self._y_values(highlight_tmp),
                    mode="markers",
                    text=highlight_hover_text,
                    marker=dict(
                        color=highlight_color,
                        size=point_size * 2,
                    ),
                    name="SNP-Gene Pairs of interest"
                )
            )

        layout.shapes = horizontallines

        return go.Figure(data=data_to_plot, layout=layout)