gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +75 -72
- gsMap/config.py +822 -316
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +28 -26
- gsMap/format_sumstats.py +233 -201
- gsMap/generate_ldscore.py +353 -209
- gsMap/latent_to_gene.py +92 -60
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +86 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +173 -140
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +105 -122
- gsMap/visualize.py +82 -64
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/utils/manhattan_plot.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
Modified from dash-bio ManhattanPlot (https://github.com/plotly/dash-bio/blob/master/dash_bio/component_factory/_manhattan.py)
|
3
|
-
|
3
|
+
"""
|
4
|
+
|
5
|
+
import warnings
|
6
|
+
|
4
7
|
import numpy as np
|
5
8
|
import pandas as pd
|
6
|
-
from pandas.api.types import is_numeric_dtype
|
7
|
-
|
8
9
|
import plotly.graph_objects as go
|
9
|
-
import
|
10
|
+
from pandas.api.types import is_numeric_dtype
|
10
11
|
|
11
12
|
# %%
|
12
13
|
SUGGESTIVE_LINE_LABEL = "suggestive line"
|
@@ -31,69 +32,65 @@ def _get_hover_text(df, snpname=None, genename=None, annotationname=None):
|
|
31
32
|
want to include in the plot (e.g., zscore, effect size, minor allele
|
32
33
|
frequency).
|
33
34
|
"""
|
34
|
-
hover_text =
|
35
|
+
hover_text = ""
|
35
36
|
if snpname is not None and snpname in df.columns:
|
36
|
-
hover_text =
|
37
|
+
hover_text = "SNP: " + df[snpname].astype(str)
|
37
38
|
|
38
39
|
if genename is not None and genename in df.columns:
|
39
|
-
hover_text = hover_text
|
40
|
-
+ '<br>GENE: ' \
|
41
|
-
+ df[genename].astype(str)
|
40
|
+
hover_text = hover_text + "<br>GENE: " + df[genename].astype(str)
|
42
41
|
|
43
42
|
if annotationname is not None and annotationname in df.columns:
|
44
|
-
hover_text = hover_text
|
45
|
-
+ '<br>' \
|
46
|
-
+ df[annotationname].astype(str)
|
43
|
+
hover_text = hover_text + "<br>" + df[annotationname].astype(str)
|
47
44
|
|
48
45
|
return hover_text
|
49
46
|
|
50
47
|
|
51
48
|
def ManhattanPlot(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
49
|
+
dataframe,
|
50
|
+
chrm="CHR",
|
51
|
+
bp="BP",
|
52
|
+
p="P",
|
53
|
+
snp="SNP",
|
54
|
+
gene="GENE",
|
55
|
+
annotation=None,
|
56
|
+
logp=True,
|
57
|
+
title="Manhattan Plot",
|
58
|
+
showgrid=True,
|
59
|
+
xlabel=None,
|
60
|
+
ylabel="-log10(p)",
|
61
|
+
point_size=5,
|
62
|
+
showlegend=True,
|
63
|
+
col=None,
|
64
|
+
suggestiveline_value=-np.log10(1e-8),
|
65
|
+
suggestiveline_color="#636efa",
|
66
|
+
suggestiveline_width=1,
|
67
|
+
genomewideline_value=-np.log10(5e-8),
|
68
|
+
genomewideline_color="#EF553B",
|
69
|
+
genomewideline_width=1,
|
70
|
+
highlight=True,
|
71
|
+
highlight_color="red",
|
72
|
+
highlight_gene_list=None,
|
76
73
|
):
|
77
74
|
"""Returns a figure for a manhattan plot.
|
78
75
|
|
79
|
-
Keyword arguments:
|
80
|
-
- dataframe (dataframe; required): A pandas dataframe which must contain at
|
76
|
+
Keyword arguments:
|
77
|
+
- dataframe (dataframe; required): A pandas dataframe which must contain at
|
81
78
|
least the following three columns:
|
82
79
|
- the chromosome number
|
83
80
|
- genomic base-pair position
|
84
81
|
- a numeric quantity to plot such as a p-value or zscore
|
85
|
-
- chrm (string; default 'CHR'): A string denoting the column name for
|
82
|
+
- chrm (string; default 'CHR'): A string denoting the column name for
|
86
83
|
the chromosome. This column must be float or integer. Minimum
|
87
84
|
number of chromosomes required is 1. If you have X, Y, or MT
|
88
85
|
chromosomes, be sure to renumber these 23, 24, 25, etc.
|
89
|
-
- bp (string; default 'BP'): A string denoting the column name for the
|
86
|
+
- bp (string; default 'BP'): A string denoting the column name for the
|
90
87
|
chromosomal position.
|
91
|
-
- p (string; default 'P'): A string denoting the column name for the
|
88
|
+
- p (string; default 'P'): A string denoting the column name for the
|
92
89
|
float quantity to be plotted on the y-axis. This column must be
|
93
90
|
numeric. It does not have to be a p-value. It can be any numeric
|
94
91
|
quantity such as peak heights, Bayes factors, test statistics. If
|
95
92
|
it is not a p-value, make sure to set logp = False.
|
96
|
-
- snp (string; default 'SNP'): A string denoting the column name for
|
93
|
+
- snp (string; default 'SNP'): A string denoting the column name for
|
97
94
|
the SNP names (e.g., rs number). More generally, this column could
|
98
95
|
be anything that identifies each point being plotted. For example,
|
99
96
|
in an Epigenomewide association study (EWAS), this could be the
|
@@ -101,50 +98,50 @@ Keyword arguments:
|
|
101
98
|
argument is optional, however it is necessary to specify it if you
|
102
99
|
want to highlight points on the plot, using the highlight argument
|
103
100
|
in the figure method.
|
104
|
-
- gene (string; default 'GENE'): A string denoting the column name for
|
101
|
+
- gene (string; default 'GENE'): A string denoting the column name for
|
105
102
|
the GENE names. This column could be a string or a float. More
|
106
103
|
generally, it could be any annotation information that you want
|
107
104
|
to include in the plot.
|
108
|
-
- annotation (string; optional): A string denoting the column to use
|
105
|
+
- annotation (string; optional): A string denoting the column to use
|
109
106
|
as annotations. This column could be a string or a float. It
|
110
107
|
could be any annotation information that you want to include in
|
111
108
|
the plot (e.g., zscore, effect size, minor allele frequency).
|
112
|
-
- logp (bool; optional): If True, the -log10 of the p-value is
|
109
|
+
- logp (bool; optional): If True, the -log10 of the p-value is
|
113
110
|
plotted. It isn't very useful to plot raw p-values; however,
|
114
111
|
plotting the raw value could be useful for other genome-wide plots
|
115
112
|
(e.g., peak heights, Bayes factors, test statistics, other
|
116
113
|
"scores", etc.)
|
117
|
-
- title (string; default 'Manhattan Plot'): The title of the graph.
|
118
|
-
- showgrid (bool; default true): Boolean indicating whether gridlines
|
114
|
+
- title (string; default 'Manhattan Plot'): The title of the graph.
|
115
|
+
- showgrid (bool; default true): Boolean indicating whether gridlines
|
119
116
|
should be shown.
|
120
|
-
- xlabel (string; optional): Label of the x axis.
|
121
|
-
- ylabel (string; default '-log10(p)'): Label of the y axis.
|
122
|
-
- point_size (number; default 5): Size of the points of the Scatter
|
117
|
+
- xlabel (string; optional): Label of the x axis.
|
118
|
+
- ylabel (string; default '-log10(p)'): Label of the y axis.
|
119
|
+
- point_size (number; default 5): Size of the points of the Scatter
|
123
120
|
plot.
|
124
|
-
- showlegend (bool; default true): Boolean indicating whether legends
|
121
|
+
- showlegend (bool; default true): Boolean indicating whether legends
|
125
122
|
should be shown.
|
126
|
-
- col (string; optional): A string representing the color of the
|
123
|
+
- col (string; optional): A string representing the color of the
|
127
124
|
points of the scatter plot. Can be in any color format accepted by
|
128
125
|
plotly.graph_objects.
|
129
|
-
- suggestiveline_value (bool | float; default 8): A value which must
|
126
|
+
- suggestiveline_value (bool | float; default 8): A value which must
|
130
127
|
be either False to deactivate the option, or a numerical value
|
131
128
|
corresponding to the p-value at which the line should be drawn.
|
132
129
|
The line has no influence on the data points.
|
133
|
-
- suggestiveline_color (string; default 'grey'): Color of the suggestive
|
134
|
-
line.
|
135
|
-
- suggestiveline_width (number; default 2): Width of the suggestive
|
130
|
+
- suggestiveline_color (string; default 'grey'): Color of the suggestive
|
136
131
|
line.
|
137
|
-
-
|
132
|
+
- suggestiveline_width (number; default 2): Width of the suggestive
|
133
|
+
line.
|
134
|
+
- genomewideline_value (bool | float; default -log10(5e-8)): A boolean
|
138
135
|
which must be either False to deactivate the option, or a numerical value
|
139
136
|
corresponding to the p-value above which the data points are
|
140
137
|
considered significant.
|
141
|
-
- genomewideline_color (string; default 'red'): Color of the genome-wide
|
138
|
+
- genomewideline_color (string; default 'red'): Color of the genome-wide
|
142
139
|
line. Can be in any color format accepted by plotly.graph_objects.
|
143
|
-
- genomewideline_width (number; default 1): Width of the genome-wide
|
144
|
-
|
145
|
-
- highlight (bool; default True): turning on/off the highlighting of
|
140
|
+
- genomewideline_width (number; default 1): Width of the genome-wide
|
141
|
+
line.
|
142
|
+
- highlight (bool; default True): turning on/off the highlighting of
|
146
143
|
data points considered significant.
|
147
|
-
- highlight_color (string; default 'red'): Color of the data points
|
144
|
+
- highlight_color (string; default 'red'): Color of the data points
|
148
145
|
highlighted because they are significant. Can be in any color
|
149
146
|
format accepted by plotly.graph_objects.
|
150
147
|
|
@@ -160,16 +157,8 @@ Keyword arguments:
|
|
160
157
|
'''
|
161
158
|
|
162
159
|
"""
|
163
|
-
|
164
160
|
mh = _ManhattanPlot(
|
165
|
-
dataframe,
|
166
|
-
chrm=chrm,
|
167
|
-
bp=bp,
|
168
|
-
p=p,
|
169
|
-
snp=snp,
|
170
|
-
gene=gene,
|
171
|
-
annotation=annotation,
|
172
|
-
logp=logp
|
161
|
+
dataframe, chrm=chrm, bp=bp, p=p, snp=snp, gene=gene, annotation=annotation, logp=logp
|
173
162
|
)
|
174
163
|
|
175
164
|
return mh.figure(
|
@@ -188,22 +177,13 @@ Keyword arguments:
|
|
188
177
|
genomewideline_width=genomewideline_width,
|
189
178
|
highlight=highlight,
|
190
179
|
highlight_color=highlight_color,
|
191
|
-
highlight_gene_list=highlight_gene_list
|
180
|
+
highlight_gene_list=highlight_gene_list,
|
192
181
|
)
|
193
182
|
|
194
183
|
|
195
|
-
class _ManhattanPlot
|
196
|
-
|
184
|
+
class _ManhattanPlot:
|
197
185
|
def __init__(
|
198
|
-
|
199
|
-
x,
|
200
|
-
chrm="CHR",
|
201
|
-
bp="BP",
|
202
|
-
p="P",
|
203
|
-
snp="SNP",
|
204
|
-
gene="GENE",
|
205
|
-
annotation=None,
|
206
|
-
logp=True
|
186
|
+
self, x, chrm="CHR", bp="BP", p="P", snp="SNP", gene="GENE", annotation=None, logp=True
|
207
187
|
):
|
208
188
|
"""
|
209
189
|
Keyword arguments:
|
@@ -245,9 +225,10 @@ class _ManhattanPlot():
|
|
245
225
|
(e.g., peak heights, Bayes factors, test statistics, other
|
246
226
|
"scores", etc.).
|
247
227
|
|
248
|
-
Returns
|
249
|
-
|
250
|
-
|
228
|
+
Returns
|
229
|
+
-------
|
230
|
+
- A ManhattanPlot object.
|
231
|
+
"""
|
251
232
|
# checking the validity of the arguments
|
252
233
|
|
253
234
|
# Make sure you have chrm, bp and p columns and that they are of
|
@@ -256,9 +237,11 @@ class _ManhattanPlot():
|
|
256
237
|
raise KeyError("Column %s not found in 'x' data.frame" % chrm)
|
257
238
|
else:
|
258
239
|
if not is_numeric_dtype(x[chrm].dtype):
|
259
|
-
raise TypeError(
|
260
|
-
|
261
|
-
|
240
|
+
raise TypeError(
|
241
|
+
"%s column should be numeric. Do you have "
|
242
|
+
"'X', 'Y', 'MT', etc? If so change to "
|
243
|
+
"numbers and try again." % chrm
|
244
|
+
)
|
262
245
|
|
263
246
|
if bp not in x.columns.values:
|
264
247
|
raise KeyError("Column %s not found in 'x' data.frame" % bp)
|
@@ -279,8 +262,8 @@ class _ManhattanPlot():
|
|
279
262
|
if snp not in x.columns.values:
|
280
263
|
# Warn if you don't have a snp column
|
281
264
|
raise KeyError(
|
282
|
-
"snp argument specified as %s but column not found in "
|
283
|
-
|
265
|
+
"snp argument specified as %s but column not found in 'x' data.frame" % snp
|
266
|
+
)
|
284
267
|
else:
|
285
268
|
# If the input DataFrame has a snp column, add it to the new
|
286
269
|
# DataFrame
|
@@ -290,8 +273,8 @@ class _ManhattanPlot():
|
|
290
273
|
if gene not in x.columns.values:
|
291
274
|
# Warn if you don't have a gene column
|
292
275
|
raise KeyError(
|
293
|
-
"gene argument specified as %s but column not found in "
|
294
|
-
|
276
|
+
"gene argument specified as %s but column not found in 'x' data.frame" % gene
|
277
|
+
)
|
295
278
|
else:
|
296
279
|
# If the input DataFrame has a gene column, add it to the new
|
297
280
|
# DataFrame
|
@@ -322,8 +305,8 @@ class _ManhattanPlot():
|
|
322
305
|
|
323
306
|
# Set positions, ticks, and labels for plotting
|
324
307
|
|
325
|
-
self.index =
|
326
|
-
self.pos =
|
308
|
+
self.index = "INDEX"
|
309
|
+
self.pos = "POSITION"
|
327
310
|
|
328
311
|
# Fixes the bug where one chromosome is missing by adding a sequential
|
329
312
|
# index column.
|
@@ -332,8 +315,7 @@ class _ManhattanPlot():
|
|
332
315
|
idx = idx + 1
|
333
316
|
self.data.loc[self.data[chrm] == i, self.index] = int(idx)
|
334
317
|
# Set the type to be the same as provided for chrm column
|
335
|
-
self.data[self.index] =
|
336
|
-
self.data[self.index].astype(self.data[chrm].dtype)
|
318
|
+
self.data[self.index] = self.data[self.index].astype(self.data[chrm].dtype)
|
337
319
|
|
338
320
|
# This section sets up positions and ticks. Ticks should be placed in
|
339
321
|
# the middle of a chromosome. The new pos column is added that keeps
|
@@ -349,7 +331,7 @@ class _ManhattanPlot():
|
|
349
331
|
if self.nChr == 1:
|
350
332
|
# For a single chromosome
|
351
333
|
self.data[self.pos] = self.data[bp]
|
352
|
-
self.ticks.append(int(len(self.data[self.pos]) / 2.) + 1)
|
334
|
+
self.ticks.append(int(len(self.data[self.pos]) / 2.0) + 1)
|
353
335
|
self.xlabel = "Chromosome %s position" % (self.data[chrm].unique())
|
354
336
|
self.ticksLabels = self.ticks
|
355
337
|
else:
|
@@ -357,96 +339,97 @@ class _ManhattanPlot():
|
|
357
339
|
lastbase = 0
|
358
340
|
for i in self.data[self.index].unique():
|
359
341
|
if i == 1:
|
360
|
-
self.data.loc[self.data[self.index] == i, self.pos] =
|
361
|
-
self.data
|
342
|
+
self.data.loc[self.data[self.index] == i, self.pos] = self.data.loc[
|
343
|
+
self.data[self.index] == i, bp
|
344
|
+
].values
|
362
345
|
else:
|
363
346
|
prevbp = self.data.loc[self.data[self.index] == i - 1, bp]
|
364
347
|
# Shift the basepair position by the largest bp of the
|
365
348
|
# current chromosome
|
366
349
|
lastbase = lastbase + prevbp.iat[-1]
|
367
350
|
|
368
|
-
self.data.loc[self.data[self.index] == i, self.pos] =
|
369
|
-
self.data.loc[self.data[self.index] == i, bp].values
|
370
|
-
|
351
|
+
self.data.loc[self.data[self.index] == i, self.pos] = (
|
352
|
+
self.data.loc[self.data[self.index] == i, bp].values + lastbase
|
353
|
+
)
|
371
354
|
|
372
355
|
tmin = min(self.data.loc[self.data[self.index] == i, self.pos])
|
373
356
|
tmax = max(self.data.loc[self.data[self.index] == i, self.pos])
|
374
|
-
self.ticks.append(int((tmin + tmax) / 2.) + 1)
|
357
|
+
self.ticks.append(int((tmin + tmax) / 2.0) + 1)
|
375
358
|
|
376
|
-
self.xlabel =
|
377
|
-
self.data[self.pos] = self.data[self.pos].astype(
|
378
|
-
self.data[bp].dtype)
|
359
|
+
self.xlabel = "Chromosome"
|
360
|
+
self.data[self.pos] = self.data[self.pos].astype(self.data[bp].dtype)
|
379
361
|
|
380
362
|
if self.nChr > 10: # To avoid crowded labels
|
381
363
|
self.ticksLabels = [
|
382
|
-
t
|
383
|
-
|
364
|
+
t
|
365
|
+
if np.mod(int(t), 2) # Only every two ticks
|
366
|
+
else ""
|
384
367
|
for t in self.data[chrm].unique()
|
385
368
|
]
|
386
369
|
else:
|
387
370
|
self.ticksLabels = self.data[chrm].unique() # All the ticks
|
388
371
|
|
389
372
|
def figure(
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
373
|
+
self,
|
374
|
+
title="Manhattan Plot",
|
375
|
+
showgrid=True,
|
376
|
+
xlabel=None,
|
377
|
+
ylabel="-log10(p)",
|
378
|
+
point_size=5,
|
379
|
+
showlegend=True,
|
380
|
+
col=None,
|
381
|
+
suggestiveline_value=-np.log10(1e-8),
|
382
|
+
suggestiveline_color="blue",
|
383
|
+
suggestiveline_width=1,
|
384
|
+
genomewideline_value=-np.log10(5e-8),
|
385
|
+
genomewideline_color="red",
|
386
|
+
genomewideline_width=1,
|
387
|
+
highlight=True,
|
388
|
+
highlight_color="red",
|
389
|
+
highlight_gene_list=None,
|
407
390
|
):
|
408
391
|
"""Keyword arguments:
|
409
|
-
|
392
|
+
- title (string; default 'Manhattan Plot'): The title of the
|
410
393
|
graph.
|
411
|
-
|
394
|
+
- showgrid (bool; default True): Boolean indicating whether
|
412
395
|
gridlines should be shown.
|
413
|
-
|
414
|
-
|
415
|
-
|
396
|
+
- xlabel (string; optional): Label of the x axis.
|
397
|
+
- ylabel (string; default '-log10(p)'): Label of the y axis.
|
398
|
+
- point_size (number; default 5): Size of the points of the
|
416
399
|
scatter plot.
|
417
|
-
|
400
|
+
- showlegend (bool; default True): Boolean indicating whether
|
418
401
|
legends should be shown.
|
419
|
-
|
402
|
+
- col (string; optional): A string representing the color of the
|
420
403
|
points of the Scatter plot. Can be in any color format
|
421
404
|
accepted by plotly.graph_objects.
|
422
|
-
|
405
|
+
- suggestiveline_value (bool | float; default 8): A value which
|
423
406
|
must be either False to deactivate the option, or a numerical value
|
424
407
|
corresponding to the p-value at which the line should be
|
425
408
|
drawn. The line has no influence on the data points.
|
426
|
-
|
409
|
+
- suggestiveline_color (string; default 'grey'): Color of the
|
427
410
|
suggestive line.
|
428
|
-
|
411
|
+
- suggestiveline_width (number; default 2): Width of the
|
429
412
|
suggestive line.
|
430
|
-
|
413
|
+
- genomewideline_value (bool | float; default -log10(5e-8)): A
|
431
414
|
boolean which must be either False to deactivate the option, or a
|
432
415
|
numerical value corresponding to the p-value above which the
|
433
416
|
data points are considered significant.
|
434
|
-
|
417
|
+
- genomewideline_color (string; default 'red'): Color of the
|
435
418
|
genome-wide line. Can be in any color format accepted by
|
436
419
|
plotly.graph_objects.
|
437
|
-
|
438
|
-
|
439
|
-
|
420
|
+
- genomewideline_width (number; default 1): Width of the genome
|
421
|
+
wide line.
|
422
|
+
- highlight (bool; default True): Whether to turn on or off the
|
440
423
|
highlighting of data points considered significant.
|
441
|
-
|
424
|
+
- highlight_color (string; default 'red'): Color of the data
|
442
425
|
points highlighted because they are significant. Can be in any
|
443
426
|
color format accepted by plotly.graph_objects.
|
444
427
|
|
445
|
-
|
446
|
-
|
428
|
+
Returns
|
429
|
+
-------
|
430
|
+
- A figure formatted for plotly.graph_objects.
|
447
431
|
|
448
432
|
"""
|
449
|
-
|
450
433
|
xmin = min(self.data[self.pos].values)
|
451
434
|
xmax = max(self.data[self.pos].values)
|
452
435
|
|
@@ -457,12 +440,13 @@ class _ManhattanPlot():
|
|
457
440
|
name=SUGGESTIVE_LINE_LABEL,
|
458
441
|
type="line",
|
459
442
|
fillcolor=suggestiveline_color,
|
460
|
-
line=dict(
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
443
|
+
line=dict(color=suggestiveline_color, width=suggestiveline_width),
|
444
|
+
x0=xmin,
|
445
|
+
x1=xmax,
|
446
|
+
xref="x",
|
447
|
+
y0=suggestiveline_value,
|
448
|
+
y1=suggestiveline_value,
|
449
|
+
yref="y",
|
466
450
|
)
|
467
451
|
horizontallines.append(suggestiveline)
|
468
452
|
|
@@ -471,12 +455,13 @@ class _ManhattanPlot():
|
|
471
455
|
name=GENOMEWIDE_LINE_LABEL,
|
472
456
|
type="line",
|
473
457
|
fillcolor=genomewideline_color,
|
474
|
-
line=dict(
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
458
|
+
line=dict(color=genomewideline_color, width=genomewideline_width),
|
459
|
+
x0=xmin,
|
460
|
+
x1=xmax,
|
461
|
+
xref="x",
|
462
|
+
y0=genomewideline_value,
|
463
|
+
y1=genomewideline_value,
|
464
|
+
yref="y",
|
480
465
|
)
|
481
466
|
horizontallines.append(genomewideline)
|
482
467
|
|
@@ -492,18 +477,17 @@ class _ManhattanPlot():
|
|
492
477
|
)
|
493
478
|
else:
|
494
479
|
if not highlight_gene_list:
|
495
|
-
raise KeyError(
|
496
|
-
|
497
|
-
|
498
|
-
|
480
|
+
raise KeyError("Please provide a list of genes to highlight")
|
481
|
+
common_genes = set(self.data[self.geneName].values).intersection(
|
482
|
+
highlight_gene_list
|
483
|
+
)
|
499
484
|
if len(common_genes) == 0:
|
500
|
-
raise Warning(
|
501
|
-
"No common genes found in the data to highlight"
|
502
|
-
)
|
485
|
+
raise Warning("No common genes found in the data to highlight")
|
503
486
|
elif len(common_genes) < len(highlight_gene_list):
|
504
487
|
warnings.warn(
|
505
488
|
f"Some genes don't contain any SNP to highlight: "
|
506
|
-
f": {set(highlight_gene_list) - common_genes}"
|
489
|
+
f": {set(highlight_gene_list) - common_genes}",
|
490
|
+
stacklevel=2,
|
507
491
|
)
|
508
492
|
|
509
493
|
highlight_tmp = self.data
|
@@ -514,11 +498,9 @@ class _ManhattanPlot():
|
|
514
498
|
highlight_tmp,
|
515
499
|
snpname=self.snpName,
|
516
500
|
genename=self.geneName,
|
517
|
-
annotationname=self.annotationName
|
501
|
+
annotationname=self.annotationName,
|
518
502
|
)
|
519
503
|
|
520
|
-
|
521
|
-
|
522
504
|
# Remove the highlighted data from the DataFrame if not empty
|
523
505
|
if highlight_tmp.empty:
|
524
506
|
data = self.data
|
@@ -526,42 +508,37 @@ class _ManhattanPlot():
|
|
526
508
|
data = self.data.drop(self.data.index[highlight_tmp.index])
|
527
509
|
|
528
510
|
if self.nChr == 1:
|
529
|
-
|
530
511
|
if col is None:
|
531
|
-
col = [
|
512
|
+
col = ["black"]
|
532
513
|
|
533
514
|
# If single chromosome, ticks and labels automatic.
|
534
515
|
layout = go.Layout(
|
535
516
|
title=title,
|
536
517
|
xaxis={
|
537
|
-
|
538
|
-
|
539
|
-
|
518
|
+
"title": self.xlabel if xlabel is None else xlabel,
|
519
|
+
"showgrid": showgrid,
|
520
|
+
"range": [xmin, xmax],
|
540
521
|
},
|
541
|
-
yaxis={
|
542
|
-
hovermode=
|
522
|
+
yaxis={"title": ylabel},
|
523
|
+
hovermode="closest",
|
543
524
|
)
|
544
525
|
|
545
526
|
hover_text = _get_hover_text(
|
546
527
|
data,
|
547
528
|
snpname=self.snpName,
|
548
529
|
genename=self.geneName,
|
549
|
-
annotationname=self.annotationName
|
530
|
+
annotationname=self.annotationName,
|
550
531
|
)
|
551
532
|
|
552
533
|
data_to_plot.append(
|
553
534
|
go.Scattergl(
|
554
535
|
x=data[self.pos].values,
|
555
|
-
y=-np.log10(data[self.pName].values) if self.logp
|
556
|
-
else data[self.pName].values,
|
536
|
+
y=-np.log10(data[self.pName].values) if self.logp else data[self.pName].values,
|
557
537
|
mode="markers",
|
558
538
|
showlegend=showlegend,
|
559
539
|
name="chr%i" % data[self.chrName].unique(),
|
560
|
-
marker={
|
561
|
-
|
562
|
-
'size': point_size
|
563
|
-
},
|
564
|
-
text=hover_text
|
540
|
+
marker={"color": col[0], "size": point_size},
|
541
|
+
text=hover_text,
|
565
542
|
)
|
566
543
|
)
|
567
544
|
else:
|
@@ -569,24 +546,21 @@ class _ManhattanPlot():
|
|
569
546
|
layout = go.Layout(
|
570
547
|
title=title,
|
571
548
|
xaxis={
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
549
|
+
"title": self.xlabel if xlabel is None else xlabel,
|
550
|
+
"showgrid": showgrid,
|
551
|
+
"range": [xmin, xmax],
|
552
|
+
"tickmode": "array",
|
553
|
+
"tickvals": self.ticks,
|
554
|
+
"ticktext": self.ticksLabels,
|
555
|
+
"ticks": "outside",
|
579
556
|
},
|
580
|
-
yaxis={
|
581
|
-
hovermode=
|
557
|
+
yaxis={"title": ylabel},
|
558
|
+
hovermode="closest",
|
582
559
|
)
|
583
560
|
|
584
561
|
icol = 0
|
585
562
|
if col is None:
|
586
|
-
col = [
|
587
|
-
'black' if np.mod(i, 2)
|
588
|
-
else 'grey' for i in range(self.nChr)
|
589
|
-
]
|
563
|
+
col = ["black" if np.mod(i, 2) else "grey" for i in range(self.nChr)]
|
590
564
|
|
591
565
|
for i in data[self.index].unique():
|
592
566
|
tmp = data[data[self.index] == i]
|
@@ -597,32 +571,31 @@ class _ManhattanPlot():
|
|
597
571
|
tmp,
|
598
572
|
snpname=self.snpName,
|
599
573
|
genename=self.geneName,
|
600
|
-
annotationname=self.annotationName
|
574
|
+
annotationname=self.annotationName,
|
601
575
|
)
|
602
576
|
|
603
577
|
data_to_plot.append(
|
604
578
|
go.Scattergl(
|
605
579
|
x=tmp[self.pos].values,
|
606
|
-
y=-np.log10(tmp[self.pName].values)
|
580
|
+
y=-np.log10(tmp[self.pName].values)
|
581
|
+
if self.logp
|
582
|
+
else tmp[self.pName].values,
|
607
583
|
mode="markers",
|
608
584
|
showlegend=showlegend,
|
609
585
|
name="Chr%i" % chromo,
|
610
|
-
marker={
|
611
|
-
|
612
|
-
'size': point_size
|
613
|
-
},
|
614
|
-
text=hover_text
|
586
|
+
marker={"color": col[icol], "size": point_size},
|
587
|
+
text=hover_text,
|
615
588
|
)
|
616
589
|
)
|
617
590
|
|
618
591
|
icol = icol + 1
|
619
592
|
|
620
|
-
|
621
593
|
if not highlight_tmp.empty:
|
622
594
|
data_to_plot.append(
|
623
595
|
go.Scattergl(
|
624
596
|
x=highlight_tmp[self.pos].values,
|
625
|
-
y=-np.log10(highlight_tmp[self.pName].values)
|
597
|
+
y=-np.log10(highlight_tmp[self.pName].values)
|
598
|
+
if self.logp
|
626
599
|
else highlight_tmp[self.pName].values,
|
627
600
|
mode="markers",
|
628
601
|
text=highlight_hover_text,
|
@@ -630,7 +603,7 @@ class _ManhattanPlot():
|
|
630
603
|
color=highlight_color,
|
631
604
|
size=point_size * 2,
|
632
605
|
),
|
633
|
-
name="SNP-Gene Pairs of interest"
|
606
|
+
name="SNP-Gene Pairs of interest",
|
634
607
|
)
|
635
608
|
)
|
636
609
|
|