gwaslab 3.4.14__py3-none-any.whl → 3.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/Sumstats.py +6 -4
- gwaslab/__init__.py +3 -1
- gwaslab/annotateplot.py +2 -2
- gwaslab/calculate_power.py +119 -42
- gwaslab/compare_effect.py +83 -17
- gwaslab/download.py +19 -4
- gwaslab/fill.py +183 -57
- gwaslab/miamiplot.py +25 -10
- gwaslab/mqqplot.py +4 -3
- gwaslab/plotrg.py +208 -75
- gwaslab/regionalplot.py +21 -3
- gwaslab/retrievedata.py +49 -18
- gwaslab/to_pickle.py +12 -0
- gwaslab/trumpetplot.py +0 -0
- gwaslab/version.py +3 -3
- {gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/METADATA +2 -2
- {gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/RECORD +20 -19
- {gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/top_level.txt +0 -0
gwaslab/fill.py
CHANGED
|
@@ -13,6 +13,8 @@ def filldata(
|
|
|
13
13
|
overwrite=False,
|
|
14
14
|
verbose=True,
|
|
15
15
|
only_sig=False,
|
|
16
|
+
sig_level=5e-8,
|
|
17
|
+
extreme=False,
|
|
16
18
|
log = Log()
|
|
17
19
|
):
|
|
18
20
|
|
|
@@ -33,39 +35,42 @@ def filldata(
|
|
|
33
35
|
to_fill.remove(i)
|
|
34
36
|
if verbose: log.write(" -Skipping columns: ",skip_cols)
|
|
35
37
|
if verbose: log.write(" -Filling columns: ",to_fill)
|
|
36
|
-
|
|
37
|
-
# beta to or ####################################################################################################
|
|
38
|
-
if "OR" in to_fill:
|
|
39
|
-
fill_or(sumstats,log,verbose=verbose)
|
|
40
|
-
|
|
41
|
-
# or to beta ####################################################################################################
|
|
42
|
-
if "BETA" in to_fill:
|
|
43
|
-
fill_beta(sumstats,log,verbose=verbose)
|
|
44
38
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
#
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
#
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
#
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
#
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
39
|
+
fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose)
|
|
40
|
+
## beta to or ####################################################################################################
|
|
41
|
+
# if "OR" in to_fill:
|
|
42
|
+
# fill_or(sumstats,log,verbose=verbose)
|
|
43
|
+
#
|
|
44
|
+
## or to beta ####################################################################################################
|
|
45
|
+
# if "BETA" in to_fill:
|
|
46
|
+
# fill_beta(sumstats,log,verbose=verbose)
|
|
47
|
+
#
|
|
48
|
+
# if "SE" in to_fill:
|
|
49
|
+
# fill_se(sumstats,log,verbose=verbose)
|
|
50
|
+
## z/chi2 to p ##################################################################################################
|
|
51
|
+
# if "P" in to_fill:
|
|
52
|
+
# fill_p(sumstats,log,only_sig=only_sig,df=df,verbose=verbose)
|
|
53
|
+
#
|
|
54
|
+
## beta/se to z ##################################################################################################
|
|
55
|
+
# if "Z" in to_fill:
|
|
56
|
+
# fill_z(sumstats,log,verbose=verbose)
|
|
57
|
+
#
|
|
58
|
+
## z/p to chisq ##################################################################################################
|
|
59
|
+
# if "CHISQ" in to_fill:
|
|
60
|
+
# fill_chisq(sumstats,log,verbose=verbose)
|
|
61
|
+
## EAF to MAF ##################################################################################################
|
|
62
|
+
# if "MAF" in to_fill:
|
|
63
|
+
# fill_maf(sumstats,log,verbose=verbose)
|
|
64
|
+
## p to -log10(P) ###############################################################################################
|
|
65
|
+
# if "MLOG10P" in to_fill:
|
|
66
|
+
# if extreme==True:
|
|
67
|
+
# fill_extreme_mlog10p(sumstats,log,verbose=verbose)
|
|
68
|
+
# elif "P" not in sumstats.columns:
|
|
69
|
+
# fill_p(sumstats,log,verbose=verbose)
|
|
70
|
+
# fill_mlog10p(sumstats,log,verbose=verbose)
|
|
71
|
+
# sumstats = sumstats.drop(labels=["P"],axis=1)
|
|
72
|
+
# else:
|
|
73
|
+
# fill_mlog10p(sumstats,log,verbose=verbose)
|
|
69
74
|
|
|
70
75
|
# ###################################################################################
|
|
71
76
|
sumstats = sortcolumn(sumstats, verbose=verbose, log=log)
|
|
@@ -75,16 +80,19 @@ def filldata(
|
|
|
75
80
|
|
|
76
81
|
##########################################################################################################################
|
|
77
82
|
|
|
78
|
-
def fill_p(sumstats,log,df=None,only_sig=False,overwrite=False,verbose=True):
|
|
83
|
+
def fill_p(sumstats,log,df=None,only_sig=False,sig_level=5e-8,overwrite=False,verbose=True,filled_count=0):
|
|
79
84
|
# MLOG10P -> P
|
|
80
85
|
if "MLOG10P" in sumstats.columns:
|
|
81
86
|
if verbose: log.write(" - Filling P value using MLOG10P column...")
|
|
82
87
|
sumstats["P"] = np.power(10,-sumstats["MLOG10P"])
|
|
88
|
+
filled_count +=1
|
|
89
|
+
|
|
83
90
|
# Z -> P
|
|
84
91
|
elif "Z" in sumstats.columns:
|
|
85
92
|
if verbose: log.write(" - Filling P value using Z column...")
|
|
86
93
|
stats.chisqprob = lambda chisq, degree_of_freedom: stats.chi2.sf(chisq, degree_of_freedom)
|
|
87
94
|
sumstats["P"] = ss.chisqprob(sumstats["Z"]**2,1)
|
|
95
|
+
filled_count +=1
|
|
88
96
|
|
|
89
97
|
elif "CHISQ" in sumstats.columns:
|
|
90
98
|
#CHISQ -> P
|
|
@@ -92,83 +100,201 @@ def fill_p(sumstats,log,df=None,only_sig=False,overwrite=False,verbose=True):
|
|
|
92
100
|
stats.chisqprob = lambda chisq, degree_of_freedom: stats.chi2.sf(chisq, degree_of_freedom)
|
|
93
101
|
if df is None:
|
|
94
102
|
if only_sig is True and overwrite is True:
|
|
95
|
-
sumstats.loc[sumstats["P"]<5e-8,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<5e-8,"CHISQ"],1)
|
|
103
|
+
sumstats.loc[sumstats["P"]<sig_level,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<sig_level,"CHISQ"],1)
|
|
104
|
+
filled_count +=1
|
|
96
105
|
else:
|
|
97
106
|
sumstats["P"] = stats.chisqprob(sumstats["CHISQ"],1)
|
|
107
|
+
filled_count +=1
|
|
98
108
|
else:
|
|
99
109
|
if only_sig is True and overwrite is True:
|
|
100
|
-
if verbose: log.write(" - Filling P value using CHISQ column for variants:" , sum(sumstats["P"]<5e-8))
|
|
101
|
-
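sumstats.loc[sumstats["P"]<5e-8,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<5e-8,"CHISQ"],sumstats.loc[sumstats["P"]<5e-8,df].astype("int"))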
|
|
110
|
+
if verbose: log.write(" - Filling P value using CHISQ column for variants:" , sum(sumstats["P"]<sig_level))
|
|
111
|
+
sumstats.loc[sumstats["P"]<sig_level,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<sig_level,"CHISQ"],sumstats.loc[sumstats["P"]<sig_level,df].astype("int"))
|
|
112
|
+
filled_count +=1
|
|
102
113
|
else:
|
|
103
114
|
if verbose: log.write(" - Filling P value using CHISQ column for all valid variants:")
|
|
104
115
|
sumstats["P"] = stats.chisqprob(sumstats["CHISQ"],sumstats[df].astype("int"))
|
|
105
|
-
|
|
116
|
+
filled_count +=1
|
|
117
|
+
else:
|
|
118
|
+
return 0
|
|
119
|
+
return 1
|
|
120
|
+
|
|
121
|
+
def fill_z(sumstats,log,verbose=True,filled_count=0):
|
|
106
122
|
# BETA/SE -> Z
|
|
107
123
|
if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
|
|
108
124
|
if verbose: log.write(" - Filling Z using BETA/SE column...")
|
|
109
125
|
sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
|
|
110
|
-
|
|
111
|
-
|
|
126
|
+
filled_count +=1
|
|
127
|
+
else:
|
|
128
|
+
return 0
|
|
129
|
+
return 1
|
|
130
|
+
|
|
131
|
+
def fill_chisq(sumstats,log,verbose=True,filled_count=0):
|
|
112
132
|
# Z -> CHISQ
|
|
113
133
|
if "Z" in sumstats.columns:
|
|
114
134
|
if verbose: log.write(" - Filling CHISQ using Z column...")
|
|
115
135
|
sumstats["CHISQ"] = (sumstats["Z"])**2
|
|
136
|
+
filled_count +=1
|
|
116
137
|
elif "P" in sumstats.columns:
|
|
117
138
|
# P -> CHISQ
|
|
118
139
|
if verbose: log.write(" - Filling CHISQ using P column...")
|
|
119
140
|
sumstats["CHISQ"] = ss.chi2.isf(sumstats["P"], 1)
|
|
120
|
-
|
|
121
|
-
|
|
141
|
+
filled_count +=1
|
|
142
|
+
else:
|
|
143
|
+
return 0
|
|
144
|
+
return 1
|
|
145
|
+
|
|
146
|
+
def fill_or(sumstats,log,verbose=True,filled_count=0):
|
|
122
147
|
# BETA -> OR
|
|
123
148
|
if "BETA" in sumstats.columns:
|
|
124
149
|
if verbose: log.write(" - Filling OR using BETA column...")
|
|
125
150
|
sumstats["OR"] = np.exp(sumstats["BETA"])
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
151
|
+
filled_count +=1
|
|
152
|
+
# BETA/SE -> OR_95L / OR_95U
|
|
153
|
+
# get confidence interval 95
|
|
154
|
+
if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
|
|
155
|
+
if verbose: log.write(" - Filling OR_95L/OR_95U using BETA/SE columns...")
|
|
156
|
+
# beta - 1.96 x se , beta + 1.96 x se
|
|
157
|
+
sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
|
|
158
|
+
sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
|
|
159
|
+
filled_count +=1
|
|
160
|
+
else:
|
|
161
|
+
return 0
|
|
162
|
+
return 1
|
|
163
|
+
def fill_or95(sumstats,log,verbose=True,filled_count=0):
|
|
136
164
|
# get confidence interval 95
|
|
137
165
|
if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
|
|
138
166
|
if verbose: log.write(" - Filling OR_95L/OR_95U using BETA/SE columns...")
|
|
139
167
|
# beta - 1.96 x se , beta + 1.96 x se
|
|
140
168
|
sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
|
|
141
169
|
sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
|
|
142
|
-
|
|
143
|
-
|
|
170
|
+
filled_count +=1
|
|
171
|
+
else:
|
|
172
|
+
return 0
|
|
173
|
+
|
|
174
|
+
def fill_beta(sumstats,log,verbose=True,filled_count=0):
|
|
144
175
|
# OR -> beta
|
|
145
176
|
if "OR" in sumstats.columns:
|
|
146
177
|
if verbose: log.write(" - Filling BETA value using OR column...")
|
|
147
178
|
sumstats["BETA"] = np.log(sumstats["OR"])
|
|
179
|
+
filled_count +=1
|
|
180
|
+
else:
|
|
181
|
+
return 0
|
|
182
|
+
return 1
|
|
148
183
|
|
|
149
|
-
def fill_se(sumstats,log,verbose=True):
|
|
184
|
+
def fill_se(sumstats,log,verbose=True,filled_count=0):
|
|
150
185
|
# OR / OR_95L /OR_95U -> SE
|
|
151
186
|
if ("P" in sumstats.columns) and ("BETA" in sumstats.columns):
|
|
152
187
|
if verbose: log.write(" - Filling SE value using BETA and P column...")
|
|
153
188
|
sumstats["SE"]= np.abs(sumstats["BETA"]/ ss.norm.ppf(1-sumstats["P"]/2))
|
|
189
|
+
filled_count +=1
|
|
154
190
|
elif ("OR" in sumstats.columns) and ("OR_95U" in sumstats.columns):
|
|
155
191
|
if verbose: log.write(" - Filling SE value using OR/OR_95U column...")
|
|
156
192
|
#
|
|
157
193
|
sumstats["SE"]=(np.log(sumstats["OR_95U"]) - np.log(sumstats["OR"]))/ss.norm.ppf(0.975)
|
|
194
|
+
filled_count +=1
|
|
158
195
|
elif ("OR" in sumstats.columns) and ("OR_95L" in sumstats.columns):
|
|
159
196
|
if verbose: log.write(" - Filling SE value using OR/OR_95L column...")
|
|
160
197
|
sumstats["SE"]=(np.log(sumstats["OR"]) - np.log(sumstats["OR_95L"]))/ss.norm.ppf(0.975)
|
|
198
|
+
filled_count +=1
|
|
161
199
|
else:
|
|
162
200
|
if verbose: log.write(" - Not enough information to fill SE...")
|
|
163
|
-
|
|
164
|
-
|
|
201
|
+
return 0
|
|
202
|
+
return 1
|
|
203
|
+
|
|
204
|
+
def fill_mlog10p(sumstats,log,verbose=True,filled_count=0):
|
|
165
205
|
if "P" in sumstats.columns:
|
|
166
206
|
# P -> MLOG10P
|
|
167
207
|
if verbose: log.write(" - Filling MLOG10P using P column...")
|
|
168
208
|
sumstats["MLOG10P"] = -np.log10(sumstats["P"])
|
|
209
|
+
filled_count +=1
|
|
210
|
+
else:
|
|
211
|
+
return 0
|
|
212
|
+
return 1
|
|
213
|
+
def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
|
|
214
|
+
# ref: https://stackoverflow.com/questions/46416027/how-to-compute-p-values-from-z-scores-in-r-when-the-z-score-is-large-pvalue-muc/46416222#46416222
|
|
215
|
+
if "Z" in sumstats.columns:
|
|
216
|
+
# P -> MLOG10P
|
|
217
|
+
if verbose: log.write(" - Filling MLOG10P using Z column...")
|
|
218
|
+
sumstats = fill_extreme_mlog10(sumstats, "Z")
|
|
219
|
+
filled_count +=1
|
|
220
|
+
elif "BETA" in sumstats.columns and "SE" in sumstats.columns:
|
|
221
|
+
if verbose: log.write(" - Z column not available...")
|
|
222
|
+
if verbose: log.write(" - Filling Z using BETA/SE column...")
|
|
223
|
+
sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
|
|
224
|
+
if verbose: log.write(" - Filling MLOG10P using Z column...")
|
|
225
|
+
sumstats = fill_extreme_mlog10(sumstats, "Z")
|
|
226
|
+
filled_count +=1
|
|
227
|
+
else:
|
|
228
|
+
return 0
|
|
229
|
+
return 1
|
|
169
230
|
|
|
170
|
-
def fill_maf(sumstats,log,verbose=True):
|
|
231
|
+
def fill_maf(sumstats,log,verbose=True,filled_count=0):
|
|
171
232
|
if "EAF" in sumstats.columns:
|
|
172
233
|
# EAF -> MAF
|
|
173
234
|
if verbose: log.write(" - Filling MAF using EAF column...")
|
|
174
235
|
sumstats["MAF"] = sumstats["EAF"].apply(lambda x: min(x,1-x) if pd.notnull(x) else np.nan)
|
|
236
|
+
filled_count +=1
|
|
237
|
+
else:
|
|
238
|
+
return 0
|
|
239
|
+
return 1
|
|
240
|
+
|
|
241
|
+
####################################################################################################################
|
|
242
|
+
def fill_extreme_mlog10(sumstats, z):
|
|
243
|
+
log_pvalue = np.log(2) + ss.norm.logsf(np.abs(sumstats[z])) #two-sided
|
|
244
|
+
log10_pvalue = log_pvalue/np.log(10)
|
|
245
|
+
mantissa = 10**(log10_pvalue %1 )
|
|
246
|
+
exponent = log10_pvalue // 1
|
|
247
|
+
sumstats["MLOG10P"] = -log10_pvalue
|
|
248
|
+
sumstats["P_MANTISSA"]= mantissa
|
|
249
|
+
sumstats["P_EXPONENT"]= exponent
|
|
250
|
+
return sumstats
|
|
251
|
+
|
|
252
|
+
####################################################################################################################
|
|
253
|
+
def fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose):
|
|
254
|
+
if verbose: log.write(" - Filling Columns iteratively...")
|
|
255
|
+
filled=[]
|
|
256
|
+
for i in range(len(to_fill)):
|
|
257
|
+
filled_count=0
|
|
258
|
+
previous_count=filled_count
|
|
259
|
+
# beta to or ####################################################################################################
|
|
260
|
+
if "OR" in to_fill:
|
|
261
|
+
status = fill_or(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
262
|
+
if status == 1 : to_fill.remove("OR")
|
|
263
|
+
# or to beta ####################################################################################################
|
|
264
|
+
if "BETA" in to_fill:
|
|
265
|
+
status = fill_beta(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
266
|
+
if status == 1 : to_fill.remove("BETA")
|
|
267
|
+
if "SE" in to_fill:
|
|
268
|
+
status = fill_se(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
269
|
+
if status == 1 : to_fill.remove("SE")
|
|
270
|
+
# z/chi2 to p ##################################################################################################
|
|
271
|
+
if "P" in to_fill:
|
|
272
|
+
status = fill_p(sumstats,log,only_sig=only_sig,df=df,sig_level=sig_level,verbose=verbose,filled_count=filled_count)
|
|
273
|
+
if status == 1 : to_fill.remove("P")
|
|
274
|
+
# beta/se to z ##################################################################################################
|
|
275
|
+
if "Z" in to_fill:
|
|
276
|
+
status = fill_z(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
277
|
+
if status == 1 : to_fill.remove("Z")
|
|
278
|
+
# z/p to chisq ##################################################################################################
|
|
279
|
+
if "CHISQ" in to_fill:
|
|
280
|
+
status = fill_chisq(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
281
|
+
if status == 1 : to_fill.remove("CHISQ")
|
|
282
|
+
# EAF to MAF ##################################################################################################
|
|
283
|
+
if "MAF" in to_fill:
|
|
284
|
+
status = fill_maf(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
285
|
+
if status == 1 : to_fill.remove("MAF")
|
|
286
|
+
# p to -log10(P) ###############################################################################################
|
|
287
|
+
if "MLOG10P" in to_fill:
|
|
288
|
+
if extreme==True:
|
|
289
|
+
status = fill_extreme_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
290
|
+
filled_count +=1
|
|
291
|
+
elif "P" not in sumstats.columns:
|
|
292
|
+
fill_p(sumstats,log,verbose=verbose)
|
|
293
|
+
status = fill_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
294
|
+
sumstats = sumstats.drop(labels=["P"],axis=1)
|
|
295
|
+
else:
|
|
296
|
+
status = fill_mlog10p(sumstats,log,verbose=verbose)
|
|
297
|
+
if status == 1 : to_fill.remove("MLOG10P")
|
|
298
|
+
|
|
299
|
+
if previous_count == filled_count:
|
|
300
|
+
break
|
gwaslab/miamiplot.py
CHANGED
|
@@ -36,13 +36,15 @@ from gwaslab.quickfix import _quick_extract_snp_in_region
|
|
|
36
36
|
from gwaslab.quickfix import _quick_assign_highlight_hue_pair
|
|
37
37
|
from gwaslab.quickfix import _quick_assign_marker_relative_size
|
|
38
38
|
from gwaslab.annotateplot import annotate_pair
|
|
39
|
-
|
|
39
|
+
from gwaslab.to_pickle import load_pickle
|
|
40
|
+
from gwaslab.to_pickle import load_data_from_pickle
|
|
40
41
|
def plot_miami(
|
|
41
42
|
path1,
|
|
42
43
|
path2,
|
|
43
44
|
cols1=None,
|
|
44
45
|
cols2=None,
|
|
45
46
|
sep=None,
|
|
47
|
+
mode="txt",
|
|
46
48
|
chr_dict = None,
|
|
47
49
|
chr_dict1 = False,
|
|
48
50
|
chr_dict2 = False,
|
|
@@ -111,6 +113,10 @@ def plot_miami(
|
|
|
111
113
|
log=Log()
|
|
112
114
|
):
|
|
113
115
|
## figuring arguments ###########################################################################################################
|
|
116
|
+
if cols1 is None:
|
|
117
|
+
cols1 = ["CHR","POS","P"]
|
|
118
|
+
if cols2 is None:
|
|
119
|
+
cols2 = ["CHR","POS","P"]
|
|
114
120
|
if highlight is None:
|
|
115
121
|
highlight = list()
|
|
116
122
|
if highlight1 is None:
|
|
@@ -163,9 +169,10 @@ def plot_miami(
|
|
|
163
169
|
titles=["",""]
|
|
164
170
|
if titles_pad is None:
|
|
165
171
|
titles_pad=[0.2,0.2]
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
172
|
+
if type(mode) is str:
|
|
173
|
+
modes =[ mode, mode]
|
|
174
|
+
else:
|
|
175
|
+
modes = mode
|
|
169
176
|
|
|
170
177
|
if verbose: log.write("Start to plot miami plot with the following basic settings:")
|
|
171
178
|
if verbose: log.write(" -Genome-wide significance level is set to "+str(sig_level)+" ...")
|
|
@@ -192,16 +199,24 @@ def plot_miami(
|
|
|
192
199
|
pos="POS"
|
|
193
200
|
|
|
194
201
|
## load sumstats1 ###########################################################################################################
|
|
195
|
-
if verbose: log.write(" -Loading sumstats1:" + path1)
|
|
202
|
+
if verbose: log.write(" -Loading sumstats1 ({} mode):".format(modes[0]) + path1)
|
|
196
203
|
if verbose: log.write(" -Sumstats1 CHR,POS,P information will be obtained from:",cols1)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
204
|
+
if modes[0]=="pickle":
|
|
205
|
+
sumstats1 = load_data_from_pickle(path1,usecols=cols1)
|
|
206
|
+
else:
|
|
207
|
+
sumstats1 = pd.read_csv(path1,sep=sep[0],usecols=cols1,dtype={cols1[0]:"string",cols1[1]:"Int64",cols1[2]:"float64"},**readcsv_args)
|
|
200
208
|
|
|
201
209
|
## load sumstats2 ###########################################################################################################
|
|
202
|
-
if verbose: log.write(" -Loading sumstats2:" + path2)
|
|
210
|
+
if verbose: log.write(" -Loading sumstats2 ({} mode):".format(modes[1]) + path2)
|
|
203
211
|
if verbose: log.write(" -Sumstats2 CHR,POS,P information will be obtained from:",cols2)
|
|
204
|
-
|
|
212
|
+
if modes[1]=="pickle":
|
|
213
|
+
sumstats2 = load_data_from_pickle(path2,usecols=cols2)
|
|
214
|
+
else:
|
|
215
|
+
sumstats2 = pd.read_csv(path2,sep=sep[1],usecols=cols2,dtype={cols1[0]:"string",cols1[1]:"Int64",cols1[2]:"float64"},**readcsv_args)
|
|
216
|
+
|
|
217
|
+
sumstats1 = sumstats1.rename(columns={cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"})
|
|
218
|
+
sumstats1 = _quick_fix(sumstats1,chr_dict=chr_dict1, scaled=scaled1, verbose=verbose, log=log)
|
|
219
|
+
|
|
205
220
|
sumstats2 = sumstats2.rename(columns={cols2[0]:"CHR",cols2[1]:"POS",cols2[2]:"P"})
|
|
206
221
|
sumstats2 = _quick_fix(sumstats2,chr_dict=chr_dict2, scaled=scaled2, verbose=verbose, log=log)
|
|
207
222
|
|
gwaslab/mqqplot.py
CHANGED
|
@@ -29,6 +29,7 @@ from adjustText import adjust_text
|
|
|
29
29
|
from gwaslab.textreposition import adjust_text_position
|
|
30
30
|
from gwaslab.annotateplot import annotate_single
|
|
31
31
|
from gwaslab.qqplot import _plot_qq
|
|
32
|
+
from gwaslab.retrievedata import auto_check_vcf_chr_dict
|
|
32
33
|
from gwaslab.regionalplot import _plot_regional
|
|
33
34
|
from gwaslab.regionalplot import process_vcf
|
|
34
35
|
from gwaslab.quickfix import _get_largenumber
|
|
@@ -180,8 +181,7 @@ def mqqplot(insumstats,
|
|
|
180
181
|
chr_dict = get_chr_to_number()
|
|
181
182
|
if xtick_chr_dict is None:
|
|
182
183
|
xtick_chr_dict = get_number_to_chr()
|
|
183
|
-
|
|
184
|
-
vcf_chr_dict = get_number_to_chr()
|
|
184
|
+
|
|
185
185
|
if gtf_chr_dict is None:
|
|
186
186
|
gtf_chr_dict = get_number_to_chr()
|
|
187
187
|
if rr_chr_dict is None:
|
|
@@ -267,6 +267,7 @@ def mqqplot(insumstats,
|
|
|
267
267
|
additional_line_color = ["grey"]
|
|
268
268
|
lines_to_plot = -np.log10(lines_to_plot)
|
|
269
269
|
|
|
270
|
+
vcf_chr_dict = auto_check_vcf_chr_dict(vcf_path, vcf_chr_dict, verbose, log)
|
|
270
271
|
# Plotting mode selection : layout ####################################################################
|
|
271
272
|
# ax1 : manhattanplot / brisbane plot
|
|
272
273
|
# ax2 : qq plot
|
|
@@ -321,7 +322,7 @@ def mqqplot(insumstats,
|
|
|
321
322
|
|
|
322
323
|
# CHR and POS ########################################################################
|
|
323
324
|
# chrom and pos exists && (m || r mode)
|
|
324
|
-
if (chrom is not None) and (pos is not None) and (("m" in mode) or ("r" in mode)):
|
|
325
|
+
if (chrom is not None) and (pos is not None) and (("qq" in mode) or ("m" in mode) or ("r" in mode)):
|
|
325
326
|
# when manhattan plot, chrom and pos is needed.
|
|
326
327
|
if chrom in insumstats.columns:
|
|
327
328
|
usecols.append(chrom)
|