gwaslab 3.5.7__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +2 -0
- gwaslab/bd_common_data.py +1 -0
- gwaslab/bd_get_hapmap3.py +0 -1
- gwaslab/data/formatbook.json +78 -0
- gwaslab/data/reference.json +3 -1
- gwaslab/g_Sumstats.py +110 -25
- gwaslab/g_SumstatsMulti.py +287 -0
- gwaslab/g_SumstatsPair.py +101 -16
- gwaslab/g_Sumstats_polars.py +245 -0
- gwaslab/g_headers.py +12 -3
- gwaslab/g_meta.py +124 -47
- gwaslab/g_meta_update.py +48 -0
- gwaslab/g_vchange_status_polars.py +44 -0
- gwaslab/g_version.py +2 -2
- gwaslab/hm_casting.py +169 -110
- gwaslab/hm_casting_polars.py +202 -0
- gwaslab/hm_harmonize_sumstats.py +19 -8
- gwaslab/io_load_ld.py +529 -0
- gwaslab/io_preformat_input.py +11 -0
- gwaslab/io_preformat_input_polars.py +632 -0
- gwaslab/io_process_args.py +25 -1
- gwaslab/io_read_ldsc.py +34 -3
- gwaslab/io_read_pipcs.py +62 -6
- gwaslab/prscs_gigrnd.py +122 -0
- gwaslab/prscs_mcmc_gtb.py +136 -0
- gwaslab/prscs_parse_genet.py +98 -0
- gwaslab/qc_build.py +53 -0
- gwaslab/qc_check_datatype.py +10 -8
- gwaslab/qc_check_datatype_polars.py +128 -0
- gwaslab/qc_fix_sumstats.py +25 -23
- gwaslab/qc_fix_sumstats_polars.py +193 -0
- gwaslab/util_ex_calculate_ldmatrix.py +49 -19
- gwaslab/util_ex_gwascatalog.py +71 -28
- gwaslab/util_ex_infer_ancestry.py +65 -0
- gwaslab/util_ex_ldsc.py +67 -21
- gwaslab/util_ex_match_ldmatrix.py +396 -0
- gwaslab/util_ex_run_2samplemr.py +0 -2
- gwaslab/util_ex_run_ccgwas.py +155 -0
- gwaslab/util_ex_run_coloc.py +1 -1
- gwaslab/util_ex_run_hyprcoloc.py +117 -0
- gwaslab/util_ex_run_magma.py +74 -0
- gwaslab/util_ex_run_mesusie.py +155 -0
- gwaslab/util_ex_run_mtag.py +92 -0
- gwaslab/util_ex_run_prscs.py +85 -0
- gwaslab/util_ex_run_susie.py +40 -9
- gwaslab/util_in_estimate_ess.py +18 -0
- gwaslab/util_in_fill_data.py +20 -1
- gwaslab/util_in_filter_value.py +10 -5
- gwaslab/util_in_get_sig.py +71 -13
- gwaslab/util_in_meta.py +168 -4
- gwaslab/util_in_meta_polars.py +174 -0
- gwaslab/viz_aux_annotate_plot.py +13 -2
- gwaslab/viz_plot_compare_effect.py +87 -23
- gwaslab/viz_plot_credible_sets.py +55 -11
- gwaslab/viz_plot_effect.py +22 -12
- gwaslab/viz_plot_miamiplot2.py +3 -2
- gwaslab/viz_plot_mqqplot.py +94 -84
- gwaslab/viz_plot_qqplot.py +9 -7
- gwaslab/viz_plot_regional2.py +2 -1
- gwaslab/viz_plot_stackedregional.py +4 -1
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/METADATA +46 -68
- gwaslab-3.6.0.dist-info/RECORD +119 -0
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/WHEEL +1 -1
- gwaslab-3.5.7.dist-info/RECORD +0 -96
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info/licenses}/LICENSE +0 -0
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info/licenses}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import copy
|
|
4
|
+
import gc
|
|
5
|
+
from gwaslab.util_in_filter_value import filtervalues
|
|
6
|
+
from gwaslab.g_Log import Log
|
|
7
|
+
from math import floor
|
|
8
|
+
from gwaslab.g_Sumstats import Sumstats
|
|
9
|
+
from gwaslab.g_Sumstats_polars import Sumstatsp
|
|
10
|
+
from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
|
|
11
|
+
from gwaslab.hm_casting import _align_with_mold
|
|
12
|
+
from gwaslab.hm_casting import _fill_missing_columns
|
|
13
|
+
from gwaslab.hm_casting import _check_daf
|
|
14
|
+
from gwaslab.hm_casting import _assign_warning_code
|
|
15
|
+
from gwaslab.hm_casting import _renaming_cols
|
|
16
|
+
from gwaslab.hm_casting import _sort_pair_cols
|
|
17
|
+
|
|
18
|
+
from gwaslab.hm_casting_polars import _merge_mold_with_sumstats_by_chrposp
|
|
19
|
+
from gwaslab.hm_casting_polars import _align_with_moldp
|
|
20
|
+
from gwaslab.hm_casting_polars import _fill_missing_columnsp
|
|
21
|
+
from gwaslab.hm_casting_polars import _renaming_colsp
|
|
22
|
+
from gwaslab.hm_casting_polars import _sort_pair_colsp
|
|
23
|
+
|
|
24
|
+
from gwaslab.qc_fix_sumstats import flipallelestats
|
|
25
|
+
from gwaslab.qc_fix_sumstats_polars import flipallelestatsp
|
|
26
|
+
from gwaslab.qc_check_datatype_polars import check_datatype
|
|
27
|
+
from gwaslab.qc_check_datatype_polars import check_dataframe_shape
|
|
28
|
+
from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
|
|
29
|
+
from gwaslab.util_ex_run_coloc import _run_coloc_susie
|
|
30
|
+
from gwaslab.viz_plot_miamiplot2 import plot_miami2
|
|
31
|
+
from gwaslab.viz_plot_compare_af import plotdaf
|
|
32
|
+
from gwaslab.util_ex_run_2samplemr import _run_two_sample_mr
|
|
33
|
+
from gwaslab.util_ex_run_clumping import _clump
|
|
34
|
+
from gwaslab.util_ex_ldproxyfinder import _extract_with_ld_proxy
|
|
35
|
+
from gwaslab.g_headers import _get_headers
|
|
36
|
+
from gwaslab.util_ex_match_ldmatrix import tofinemapping_m
|
|
37
|
+
from gwaslab.util_ex_run_mesusie import _run_mesusie
|
|
38
|
+
from gwaslab.util_in_meta import meta_analyze_multi
|
|
39
|
+
from gwaslab.util_ex_run_hyprcoloc import _run_hyprcoloc
|
|
40
|
+
from gwaslab.util_in_get_sig import getsig
|
|
41
|
+
from gwaslab.util_in_fill_data import _get_multi_min
|
|
42
|
+
from gwaslab.g_meta import _init_meta
|
|
43
|
+
from gwaslab.g_meta_update import _update_meta
|
|
44
|
+
from gwaslab.qc_fix_sumstats import _process_build
|
|
45
|
+
from gwaslab.util_ex_run_mtag import _run_mtag
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SumstatsMulti( ):
    """Merge several GWASLab sumstats objects into one table.

    The first object is the base table ("mold"); every further object is
    merged onto it one at a time by ``_merge_two_sumstats``.  Per-study
    columns are distinguished by a ``_<k>`` suffix, where ``k`` is the
    1-based position of the study in ``sumstatsObjects``.

    Attributes set by ``__init__``:
        log: ``Log`` instance used for all messages.
        meta: metadata dict from ``_init_meta(object="SumstatsMulti")``;
            ``meta["gwaslab"]`` receives ``number_of_studies``,
            ``genome_build``, ``objects``, ``study_index``, ``group_name``
            and ``study_names_in_group``.
        engine: the ``engine`` argument ("pandas" or "polars").
        group_name: name of the group ("Group1" when not given).
        names: study names in input order, made unique.
        snp_info_cols / stats_cols / other_cols: dicts keyed by the
            0-based study index, listing that study's columns by category.
        hyprcoloc: result holder filled by ``run_hyprcoloc``.
        data: the merged table.
    """

    def __init__(self,
                 sumstatsObjects,
                 group_name=None,
                 build="99",
                 engine="pandas",
                 merge_mode="outer",
                 verbose=True ):
        """Validate the inputs, record metadata and merge all studies.

        Args:
            sumstatsObjects: sequence of ``Sumstats`` / ``Sumstatsp`` objects.
            group_name: name for the group; defaults to "Group1".
            build: genome build, passed through ``_process_build``.
            engine: "pandas" or "polars"; selects the helper functions used.
            merge_mode: join mode forwarded to the merge helper.  It is
                overridden with "full" when ``engine == "polars"``.
            verbose: forwarded to every log call and helper.

        Raises:
            ValueError: if ``sumstatsObjects`` is empty or contains an item
                that is neither a ``Sumstats`` nor a ``Sumstatsp``.
        """
        # Fail early with a clear message; an empty input would otherwise
        # surface later as a KeyError on self.snp_info_cols[0].
        if len(sumstatsObjects) == 0:
            raise ValueError("Please provide at least one GWASLab Sumstats Object.")

        for i, sumstatsObject in enumerate(sumstatsObjects):
            if not isinstance(sumstatsObject, (Sumstats, Sumstatsp)):
                raise ValueError("Please provide GWASLab Sumstats Object #{}.".format(i+1))

        self.log = Log()
        self.meta = _init_meta(object="SumstatsMulti")

        if engine=="polars":
            # polars is imported lazily so it is only required for this engine.
            import polars as pl
            # presumably the polars spelling of an outer join -- TODO confirm
            merge_mode="full"

        self.engine=engine

        self.meta["gwaslab"]["number_of_studies"] = len(sumstatsObjects)
        self.meta["gwaslab"]["genome_build"] = _process_build(build, log=self.log, verbose=False)
        self.meta["gwaslab"]["objects"] = dict()
        self.meta["gwaslab"]["study_index"] = dict()

        if group_name is None:
            group_name = "Group1"
        self.group_name = group_name
        self.meta["gwaslab"]["group_name"] = group_name

        self.names=[]
        self.hyprcoloc = {}

        self.snp_info_cols = dict()
        self.stats_cols = dict()
        self.other_cols= dict()

        self.log.write( "Start to create SumstatsMulti object..." , verbose=verbose)

        # Header lists do not depend on the study, so look them up once.
        info_headers = _get_headers(mode="info")
        stats_headers = _get_headers(mode="stats")

        for i, sumstatsObject in enumerate(sumstatsObjects):
            self.log.write( " -Checking sumstats Object #{}...".format(i+1), verbose=verbose)
            check_datatype(sumstatsObject.data, log=self.log, verbose=verbose)
            check_dataframe_shape(sumstats=sumstatsObject.data,
                                  log=self.log,
                                  verbose=verbose)

            # A study name that was already used gets "_<position>" appended.
            study_name = sumstatsObject.meta["gwaslab"]["study_name"]
            if study_name in self.names:
                study_name = "{}_{}".format(study_name, i+1)
            self.log.write( " -Sumstats Object #{} name: {}".format(i+1, study_name), verbose=verbose)
            self.names.append(study_name)

            # meta entries are keyed 1-based, the column dicts 0-based.
            self.meta["gwaslab"]["objects"][i+1] = sumstatsObject.meta
            self.meta["gwaslab"]["study_index"][i+1] = self.names[-1]

            self.snp_info_cols[i] = list()
            self.stats_cols[i] = list()
            self.other_cols[i] = list()

            for col in sumstatsObject.data.columns:
                if col in info_headers:
                    self.snp_info_cols[i].append(col)
                elif col in stats_headers:
                    self.stats_cols[i].append(col)
                else:
                    self.other_cols[i].append(col)

        self.meta["gwaslab"]["study_names_in_group"] = ",".join(self.names)

        self.log.write( " -Variant Info columns: {}".format(self.snp_info_cols[0]) , verbose=verbose)
        for i in range(len(sumstatsObjects)):
            self.log.write( " -Sumstats #{} variant statistics columns: {}".format(i+1, self.stats_cols[i]) , verbose=verbose)
            self.log.write( " -Sumstats #{} other columns: {}".format(i+1, self.other_cols[i]) , verbose=verbose)

        # The first study is the base table; its columns get the suffix _1.
        self.data = sumstatsObjects[0].data

        if engine=="polars":
            self.data = self.data.rename({"EA":"EA_1","NEA":"NEA_1","STATUS":"STATUS_1"})
            self.data = self.data.rename({i:i + "_1" for i in self.stats_cols[0]})
            self.data = self.data.rename({i:i + "_1" for i in self.other_cols[0]})
        else:
            self.data = self.data.rename(columns={"EA":"EA_1","NEA":"NEA_1","STATUS":"STATUS_1"})
            self.data = self.data.rename(columns={i:i + "_1" for i in self.stats_cols[0]})
            self.data = self.data.rename(columns={i:i + "_1" for i in self.other_cols[0]})

        if engine=="polars":
            self.data = pl.DataFrame(self.data)

        # Merge every remaining study onto the base table, one at a time.
        for i, sumstatsObject in enumerate(sumstatsObjects):
            if i == 0:
                continue
            self.log.write("Merging Sumstats #{} to main DataFrame...".format(i+1), verbose=verbose)
            self.data = self._merge_two_sumstats(sumstatsObject.data,
                                                 verbose=verbose,
                                                 i=i,
                                                 merge_mode=merge_mode,
                                                 engine=engine)
            self.log.write("Finished merging Sumstats #{} to main DataFrame.".format(i+1), verbose=verbose)


    def _merge_two_sumstats(self,
                            sumstatsObject2,
                            verbose=True,
                            merge_mode="outer",
                            engine="pandas",
                            i=0):
        """Merge one more study onto ``self.data`` and return the result.

        Args:
            sumstatsObject2: table of the study to add.  ``__init__`` passes
                the ``.data`` of a sumstats object here, not the object.
            verbose: forwarded to the helpers.
            merge_mode: join mode forwarded to the merge helper.
            engine: "pandas" or "polars"; selects the helper variants.
            i: 0-based index of the study being added; its columns end up
                with the suffix ``_<i+1>``.

        Returns:
            The merged table.  ``self.data`` has its EA/NEA columns renamed
            to EA_1/NEA_1 as a side effect; the caller assigns the returned
            table back to ``self.data``.
        """
        # The base table carries suffix _1 during the merge, the new study none.
        if engine=="polars":
            if "EA" in self.data.columns:
                self.data = self.data.rename({"EA":"EA_1","NEA":"NEA_1"})
        else:
            self.data = self.data.rename(columns={"EA":"EA_1","NEA":"NEA_1"})

        if engine=="polars":
            molded_sumstats = _merge_mold_with_sumstats_by_chrposp(mold=self.data,
                                                                   sumstats=sumstatsObject2,
                                                                   log=self.log,
                                                                   verbose=verbose,
                                                                   merge_mode=merge_mode,
                                                                   stats_cols1 = self.other_cols[0],
                                                                   stats_cols2 = self.other_cols[i],
                                                                   suffixes=("_1",""),
                                                                   return_not_matched_mold = False)
            molded_sumstats = _align_with_moldp(molded_sumstats, log=self.log, verbose=verbose,suffixes=("_1",""))
            molded_sumstats = flipallelestatsp(molded_sumstats, log=self.log, verbose=verbose)
            # Keep the base table's alleles as the shared EA/NEA columns.
            molded_sumstats = molded_sumstats.drop(["EA","NEA"] )
            molded_sumstats = molded_sumstats.rename({"EA_1":"EA","NEA_1":"NEA"})
        else:
            molded_sumstats = _merge_mold_with_sumstats_by_chrpos(mold=self.data,
                                                                  sumstats=sumstatsObject2,
                                                                  log=self.log,
                                                                  verbose=verbose,
                                                                  merge_mode=merge_mode,
                                                                  stats_cols1 = self.other_cols[0],
                                                                  stats_cols2 = self.other_cols[i],
                                                                  suffixes=("_1",""),
                                                                  return_not_matched_mold = False)
            molded_sumstats = _align_with_mold(molded_sumstats, log=self.log, verbose=verbose,suffixes=("_1",""))
            molded_sumstats = flipallelestats(molded_sumstats, log=self.log, verbose=verbose)
            # Keep the base table's alleles as the shared EA/NEA columns.
            molded_sumstats = molded_sumstats.drop(columns=["EA","NEA"] )
            molded_sumstats = molded_sumstats.rename(columns={"EA_1":"EA","NEA_1":"NEA"})

        # Add statistics columns the base study has but this study lacks.
        # NOTE(review): the pandas helper is used for both engines although
        # _fill_missing_columnsp is imported -- confirm this is intended.
        if not set(self.stats_cols[i]) == set(self.stats_cols[0]):
            cols_to_fill = set(self.stats_cols[0]).difference(set(self.stats_cols[i]))
            molded_sumstats = _fill_missing_columns(molded_sumstats, cols_to_fill, log=self.log, verbose=verbose)

        # Give the newly merged study's columns the suffix _<i+1>.
        new_suffixes = ("_1","_{}".format(i+1))
        cols_to_rename = self.stats_cols[0] + self.other_cols[i]
        if engine=="polars":
            molded_sumstats = _renaming_colsp(molded_sumstats,
                                              cols_to_rename,
                                              log=self.log,
                                              verbose=verbose,
                                              suffixes=new_suffixes)
        else:
            molded_sumstats = _renaming_cols(molded_sumstats,
                                             cols_to_rename,
                                             log=self.log,
                                             verbose=verbose,
                                             suffixes=new_suffixes)

        # NOTE(review): the pandas helper is used for both engines although
        # _sort_pair_colsp is imported -- confirm this is intended.
        molded_sumstats = _sort_pair_cols(molded_sumstats,
                                          verbose=verbose,
                                          log=self.log,
                                          suffixes=["_{}".format(j) for j in range(1,i+2)])
        return molded_sumstats

    def update_meta(self,**kwargs):
        """Refresh ``self.meta`` from ``self.data`` via ``_update_meta``."""
        self.meta = _update_meta(self.meta, self.data, log = self.log, **kwargs)

    def run_meta_analysis(self, **kwargs):
        """Run the meta-analysis helper matching ``self.engine``.

        Returns whatever ``meta_analyze_polars`` (polars engine) or
        ``meta_analyze_multi`` (otherwise) returns.
        """
        nstudy = self.meta["gwaslab"]["number_of_studies"]
        if self.engine == "polars":
            from gwaslab.util_in_meta_polars import meta_analyze_polars
            return meta_analyze_polars(self.data, nstudy = nstudy, **kwargs)
        return meta_analyze_multi(self.data, nstudy = nstudy, **kwargs)

    def run_hyprcoloc(self,**kwargs):
        """Run ``_run_hyprcoloc`` on the merged table; store the result in ``self.hyprcoloc``."""
        hyprcoloc_res_combined = _run_hyprcoloc(self.data,
                                                nstudy = self.meta["gwaslab"]["number_of_studies"],
                                                study= self.meta["gwaslab"]["group_name"],
                                                traits=self.names, **kwargs)
        self.hyprcoloc = hyprcoloc_res_combined

    def run_mtag(self,**kwargs):
        """Run ``_run_mtag`` on the merged table; nothing is returned or stored."""
        _run_mtag( self.data,
                   nstudy = self.meta["gwaslab"]["number_of_studies"],
                   study= self.meta["gwaslab"]["group_name"],
                   traits=self.names,
                   **kwargs)

    def get_lead(self, build=None, gls=False, **kwargs):
        """Extract lead variants using the ``P_MIN`` column.

        ``self.data`` is replaced by the output of ``_get_multi_min`` (called
        with ``col="P"``) before ``getsig`` is run with ``p="P_MIN"``.

        Args:
            build: genome build; defaults to the build stored in ``self.meta``.
            gls: when true, return a deep copy of this object whose ``data``
                is the ``getsig`` output, not the output itself.
            **kwargs: forwarded to ``getsig``.

        Returns:
            The ``getsig`` output, or a ``SumstatsMulti`` copy holding it.
        """
        id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"

        # Fall back to the build recorded when the object was created.
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]

        self.data = _get_multi_min(self.data,
                                   col="P",
                                   nstudy=self.meta["gwaslab"]["number_of_studies"])

        output = getsig(self.data,
                        id=id_to_use,
                        chrom="CHR",
                        pos="POS",
                        p="P_MIN",
                        log=self.log,
                        build=build,
                        **kwargs)

        if gls:
            new_Sumstats_object = copy.deepcopy(self)
            new_Sumstats_object.data = output
            gc.collect()
            return new_Sumstats_object

        return output
|
|
287
|
+
|
gwaslab/g_SumstatsPair.py
CHANGED
|
@@ -24,6 +24,12 @@ from gwaslab.util_ex_run_2samplemr import _run_two_sample_mr
|
|
|
24
24
|
from gwaslab.util_ex_run_clumping import _clump
|
|
25
25
|
from gwaslab.util_ex_ldproxyfinder import _extract_with_ld_proxy
|
|
26
26
|
from gwaslab.g_headers import _get_headers
|
|
27
|
+
from gwaslab.util_ex_match_ldmatrix import tofinemapping_m
|
|
28
|
+
from gwaslab.util_ex_run_mesusie import _run_mesusie
|
|
29
|
+
from gwaslab.io_read_pipcs import _read_pipcs
|
|
30
|
+
from gwaslab.g_meta import _init_meta
|
|
31
|
+
from gwaslab.viz_plot_stackedregional import plot_stacked_mqq
|
|
32
|
+
from gwaslab.util_ex_run_ccgwas import _run_ccgwas
|
|
27
33
|
|
|
28
34
|
class SumstatsPair( ):
|
|
29
35
|
def __init__(self, sumstatsObject1, sumstatsObject2, study=None, suffixes = ("_1","_2") ,verbose=True ):
|
|
@@ -32,11 +38,29 @@ class SumstatsPair( ):
|
|
|
32
38
|
raise ValueError("Please provide GWASLab Sumstats Object #1.")
|
|
33
39
|
if not isinstance(sumstatsObject2, Sumstats):
|
|
34
40
|
raise ValueError("Please provide GWASLab Sumstats Object #2.")
|
|
41
|
+
|
|
42
|
+
self.meta = _init_meta(object="SumstatsPair")
|
|
43
|
+
|
|
35
44
|
if sumstatsObject1.meta["gwaslab"]["study_name"]!=sumstatsObject2.meta["gwaslab"]["study_name"]:
|
|
36
45
|
self.study_name = "{}_{}".format(sumstatsObject1.meta["gwaslab"]["study_name"], sumstatsObject2.meta["gwaslab"]["study_name"])
|
|
46
|
+
self.study_names = [sumstatsObject1.meta["gwaslab"]["study_name"], sumstatsObject2.meta["gwaslab"]["study_name"]]
|
|
37
47
|
else:
|
|
38
|
-
self.study_name = "{}_{}".format("
|
|
48
|
+
self.study_name = "{}_{}".format(sumstatsObject1.meta["gwaslab"]["study_name"]+"1", sumstatsObject2.meta["gwaslab"]["study_name"]+"2")
|
|
49
|
+
self.study_names = [sumstatsObject1.meta["gwaslab"]["study_name"]+"1", sumstatsObject2.meta["gwaslab"]["study_name"]+"2"]
|
|
50
|
+
|
|
51
|
+
self.meta["gwaslab"]["objects"] = dict()
|
|
52
|
+
self.meta["gwaslab"]["objects"][0] = sumstatsObject1.meta
|
|
53
|
+
self.meta["gwaslab"]["objects"][1] = sumstatsObject2.meta
|
|
54
|
+
|
|
55
|
+
#self.meta["gwaslab"]["study_name"] = self.study_name
|
|
56
|
+
self.meta["gwaslab"]["group_name"] = self.study_name
|
|
39
57
|
|
|
58
|
+
self.ldsc = dict()
|
|
59
|
+
self.ldsc[0] = sumstatsObject1.ldsc_h2
|
|
60
|
+
self.ldsc[1] = sumstatsObject2.ldsc_h2
|
|
61
|
+
self.ldsc_rg = sumstatsObject1.ldsc_rg
|
|
62
|
+
|
|
63
|
+
|
|
40
64
|
self.snp_info_cols = []
|
|
41
65
|
self.stats_cols =[]
|
|
42
66
|
self.stats_cols2 =[]
|
|
@@ -45,18 +69,26 @@ class SumstatsPair( ):
|
|
|
45
69
|
self.log = Log()
|
|
46
70
|
self.suffixes = suffixes
|
|
47
71
|
self.colocalization=pd.DataFrame()
|
|
72
|
+
|
|
48
73
|
self.sumstats1 = pd.DataFrame()
|
|
49
74
|
self.sumstats2 = pd.DataFrame()
|
|
75
|
+
self.ns = None
|
|
50
76
|
|
|
77
|
+
# TwosampleMR
|
|
51
78
|
self.mr =dict()
|
|
79
|
+
|
|
80
|
+
# clumping
|
|
52
81
|
self.clumps =dict()
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
82
|
+
|
|
83
|
+
# MESuSiE
|
|
84
|
+
self.mesusie = dict()
|
|
85
|
+
self.mesusie_res = pd.DataFrame()
|
|
86
|
+
|
|
87
|
+
# Coloc and Coloc SuSiE
|
|
88
|
+
self.coloc = dict()
|
|
89
|
+
self.coloc_susie_res = pd.DataFrame()
|
|
57
90
|
|
|
58
91
|
self.log.write( "Start to create SumstatsPair object..." )
|
|
59
|
-
|
|
60
92
|
self.log.write( " -Checking sumstats 1..." , verbose=verbose)
|
|
61
93
|
check_datatype(sumstatsObject1.data, log=self.log, verbose=verbose)
|
|
62
94
|
check_dataframe_shape(sumstats=sumstatsObject1.data,
|
|
@@ -77,6 +109,7 @@ class SumstatsPair( ):
|
|
|
77
109
|
self.stats_cols.append(i)
|
|
78
110
|
else:
|
|
79
111
|
self.other_cols.append(i)
|
|
112
|
+
|
|
80
113
|
for i in sumstatsObject2.data.columns:
|
|
81
114
|
if i in _get_headers(mode="info"):
|
|
82
115
|
continue
|
|
@@ -90,6 +123,8 @@ class SumstatsPair( ):
|
|
|
90
123
|
self.log.write( " -Sumstats1 other columns: {}".format(self.other_cols) , verbose=verbose)
|
|
91
124
|
self.log.write( " -Sumstats2 other columns: {}".format(self.other_cols2) , verbose=verbose)
|
|
92
125
|
|
|
126
|
+
sumstatsObject1.data["_RAW_INDEX_1"] = range(len(sumstatsObject1.data))
|
|
127
|
+
sumstatsObject2.data["_RAW_INDEX_2"] = range(len(sumstatsObject2.data))
|
|
93
128
|
# extract only info and stats cols
|
|
94
129
|
self.data = sumstatsObject1.data
|
|
95
130
|
|
|
@@ -98,7 +133,7 @@ class SumstatsPair( ):
|
|
|
98
133
|
self.data = self.data.rename(columns={i:i + suffixes[0] for i in self.stats_cols})
|
|
99
134
|
self.data = self.data.rename(columns={i:i + suffixes[0] for i in self.other_cols})
|
|
100
135
|
|
|
101
|
-
self.data, self.sumstats1 = self._merge_two_sumstats(sumstatsObject2, suffixes=suffixes)
|
|
136
|
+
self.data, self.sumstats1, self.sumstats2 = self._merge_two_sumstats(sumstatsObject2, suffixes=suffixes)
|
|
102
137
|
|
|
103
138
|
if "N{}".format(self.suffixes[0]) in self.data.columns and "N{}".format(self.suffixes[1]) in self.data.columns:
|
|
104
139
|
n1 = int(floor(self.data["N{}".format(self.suffixes[0])].mean()))
|
|
@@ -106,14 +141,24 @@ class SumstatsPair( ):
|
|
|
106
141
|
self.ns=(n1, n2)
|
|
107
142
|
else:
|
|
108
143
|
self.ns = None
|
|
144
|
+
sumstatsObject1.data = sumstatsObject1.data.drop(columns=["_RAW_INDEX_1"])
|
|
145
|
+
sumstatsObject2.data = sumstatsObject2.data.drop(columns=["_RAW_INDEX_2"])
|
|
109
146
|
|
|
110
|
-
def _merge_two_sumstats(self,
|
|
147
|
+
def _merge_two_sumstats(self,
|
|
148
|
+
sumstatsObject2,
|
|
149
|
+
threshold=0.2,
|
|
150
|
+
verbose=True,
|
|
151
|
+
windowsizeb=10,
|
|
152
|
+
ref_path=None,
|
|
153
|
+
suffixes=("_1","_2")):
|
|
111
154
|
|
|
112
155
|
# sumstats1 with suffix _1, sumstats2 with no suffix
|
|
113
|
-
molded_sumstats, sumstats1 = _merge_mold_with_sumstats_by_chrpos(mold=self.data,
|
|
156
|
+
molded_sumstats, sumstats1, sumstats2 = _merge_mold_with_sumstats_by_chrpos(mold=self.data,
|
|
114
157
|
sumstats=sumstatsObject2.data,
|
|
115
158
|
log=self.log,
|
|
116
159
|
verbose=verbose,
|
|
160
|
+
stats_cols1 = self.stats_cols,
|
|
161
|
+
stats_cols2 = self.stats_cols2,
|
|
117
162
|
suffixes=(suffixes[0],""),
|
|
118
163
|
return_not_matched_mold = True)
|
|
119
164
|
|
|
@@ -137,21 +182,53 @@ class SumstatsPair( ):
|
|
|
137
182
|
|
|
138
183
|
molded_sumstats = _sort_pair_cols(molded_sumstats, verbose=verbose, log=self.log)
|
|
139
184
|
|
|
140
|
-
return molded_sumstats, sumstats1
|
|
185
|
+
return molded_sumstats, sumstats1, sumstats2
|
|
141
186
|
|
|
142
187
|
|
|
143
188
|
def clump(self,**kwargs):
    """Run ``_clump`` on the merged table using columns "P_1" and "MLOG10P_1".

    The three values returned by ``_clump`` are stored in ``self.clumps``
    under the keys "clumps", "clumps_raw" and "plink_log".
    """
    group_name = self.meta["gwaslab"]["group_name"]
    clumps, clumps_raw, plink_log = _clump(self.data,
                                           log=self.log,
                                           p="P_1",
                                           mlog10p="MLOG10P_1",
                                           study=group_name,
                                           **kwargs)
    self.clumps["clumps"] = clumps
    self.clumps["clumps_raw"] = clumps_raw
    self.clumps["plink_log"] = plink_log
|
|
145
190
|
|
|
146
191
|
def to_coloc(self,**kwargs):
    """Run ``tofinemapping`` on the merged table for the coloc workflow.

    The three returned values are stored in ``self.coloc`` under the keys
    "path", "file" and "plink_log".
    """
    group_name = self.meta["gwaslab"]["group_name"]
    path, file, plink_log = tofinemapping(self.data,
                                          study=group_name,
                                          suffixes=self.suffixes,
                                          log=self.log,
                                          **kwargs)
    self.coloc["path"] = path
    self.coloc["file"] = file
    self.coloc["plink_log"] = plink_log
|
|
193
|
+
|
|
194
|
+
def to_mesusie(self,**kwargs):
    """Run ``tofinemapping_m`` on the merged table for the MESuSiE workflow.

    The three returned values are stored in ``self.mesusie`` under the keys
    "path", "file" and "plink_log".
    """
    group_name = self.meta["gwaslab"]["group_name"]
    path, file, plink_log = tofinemapping_m(self.data,
                                            studies=self.study_names,
                                            group=group_name,
                                            suffixes=self.suffixes,
                                            log=self.log,
                                            **kwargs)
    self.mesusie["path"] = path
    self.mesusie["file"] = file
    self.mesusie["plink_log"] = plink_log
|
|
201
|
+
|
|
202
|
+
def run_mesusie(self,**kwargs):
    """Run ``_run_mesusie`` on ``self.mesusie["path"]`` and load its output.

    The prefix returned by ``_run_mesusie`` is handed to ``_read_pipcs``
    together with the SNPID/CHR/POS columns; the result is stored in
    ``self.mesusie_res``.
    """
    input_path = self.mesusie["path"]
    prefix = _run_mesusie(input_path, log=self.log, ncols=self.ns, **kwargs)

    variant_info = self.data[["SNPID","CHR","POS"]]
    # NOTE(review): ``studie_names`` receives the joined string
    # ``self.study_name``; the list ``self.study_names`` may have been
    # intended -- confirm against _read_pipcs.
    self.mesusie_res = _read_pipcs(variant_info,
                                   prefix,
                                   studie_names=self.study_name,
                                   group=self.meta["gwaslab"]["group_name"])
|
|
208
|
+
|
|
209
|
+
def run_ccgwas(self,**kwargs):
    """Call ``_run_ccgwas`` with the merged table, metadata and LDSC results.

    Nothing is returned or stored by this method.
    """
    group_name = self.meta["gwaslab"]["group_name"]
    _run_ccgwas(self.data,
                meta=self.meta,
                ldsc=self.ldsc,
                ldsc_rg=self.ldsc_rg,
                group=group_name,
                studies=self.study_names,
                log=self.log,
                **kwargs)
|
|
148
218
|
|
|
219
|
+
def read_pipcs(self,prefix,**kwargs):
    """Load results for ``prefix`` with ``_read_pipcs`` into ``self.mesusie_res``.

    Args:
        prefix: passed to ``_read_pipcs`` as its second positional argument.
        **kwargs: forwarded to ``_read_pipcs``.
    """
    variant_info = self.data[["SNPID","CHR","POS"]]
    group_name = self.meta["gwaslab"]["group_name"]
    # NOTE(review): ``studie_names`` receives the joined string
    # ``self.study_name``; the list ``self.study_names`` may have been
    # intended -- confirm against _read_pipcs.
    self.mesusie_res = _read_pipcs(variant_info,
                                   prefix,
                                   group=group_name,
                                   studie_names=self.study_name,
                                   **kwargs)
|
|
225
|
+
|
|
149
226
|
def run_coloc_susie(self,**kwargs):
    """Run ``_run_coloc_susie`` on ``self.coloc["path"]``; store the result in ``self.coloc_susie_res``."""
    input_path = self.coloc["path"]
    result = _run_coloc_susie(input_path, log=self.log, ncols=self.ns, **kwargs)
    self.coloc_susie_res = result
|
|
151
228
|
|
|
152
229
|
def run_two_sample_mr(self, clump=False, **kwargs):
    """Run ``_run_two_sample_mr`` with study 1 as exposure and study 2 as outcome.

    Args:
        clump: forwarded to ``_run_two_sample_mr``.
        **kwargs: forwarded to ``_run_two_sample_mr``.
    """
    # Take the labels from the stored per-study names.  Splitting the
    # "<name1>_<name2>" group name on "_" returns the wrong pieces when a
    # study name itself contains an underscore.
    exposure1, outcome2 = self.study_names
    _run_two_sample_mr(self, exposure1=exposure1, outcome2=outcome2, clump=clump, **kwargs)
|
|
156
233
|
|
|
157
234
|
def extract_with_ld_proxy(self,**arg):
|
|
@@ -166,9 +243,17 @@ class SumstatsPair( ):
|
|
|
166
243
|
self.data = filtervalues(self.data, expr,log=self.log,**kwargs)
|
|
167
244
|
gc.collect()
|
|
168
245
|
|
|
246
|
+
def stacked_mqq(self, **kwargs):
    """Plot both studies and the MESuSiE result with ``plot_stacked_mqq``.

    Three objects are passed, in this order: the variant columns with "P_1"
    renamed to "P", the same with "P_2", and ``self.mesusie_res``.
    """
    shared_cols = ["SNPID","CHR","POS","EA","NEA"]
    panels = [self.data[shared_cols + [p_col]].rename(columns={p_col:"P"})
              for p_col in ("P_1","P_2")]
    panels.append(self.mesusie_res)

    plot_stacked_mqq(objects=panels,
                     **kwargs)
|
|
254
|
+
|
|
169
255
|
## Visualization #############################################################################################################################################
|
|
170
256
|
def plot_miami(self,**kwargs):
|
|
171
|
-
|
|
172
257
|
plot_miami2(merged_sumstats=self.data,
|
|
173
258
|
suffixes=self.suffixes,
|
|
174
259
|
**kwargs)
|