gwaslab 3.5.7__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (67)
  1. gwaslab/__init__.py +2 -0
  2. gwaslab/bd_common_data.py +1 -0
  3. gwaslab/bd_get_hapmap3.py +0 -1
  4. gwaslab/data/formatbook.json +78 -0
  5. gwaslab/data/reference.json +3 -1
  6. gwaslab/g_Sumstats.py +110 -25
  7. gwaslab/g_SumstatsMulti.py +287 -0
  8. gwaslab/g_SumstatsPair.py +101 -16
  9. gwaslab/g_Sumstats_polars.py +245 -0
  10. gwaslab/g_headers.py +12 -3
  11. gwaslab/g_meta.py +124 -47
  12. gwaslab/g_meta_update.py +48 -0
  13. gwaslab/g_vchange_status_polars.py +44 -0
  14. gwaslab/g_version.py +2 -2
  15. gwaslab/hm_casting.py +169 -110
  16. gwaslab/hm_casting_polars.py +202 -0
  17. gwaslab/hm_harmonize_sumstats.py +19 -8
  18. gwaslab/io_load_ld.py +529 -0
  19. gwaslab/io_preformat_input.py +11 -0
  20. gwaslab/io_preformat_input_polars.py +632 -0
  21. gwaslab/io_process_args.py +25 -1
  22. gwaslab/io_read_ldsc.py +34 -3
  23. gwaslab/io_read_pipcs.py +62 -6
  24. gwaslab/prscs_gigrnd.py +122 -0
  25. gwaslab/prscs_mcmc_gtb.py +136 -0
  26. gwaslab/prscs_parse_genet.py +98 -0
  27. gwaslab/qc_build.py +53 -0
  28. gwaslab/qc_check_datatype.py +10 -8
  29. gwaslab/qc_check_datatype_polars.py +128 -0
  30. gwaslab/qc_fix_sumstats.py +25 -23
  31. gwaslab/qc_fix_sumstats_polars.py +193 -0
  32. gwaslab/util_ex_calculate_ldmatrix.py +49 -19
  33. gwaslab/util_ex_gwascatalog.py +71 -28
  34. gwaslab/util_ex_infer_ancestry.py +65 -0
  35. gwaslab/util_ex_ldsc.py +67 -21
  36. gwaslab/util_ex_match_ldmatrix.py +396 -0
  37. gwaslab/util_ex_run_2samplemr.py +0 -2
  38. gwaslab/util_ex_run_ccgwas.py +155 -0
  39. gwaslab/util_ex_run_coloc.py +1 -1
  40. gwaslab/util_ex_run_hyprcoloc.py +117 -0
  41. gwaslab/util_ex_run_magma.py +74 -0
  42. gwaslab/util_ex_run_mesusie.py +155 -0
  43. gwaslab/util_ex_run_mtag.py +92 -0
  44. gwaslab/util_ex_run_prscs.py +85 -0
  45. gwaslab/util_ex_run_susie.py +40 -9
  46. gwaslab/util_in_estimate_ess.py +18 -0
  47. gwaslab/util_in_fill_data.py +20 -1
  48. gwaslab/util_in_filter_value.py +10 -5
  49. gwaslab/util_in_get_sig.py +71 -13
  50. gwaslab/util_in_meta.py +168 -4
  51. gwaslab/util_in_meta_polars.py +174 -0
  52. gwaslab/viz_aux_annotate_plot.py +13 -2
  53. gwaslab/viz_plot_compare_effect.py +87 -23
  54. gwaslab/viz_plot_credible_sets.py +55 -11
  55. gwaslab/viz_plot_effect.py +22 -12
  56. gwaslab/viz_plot_miamiplot2.py +3 -2
  57. gwaslab/viz_plot_mqqplot.py +94 -84
  58. gwaslab/viz_plot_qqplot.py +9 -7
  59. gwaslab/viz_plot_regional2.py +2 -1
  60. gwaslab/viz_plot_stackedregional.py +4 -1
  61. {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/METADATA +46 -68
  62. gwaslab-3.6.0.dist-info/RECORD +119 -0
  63. {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/WHEEL +1 -1
  64. gwaslab-3.5.7.dist-info/RECORD +0 -96
  65. {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info/licenses}/LICENSE +0 -0
  66. {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info/licenses}/LICENSE_before_v3.4.39 +0 -0
  67. {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,287 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import copy
4
+ import gc
5
+ from gwaslab.util_in_filter_value import filtervalues
6
+ from gwaslab.g_Log import Log
7
+ from math import floor
8
+ from gwaslab.g_Sumstats import Sumstats
9
+ from gwaslab.g_Sumstats_polars import Sumstatsp
10
+ from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
11
+ from gwaslab.hm_casting import _align_with_mold
12
+ from gwaslab.hm_casting import _fill_missing_columns
13
+ from gwaslab.hm_casting import _check_daf
14
+ from gwaslab.hm_casting import _assign_warning_code
15
+ from gwaslab.hm_casting import _renaming_cols
16
+ from gwaslab.hm_casting import _sort_pair_cols
17
+
18
+ from gwaslab.hm_casting_polars import _merge_mold_with_sumstats_by_chrposp
19
+ from gwaslab.hm_casting_polars import _align_with_moldp
20
+ from gwaslab.hm_casting_polars import _fill_missing_columnsp
21
+ from gwaslab.hm_casting_polars import _renaming_colsp
22
+ from gwaslab.hm_casting_polars import _sort_pair_colsp
23
+
24
+ from gwaslab.qc_fix_sumstats import flipallelestats
25
+ from gwaslab.qc_fix_sumstats_polars import flipallelestatsp
26
+ from gwaslab.qc_check_datatype_polars import check_datatype
27
+ from gwaslab.qc_check_datatype_polars import check_dataframe_shape
28
+ from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
29
+ from gwaslab.util_ex_run_coloc import _run_coloc_susie
30
+ from gwaslab.viz_plot_miamiplot2 import plot_miami2
31
+ from gwaslab.viz_plot_compare_af import plotdaf
32
+ from gwaslab.util_ex_run_2samplemr import _run_two_sample_mr
33
+ from gwaslab.util_ex_run_clumping import _clump
34
+ from gwaslab.util_ex_ldproxyfinder import _extract_with_ld_proxy
35
+ from gwaslab.g_headers import _get_headers
36
+ from gwaslab.util_ex_match_ldmatrix import tofinemapping_m
37
+ from gwaslab.util_ex_run_mesusie import _run_mesusie
38
+ from gwaslab.util_in_meta import meta_analyze_multi
39
+ from gwaslab.util_ex_run_hyprcoloc import _run_hyprcoloc
40
+ from gwaslab.util_in_get_sig import getsig
41
+ from gwaslab.util_in_fill_data import _get_multi_min
42
+ from gwaslab.g_meta import _init_meta
43
+ from gwaslab.g_meta_update import _update_meta
44
+ from gwaslab.qc_fix_sumstats import _process_build
45
+ from gwaslab.util_ex_run_mtag import _run_mtag
46
+
47
+
48
+
class SumstatsMulti():
    """Merge several GWASLab sumstats objects into one wide table.

    The first object provides the variant information columns; its effect
    allele columns, STATUS, statistics columns and other columns are suffixed
    with ``_1``. Every following object is merged onto that table and its
    columns are suffixed with ``_<position>`` (1-based position in the input).

    Attributes set by ``__init__``:
        log: Log instance collecting the processing messages.
        meta: metadata dict created by ``_init_meta(object="SumstatsMulti")``.
        engine: the ``engine`` argument ("pandas" or "polars").
        group_name: name of the group ("Group1" when not given).
        names: study names, made unique by appending ``_<position>``.
        snp_info_cols / stats_cols / other_cols: dicts keyed by the 0-based
            study index, each holding the list of column names of that kind.
        hyprcoloc: result holder filled by ``run_hyprcoloc``.
        data: the merged DataFrame.
    """

    def __init__(self,
                 sumstatsObjects,
                 group_name=None,
                 build="99",
                 engine="pandas",
                 merge_mode="outer",
                 verbose=True):
        """Validate the inputs, classify their columns and merge them.

        Parameters:
            sumstatsObjects: sequence of ``Sumstats`` or ``Sumstatsp`` objects.
            group_name: label for the group; defaults to "Group1".
            build: genome build passed to ``_process_build``.
            engine: "pandas" or "polars"; "polars" forces ``merge_mode="full"``.
            merge_mode: merge mode forwarded to the merge helper.
            verbose: forwarded to the log calls and to the merge step.

        Raises:
            ValueError: if no object is given or an item is not a
                ``Sumstats`` / ``Sumstatsp`` instance.
        """
        # Fail early with a clear message; otherwise the first access to
        # self.snp_info_cols[0] below raises an opaque KeyError.
        if len(sumstatsObjects) == 0:
            raise ValueError("Please provide at least one GWASLab Sumstats Object.")

        for i, sumstatsObject in enumerate(sumstatsObjects):
            if not isinstance(sumstatsObject, (Sumstats, Sumstatsp)):
                raise ValueError("Please provide GWASLab Sumstats Object #{}.".format(i+1))

        self.log = Log()
        self.meta = _init_meta(object="SumstatsMulti")

        use_polars = engine == "polars"
        if use_polars:
            import polars as pl
            merge_mode = "full"

        self.engine = engine

        gwaslab_meta = self.meta["gwaslab"]
        gwaslab_meta["number_of_studies"] = len(sumstatsObjects)
        gwaslab_meta["genome_build"] = _process_build(build, log=self.log, verbose=False)
        gwaslab_meta["objects"] = dict()
        gwaslab_meta["study_index"] = dict()

        self.group_name = "Group1" if group_name is None else group_name
        gwaslab_meta["group_name"] = self.group_name

        self.names = []
        self.hyprcoloc = {}

        self.snp_info_cols = dict()
        self.stats_cols = dict()
        self.other_cols = dict()

        self.log.write( "Start to create SumstatsMulti object..." )

        # The header lists do not depend on the study or the column, so look
        # them up once instead of once per column.
        info_headers = _get_headers(mode="info")
        stats_headers = _get_headers(mode="stats")

        for i, sumstatsObject in enumerate(sumstatsObjects):
            self.log.write( " -Checking sumstats Object #{}...".format(i+1), verbose=verbose)
            check_datatype(sumstatsObject.data, log=self.log, verbose=verbose)
            check_dataframe_shape(sumstats=sumstatsObject.data,
                                  log=self.log,
                                  verbose=verbose)

            study_name = self._unique_study_name(sumstatsObject.meta["gwaslab"]["study_name"],
                                                 self.names,
                                                 i+1)
            self.log.write( " -Sumstats Object #{} name: {}".format(i+1, study_name), verbose=verbose)
            self.names.append(study_name)

            gwaslab_meta["objects"][i+1] = sumstatsObject.meta
            gwaslab_meta["study_index"][i+1] = study_name

            (self.snp_info_cols[i],
             self.stats_cols[i],
             self.other_cols[i]) = self._split_columns(sumstatsObject.data.columns,
                                                       info_headers,
                                                       stats_headers)

        gwaslab_meta["study_names_in_group"] = ",".join(self.names)

        self.log.write( " -Variant Info columns: {}".format(self.snp_info_cols[0]) , verbose=verbose)
        for i in range(len(sumstatsObjects)):
            self.log.write( " -Sumstats #{} variant statistics columns: {}".format(i+1, self.stats_cols[i]) , verbose=verbose)
            self.log.write( " -Sumstats #{} other columns: {}".format(i+1, self.other_cols[i]) , verbose=verbose)

        # The first study is the base table; everything except the variant
        # info columns gets the "_1" suffix. The three renames are applied one
        # after another, exactly as three separate rename calls would be.
        self.data = sumstatsObjects[0].data
        rename_steps = [
            {"EA":"EA_1","NEA":"NEA_1","STATUS":"STATUS_1"},
            {col: col + "_1" for col in self.stats_cols[0]},
            {col: col + "_1" for col in self.other_cols[0]},
        ]
        for mapping in rename_steps:
            if use_polars:
                self.data = self.data.rename(mapping)
            else:
                self.data = self.data.rename(columns=mapping)

        if use_polars:
            self.data = pl.DataFrame(self.data)

        # Merge every remaining study onto the base table. `verbose` is
        # forwarded so that verbose=False also silences the merge step.
        for i, sumstatsObject in enumerate(sumstatsObjects):
            if i == 0:
                continue
            self.log.write("Merging Sumstats #{} to main DataFrame...".format(i+1), verbose=verbose)
            self.data = self._merge_two_sumstats(sumstatsObject.data,
                                                 verbose=verbose,
                                                 i=i,
                                                 merge_mode=merge_mode,
                                                 engine=engine)
            self.log.write("Finished merging Sumstats #{} to main DataFrame.".format(i+1), verbose=verbose)

    @staticmethod
    def _unique_study_name(study_name, taken_names, position):
        """Return study_name, or "<study_name>_<position>" if it is already taken."""
        if study_name in taken_names:
            return "{}_{}".format(study_name, position)
        return study_name

    @staticmethod
    def _split_columns(columns, info_headers, stats_headers):
        """Split column names into (info, stats, other) lists, keeping their order."""
        info_cols, stats_cols, other_cols = [], [], []
        for col in columns:
            if col in info_headers:
                info_cols.append(col)
            elif col in stats_headers:
                stats_cols.append(col)
            else:
                other_cols.append(col)
        return info_cols, stats_cols, other_cols

    def _merge_two_sumstats(self,
                            sumstatsObject2,
                            verbose=True,
                            merge_mode="outer",
                            engine="pandas",
                            i=0):
        """Merge one more study onto ``self.data`` and return the merged table.

        Parameters:
            sumstatsObject2: the DataFrame of the study to add (``__init__``
                passes ``sumstatsObject.data``, not the sumstats object).
            verbose: forwarded to the helper functions.
            merge_mode: forwarded to the merge helper.
            engine: "polars" selects the polars helpers, anything else pandas.
            i: 0-based index of the study being added; its columns end up
                with the suffix ``_<i+1>``.

        Returns:
            The merged DataFrame. ``self.data`` itself is only changed by the
            EA/NEA renaming at the start; the caller assigns the result.
        """
        use_polars = engine == "polars"

        # Base table alleles carry "_1"; the incoming study has no suffix yet.
        if use_polars:
            if "EA" in self.data.columns:
                self.data = self.data.rename({"EA":"EA_1","NEA":"NEA_1"})
            molded_sumstats = _merge_mold_with_sumstats_by_chrposp(mold=self.data,
                                                                   sumstats=sumstatsObject2,
                                                                   log=self.log,
                                                                   verbose=verbose,
                                                                   merge_mode=merge_mode,
                                                                   stats_cols1 = self.other_cols[0],
                                                                   stats_cols2 = self.other_cols[i],
                                                                   suffixes=("_1",""),
                                                                   return_not_matched_mold = False)
            molded_sumstats = _align_with_moldp(molded_sumstats, log=self.log, verbose=verbose,suffixes=("_1",""))
            molded_sumstats = flipallelestatsp(molded_sumstats, log=self.log, verbose=verbose)
            # Keep the base table alleles as the shared EA/NEA columns.
            molded_sumstats = molded_sumstats.drop(["EA","NEA"] )
            molded_sumstats = molded_sumstats.rename({"EA_1":"EA","NEA_1":"NEA"})
        else:
            self.data = self.data.rename(columns={"EA":"EA_1","NEA":"NEA_1"})
            molded_sumstats = _merge_mold_with_sumstats_by_chrpos(mold=self.data,
                                                                  sumstats=sumstatsObject2,
                                                                  log=self.log,
                                                                  verbose=verbose,
                                                                  merge_mode=merge_mode,
                                                                  stats_cols1 = self.other_cols[0],
                                                                  stats_cols2 = self.other_cols[i],
                                                                  suffixes=("_1",""),
                                                                  return_not_matched_mold = False)
            molded_sumstats = _align_with_mold(molded_sumstats, log=self.log, verbose=verbose,suffixes=("_1",""))
            molded_sumstats = flipallelestats(molded_sumstats, log=self.log, verbose=verbose)
            # Keep the base table alleles as the shared EA/NEA columns.
            molded_sumstats = molded_sumstats.drop(columns=["EA","NEA"] )
            molded_sumstats = molded_sumstats.rename(columns={"EA_1":"EA","NEA_1":"NEA"})

        # NOTE(review): the polars counterparts _fill_missing_columnsp and
        # _sort_pair_colsp are imported by this module, but the pandas helpers
        # are called here for both engines - confirm this is intentional.
        if not set(self.stats_cols[i]) == set(self.stats_cols[0]):
            cols_to_fill = set(self.stats_cols[0]).difference(set(self.stats_cols[i]))
            molded_sumstats = _fill_missing_columns(molded_sumstats, cols_to_fill, log=self.log, verbose=verbose)

        # Give the newly added study its "_<i+1>" suffix.
        rename_helper = _renaming_colsp if use_polars else _renaming_cols
        molded_sumstats = rename_helper(molded_sumstats,
                                        self.stats_cols[0] + self.other_cols[i],
                                        log=self.log,
                                        verbose=verbose,
                                        suffixes=("_1","_{}".format(i+1)))

        molded_sumstats = _sort_pair_cols(molded_sumstats, verbose=verbose, log=self.log, suffixes=["_{}".format(j) for j in range(1,i+2)])
        return molded_sumstats

    def update_meta(self,**kwargs):
        """Refresh ``self.meta`` from ``self.data`` via ``_update_meta``."""
        self.meta = _update_meta(self.meta, self.data, log = self.log, **kwargs)

    def run_meta_analysis(self, **kwargs):
        """Run the meta-analysis helper matching ``self.engine`` and return its result."""
        nstudy = self.meta["gwaslab"]["number_of_studies"]
        if self.engine == "polars":
            from gwaslab.util_in_meta_polars import meta_analyze_polars
            return meta_analyze_polars(self.data, nstudy = nstudy, **kwargs)
        return meta_analyze_multi(self.data, nstudy = nstudy, **kwargs)

    def run_hyprcoloc(self,**kwargs):
        """Call ``_run_hyprcoloc`` on the merged data and store the result in ``self.hyprcoloc``."""
        hyprcoloc_res_combined = _run_hyprcoloc(self.data,
                                                nstudy = self.meta["gwaslab"]["number_of_studies"],
                                                study= self.meta["gwaslab"]["group_name"],
                                                traits=self.names, **kwargs)
        self.hyprcoloc = hyprcoloc_res_combined

    def run_mtag(self,**kwargs):
        """Call ``_run_mtag`` on the merged data; nothing is returned."""
        _run_mtag( self.data,
                   nstudy = self.meta["gwaslab"]["number_of_studies"],
                   study= self.meta["gwaslab"]["group_name"],
                   traits=self.names,
                   **kwargs)

    def get_lead(self, build=None, gls=False, **kwargs):
        """Extract lead variants using the per-variant P_MIN column.

        ``self.data`` is replaced by the output of ``_get_multi_min`` (called
        with ``col="P"``), then ``getsig`` is run with ``p="P_MIN"``.

        Parameters:
            build: genome build; defaults to the build stored in ``self.meta``.
            gls: if truthy, return a deep copy of this object whose ``data``
                is the ``getsig`` output instead of the output itself.
            **kwargs: forwarded to ``getsig``.

        Returns:
            The ``getsig`` output, or a copied ``SumstatsMulti`` when ``gls``.
        """
        id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"

        # extract build information from meta data
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]

        self.data = _get_multi_min(self.data,
                                   col="P",
                                   nstudy=self.meta["gwaslab"]["number_of_studies"])

        output = getsig(self.data,
                        id=id_to_use,
                        chrom="CHR",
                        pos="POS",
                        p="P_MIN",
                        log=self.log,
                        build=build,
                        **kwargs)

        if gls:
            new_Sumstats_object = copy.deepcopy(self)
            new_Sumstats_object.data = output
            gc.collect()
            return new_Sumstats_object

        return output
287
+
gwaslab/g_SumstatsPair.py CHANGED
@@ -24,6 +24,12 @@ from gwaslab.util_ex_run_2samplemr import _run_two_sample_mr
24
24
  from gwaslab.util_ex_run_clumping import _clump
25
25
  from gwaslab.util_ex_ldproxyfinder import _extract_with_ld_proxy
26
26
  from gwaslab.g_headers import _get_headers
27
+ from gwaslab.util_ex_match_ldmatrix import tofinemapping_m
28
+ from gwaslab.util_ex_run_mesusie import _run_mesusie
29
+ from gwaslab.io_read_pipcs import _read_pipcs
30
+ from gwaslab.g_meta import _init_meta
31
+ from gwaslab.viz_plot_stackedregional import plot_stacked_mqq
32
+ from gwaslab.util_ex_run_ccgwas import _run_ccgwas
27
33
 
28
34
  class SumstatsPair( ):
29
35
  def __init__(self, sumstatsObject1, sumstatsObject2, study=None, suffixes = ("_1","_2") ,verbose=True ):
@@ -32,11 +38,29 @@ class SumstatsPair( ):
32
38
  raise ValueError("Please provide GWASLab Sumstats Object #1.")
33
39
  if not isinstance(sumstatsObject2, Sumstats):
34
40
  raise ValueError("Please provide GWASLab Sumstats Object #2.")
41
+
42
+ self.meta = _init_meta(object="SumstatsPair")
43
+
35
44
  if sumstatsObject1.meta["gwaslab"]["study_name"]!=sumstatsObject2.meta["gwaslab"]["study_name"]:
36
45
  self.study_name = "{}_{}".format(sumstatsObject1.meta["gwaslab"]["study_name"], sumstatsObject2.meta["gwaslab"]["study_name"])
46
+ self.study_names = [sumstatsObject1.meta["gwaslab"]["study_name"], sumstatsObject2.meta["gwaslab"]["study_name"]]
37
47
  else:
38
- self.study_name = "{}_{}".format("STUDY1", "STUDY2")
48
+ self.study_name = "{}_{}".format(sumstatsObject1.meta["gwaslab"]["study_name"]+"1", sumstatsObject2.meta["gwaslab"]["study_name"]+"2")
49
+ self.study_names = [sumstatsObject1.meta["gwaslab"]["study_name"]+"1", sumstatsObject2.meta["gwaslab"]["study_name"]+"2"]
50
+
51
+ self.meta["gwaslab"]["objects"] = dict()
52
+ self.meta["gwaslab"]["objects"][0] = sumstatsObject1.meta
53
+ self.meta["gwaslab"]["objects"][1] = sumstatsObject2.meta
54
+
55
+ #self.meta["gwaslab"]["study_name"] = self.study_name
56
+ self.meta["gwaslab"]["group_name"] = self.study_name
39
57
 
58
+ self.ldsc = dict()
59
+ self.ldsc[0] = sumstatsObject1.ldsc_h2
60
+ self.ldsc[1] = sumstatsObject2.ldsc_h2
61
+ self.ldsc_rg = sumstatsObject1.ldsc_rg
62
+
63
+
40
64
  self.snp_info_cols = []
41
65
  self.stats_cols =[]
42
66
  self.stats_cols2 =[]
@@ -45,18 +69,26 @@ class SumstatsPair( ):
45
69
  self.log = Log()
46
70
  self.suffixes = suffixes
47
71
  self.colocalization=pd.DataFrame()
72
+
48
73
  self.sumstats1 = pd.DataFrame()
49
74
  self.sumstats2 = pd.DataFrame()
75
+ self.ns = None
50
76
 
77
+ # TwosampleMR
51
78
  self.mr =dict()
79
+
80
+ # clumping
52
81
  self.clumps =dict()
53
- self.ns = None
54
- self.finemapping = dict()
55
- #self.to_finemapping_file_path = ""
56
- #self.plink_log = ""
82
+
83
+ # MESuSiE
84
+ self.mesusie = dict()
85
+ self.mesusie_res = pd.DataFrame()
86
+
87
+ # Coloc and Coloc SuSiE
88
+ self.coloc = dict()
89
+ self.coloc_susie_res = pd.DataFrame()
57
90
 
58
91
  self.log.write( "Start to create SumstatsPair object..." )
59
-
60
92
  self.log.write( " -Checking sumstats 1..." , verbose=verbose)
61
93
  check_datatype(sumstatsObject1.data, log=self.log, verbose=verbose)
62
94
  check_dataframe_shape(sumstats=sumstatsObject1.data,
@@ -77,6 +109,7 @@ class SumstatsPair( ):
77
109
  self.stats_cols.append(i)
78
110
  else:
79
111
  self.other_cols.append(i)
112
+
80
113
  for i in sumstatsObject2.data.columns:
81
114
  if i in _get_headers(mode="info"):
82
115
  continue
@@ -90,6 +123,8 @@ class SumstatsPair( ):
90
123
  self.log.write( " -Sumstats1 other columns: {}".format(self.other_cols) , verbose=verbose)
91
124
  self.log.write( " -Sumstats2 other columns: {}".format(self.other_cols2) , verbose=verbose)
92
125
 
126
+ sumstatsObject1.data["_RAW_INDEX_1"] = range(len(sumstatsObject1.data))
127
+ sumstatsObject2.data["_RAW_INDEX_2"] = range(len(sumstatsObject2.data))
93
128
  # extract only info and stats cols
94
129
  self.data = sumstatsObject1.data
95
130
 
@@ -98,7 +133,7 @@ class SumstatsPair( ):
98
133
  self.data = self.data.rename(columns={i:i + suffixes[0] for i in self.stats_cols})
99
134
  self.data = self.data.rename(columns={i:i + suffixes[0] for i in self.other_cols})
100
135
 
101
- self.data, self.sumstats1 = self._merge_two_sumstats(sumstatsObject2, suffixes=suffixes)
136
+ self.data, self.sumstats1, self.sumstats2 = self._merge_two_sumstats(sumstatsObject2, suffixes=suffixes)
102
137
 
103
138
  if "N{}".format(self.suffixes[0]) in self.data.columns and "N{}".format(self.suffixes[1]) in self.data.columns:
104
139
  n1 = int(floor(self.data["N{}".format(self.suffixes[0])].mean()))
@@ -106,14 +141,24 @@ class SumstatsPair( ):
106
141
  self.ns=(n1, n2)
107
142
  else:
108
143
  self.ns = None
144
+ sumstatsObject1.data = sumstatsObject1.data.drop(columns=["_RAW_INDEX_1"])
145
+ sumstatsObject2.data = sumstatsObject2.data.drop(columns=["_RAW_INDEX_2"])
109
146
 
110
- def _merge_two_sumstats(self, sumstatsObject2, threshold=0.2, verbose=True,windowsizeb=10, ref_path=None,suffixes=("_1","_2")):
147
+ def _merge_two_sumstats(self,
148
+ sumstatsObject2,
149
+ threshold=0.2,
150
+ verbose=True,
151
+ windowsizeb=10,
152
+ ref_path=None,
153
+ suffixes=("_1","_2")):
111
154
 
112
155
  # sumstats1 with suffix _1, sumstats2 with no suffix
113
- molded_sumstats, sumstats1 = _merge_mold_with_sumstats_by_chrpos(mold=self.data,
156
+ molded_sumstats, sumstats1, sumstats2 = _merge_mold_with_sumstats_by_chrpos(mold=self.data,
114
157
  sumstats=sumstatsObject2.data,
115
158
  log=self.log,
116
159
  verbose=verbose,
160
+ stats_cols1 = self.stats_cols,
161
+ stats_cols2 = self.stats_cols2,
117
162
  suffixes=(suffixes[0],""),
118
163
  return_not_matched_mold = True)
119
164
 
@@ -137,21 +182,53 @@ class SumstatsPair( ):
137
182
 
138
183
  molded_sumstats = _sort_pair_cols(molded_sumstats, verbose=verbose, log=self.log)
139
184
 
140
- return molded_sumstats, sumstats1
185
+ return molded_sumstats, sumstats1, sumstats2
141
186
 
142
187
 
def clump(self,**kwargs):
    """Run ``_clump`` on the merged data using the first study's P columns.

    The three values returned by ``_clump`` are stored in ``self.clumps``
    under "clumps", "clumps_raw" and "plink_log".
    """
    # Columns of the first study carry self.suffixes[0], so build the column
    # names from it instead of assuming the default "_1".
    first_suffix = self.suffixes[0]
    (self.clumps["clumps"],
     self.clumps["clumps_raw"],
     self.clumps["plink_log"]) = _clump(self.data,
                                        log=self.log,
                                        p="P{}".format(first_suffix),
                                        mlog10p="MLOG10P{}".format(first_suffix),
                                        study = self.meta["gwaslab"]["group_name"],
                                        **kwargs)
145
190
 
146
191
  def to_coloc(self,**kwargs):
147
- self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"] = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)
192
+ self.coloc["path"],self.coloc["file"],self.coloc["plink_log"] = tofinemapping(self.data,study=self.meta["gwaslab"]["group_name"],suffixes=self.suffixes,log=self.log,**kwargs)
193
+
def to_mesusie(self,**kwargs):
    """Call ``tofinemapping_m`` on the merged data for the MESuSiE workflow.

    The three returned values are stored in ``self.mesusie`` under
    "path", "file" and "plink_log".
    """
    group = self.meta["gwaslab"]["group_name"]
    (self.mesusie["path"],
     self.mesusie["file"],
     self.mesusie["plink_log"]) = tofinemapping_m(self.data,
                                                  studies = self.study_names,
                                                  group = group,
                                                  suffixes=self.suffixes,
                                                  log=self.log,
                                                  **kwargs)
201
+
def run_mesusie(self,**kwargs):
    """Run ``_run_mesusie`` and load its results into ``self.mesusie_res``.

    Reads ``self.mesusie["path"]``, which ``to_mesusie`` fills in, and passes
    the prefix returned by ``_run_mesusie`` on to ``_read_pipcs``.
    """
    prefix = _run_mesusie(self.mesusie["path"],log=self.log,ncols=self.ns,**kwargs)
    variant_info = self.data[["SNPID","CHR","POS"]]
    # NOTE(review): studie_names receives the joined string self.study_name,
    # not the list self.study_names - confirm which one _read_pipcs expects.
    self.mesusie_res = _read_pipcs(variant_info,
                                   prefix,
                                   studie_names = self.study_name,
                                   group=self.meta["gwaslab"]["group_name"])
208
+
def run_ccgwas(self,**kwargs):
    """Call ``_run_ccgwas`` with the merged data, metadata and stored LDSC results.

    Nothing is returned or stored on the object by this method.
    """
    group = self.meta["gwaslab"]["group_name"]
    _run_ccgwas(self.data,
                meta = self.meta,
                ldsc = self.ldsc,
                ldsc_rg = self.ldsc_rg,
                group=group,
                studies = self.study_names,
                log=self.log,
                **kwargs)
148
218
 
def read_pipcs(self,prefix,**kwargs):
    """Load results for ``prefix`` with ``_read_pipcs`` into ``self.mesusie_res``.

    Parameters:
        prefix: passed to ``_read_pipcs`` as its second positional argument.
        **kwargs: forwarded to ``_read_pipcs``.
    """
    variant_info = self.data[["SNPID","CHR","POS"]]
    # NOTE(review): studie_names receives the joined string self.study_name,
    # not the list self.study_names - confirm which one _read_pipcs expects.
    self.mesusie_res = _read_pipcs(variant_info,
                                   prefix,
                                   group=self.meta["gwaslab"]["group_name"],
                                   studie_names = self.study_name,
                                   **kwargs)
225
+
def run_coloc_susie(self,**kwargs):
    """Run ``_run_coloc_susie`` and store the result in ``self.coloc_susie_res``.

    Reads ``self.coloc["path"]``, which ``to_coloc`` fills in.
    """
    coloc_input_path = self.coloc["path"]
    result = _run_coloc_susie(coloc_input_path,log=self.log,ncols=self.ns,**kwargs)
    self.coloc_susie_res = result
151
228
 
def run_two_sample_mr(self, clump=False, **kwargs):
    """Call ``_run_two_sample_mr`` with the first study as exposure and the second as outcome.

    Parameters:
        clump: forwarded to ``_run_two_sample_mr``.
        **kwargs: forwarded to ``_run_two_sample_mr``.
    """
    # Splitting the group name on "_" gives wrong labels when a study name
    # itself contains an underscore; self.study_names holds the same two names
    # unsplit. Fall back to the split only if that attribute is missing.
    names = getattr(self, "study_names", None)
    if names is not None and len(names) >= 2:
        exposure1, outcome2 = names[0], names[1]
    else:
        parts = self.meta["gwaslab"]["group_name"].split("_")
        exposure1, outcome2 = parts[0], parts[1]
    _run_two_sample_mr(self,exposure1=exposure1,outcome2=outcome2, clump=clump,**kwargs)
156
233
 
157
234
  def extract_with_ld_proxy(self,**arg):
@@ -166,9 +243,17 @@ class SumstatsPair( ):
166
243
  self.data = filtervalues(self.data, expr,log=self.log,**kwargs)
167
244
  gc.collect()
168
245
 
def stacked_mqq(self, **kwargs):
    """Pass both studies' P values and ``self.mesusie_res`` to ``plot_stacked_mqq``.

    Two frames are built from ``self.data`` (SNPID, CHR, POS, EA, NEA plus the
    study's P column renamed to "P"), followed by ``self.mesusie_res``.
    ``**kwargs`` are forwarded to ``plot_stacked_mqq``; nothing is returned.
    """
    shared_cols = ["SNPID","CHR","POS","EA","NEA"]
    # Per-study columns carry self.suffixes, so build the P column names from
    # it instead of assuming the defaults "_1" and "_2".
    p_cols = ["P{}".format(suffix) for suffix in self.suffixes[:2]]
    objects = [self.data[shared_cols + [p_col]].rename(columns={p_col:"P"})
               for p_col in p_cols]
    objects.append(self.mesusie_res)

    plot_stacked_mqq(objects=objects,
                     **kwargs)
254
+
169
255
  ## Visualization #############################################################################################################################################
170
256
  def plot_miami(self,**kwargs):
171
-
172
257
  plot_miami2(merged_sumstats=self.data,
173
258
  suffixes=self.suffixes,
174
259
  **kwargs)