gwaslab 3.6.3__py3-none-any.whl → 3.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

gwaslab/bd_get_hapmap3.py CHANGED
@@ -42,14 +42,39 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
42
42
  additional_cols= ["A1","A2"]
43
43
  else:
44
44
  additional_cols=[]
45
+
45
46
  hapmap3_ref = pd.read_csv(data_path,sep="\s+",usecols=["#CHROM","POS","rsid"]+additional_cols, dtype={"#CHROM":"string","POS":"string"})
47
+
46
48
  #rsid A1 A2 #CHROM POS
47
49
  #rs3094315 G A 1 752566
48
50
 
49
51
  if rsid in sumstats.columns:
50
52
  log.write(" -rsID will be used for matching...", verbose=verbose)
51
- output = sumstats.loc[sumstats[rsid].isin(hapmap3_ref["rsid"].values),:].copy()
52
- log.write(" -Raw input contains "+str(len(output))+" Hapmap3 variants based on rsID...", verbose=verbose)
53
+ hapmap3_ref = hapmap3_ref.rename(columns={"rsid":rsid})
54
+
55
+ output = sumstats.loc[sumstats[rsid].isin(hapmap3_ref[rsid].values),:].copy()
56
+
57
+ output = pd.merge(output, hapmap3_ref, on = rsid, how=how, suffixes=('', '_hapmap3'))
58
+
59
+ raw_rsid_count= len(output)
60
+ log.write(f" -Raw input contains {raw_rsid_count} Hapmap3 variants based on rsID...", verbose=verbose)
61
+
62
+ if match_allele:
63
+ log.write(" -Checking if alleles are same...")
64
+ is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
65
+ | ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"]))
66
+ if how=="right":
67
+ is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
68
+ | ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"])) | output[ea].isna()
69
+ output = output.loc[is_matched,:]
70
+ output = output.drop(columns=["#CHROM","A1","A2"] )
71
+ log.write(f" -Filtered {raw_rsid_count - len(output)} Hapmap3 variants due to unmatech alleles...", verbose=verbose)
72
+
73
+ for i in ["#CHROM","A1","A2","POS_hapmap3"]:
74
+ todrop=[]
75
+ if i in output.columns:
76
+ todrop.append(i)
77
+ output = output.drop(columns=todrop)
53
78
  return output
54
79
 
55
80
  elif chrom in sumstats.columns and pos in sumstats.columns:
gwaslab/g_Sumstats.py CHANGED
@@ -144,10 +144,11 @@ class Sumstats():
144
144
  dof=None,
145
145
  snpr2=None,
146
146
  status=None,
147
- other=[],
147
+ other=None,
148
+ exclude=None,
149
+ include=None,
148
150
  chrom_pat=None,
149
151
  snpid_pat=None,
150
- usekeys=None,
151
152
  direction=None,
152
153
  verbose=True,
153
154
  study="Study_1",
@@ -238,7 +239,8 @@ class Sumstats():
238
239
  trait=trait,
239
240
  status=status,
240
241
  other=other,
241
- usekeys=usekeys,
242
+ exclude=exclude,
243
+ include=include,
242
244
  chrom_pat=chrom_pat,
243
245
  snpid_pat=snpid_pat,
244
246
  verbose=verbose,
@@ -912,10 +914,13 @@ class Sumstats():
912
914
  verbose=verbose,
913
915
  **kwargs)
914
916
 
915
- def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
917
+ def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right", get_hm3=True,**kwargs):
916
918
  if build is None:
917
919
  build = self.meta["gwaslab"]["genome_build"]
918
- insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
920
+ if get_hm3==True:
921
+ insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
922
+ else:
923
+ insumstats = self.data
919
924
  ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats,
920
925
  meta=self.meta,
921
926
  log=self.log,
@@ -53,6 +53,7 @@ class SumstatsMulti( ):
53
53
  build="99",
54
54
  engine="pandas",
55
55
  merge_mode="outer",
56
+ merge_by_id=False,
56
57
  verbose=True ):
57
58
 
58
59
  for i,sumstatsObject in enumerate(sumstatsObjects):
@@ -152,7 +153,7 @@ class SumstatsMulti( ):
152
153
  for i, sumstatsObject in enumerate(sumstatsObjects):
153
154
  if i >0:
154
155
  self.log.write("Merging Sumstats #{} to main DataFrame...".format(i+1))
155
- self.data = self._merge_two_sumstats(sumstatsObject.data,i=i,merge_mode=merge_mode,engine=engine)
156
+ self.data = self._merge_two_sumstats(sumstatsObject.data,i=i,merge_mode=merge_mode,engine=engine,merge_by_id=merge_by_id)
156
157
  self.log.write("Finished merging Sumstats #{} to main DataFrame.".format(i+1))
157
158
  else:
158
159
  for i, sumstatsObject in enumerate(sumstatsObjects):
@@ -167,6 +168,7 @@ class SumstatsMulti( ):
167
168
  verbose=True,
168
169
  merge_mode="outer",
169
170
  engine="pandas",
171
+ merge_by_id=False,
170
172
  i=0):
171
173
 
172
174
  # _1 _2
@@ -184,6 +186,7 @@ class SumstatsMulti( ):
184
186
  log=self.log,
185
187
  verbose=verbose,
186
188
  merge_mode=merge_mode,
189
+ merge_by_id=merge_by_id,
187
190
  stats_cols1 = self.other_cols[0],
188
191
  stats_cols2 = self.other_cols[i],
189
192
  suffixes=("_1",""),
gwaslab/g_version.py CHANGED
@@ -17,8 +17,8 @@ def _get_version():
17
17
  def gwaslab_info():
18
18
  # version meta information
19
19
  dic={
20
- "version":"3.6.3",
21
- "release_date":"202505"
20
+ "version":"3.6.4",
21
+ "release_date":"20250624"
22
22
  }
23
23
  return dic
24
24
 
@@ -18,30 +18,95 @@ def _merge_mold_with_sumstats_by_chrposp(mold, sumstats, ref_path=None,add_raw_i
18
18
  log=Log(),
19
19
  suffixes=("_MOLD",""),
20
20
  merge_mode="full",
21
+ merge_by_id=False,
21
22
  verbose=True,
22
23
  return_not_matched_mold =False):
23
24
 
24
25
  log.write("Start to merge sumstats...", verbose=verbose)
25
26
  if merge_mode=="full":
27
+
26
28
  sumstats = sumstats.rename({
27
29
  "SNPID":"_SNPID_RIGHT",
28
30
  "rsID":"_rsID_RIGHT"
29
31
  }, strict=False) #,
32
+
30
33
 
31
- # drop old ids
32
- cols_to_drop = []
33
- for i in sumstats.columns:
34
- if i in ["SNPID","rsID"]:
35
- cols_to_drop.append(i)
36
- if len(cols_to_drop)>0:
37
- log.write(" -Dropping old IDs:{}".format(cols_to_drop), verbose=verbose)
38
- sumstats = sumstats.drop(columns=cols_to_drop)
34
+ if merge_by_id==False:
35
+ # drop old ids
36
+ cols_to_drop = []
37
+ for i in sumstats.columns:
38
+ if i in ["SNPID","rsID"]:
39
+ cols_to_drop.append(i)
40
+ if len(cols_to_drop)>0:
41
+ log.write(" -Dropping old IDs:{}".format(cols_to_drop), verbose=verbose)
42
+ sumstats = sumstats.drop(columns=cols_to_drop)
43
+
44
+ ##################################################################################################################
45
+
46
+ # mold sumffix + mold
47
+ # add ASET
48
+ mold = mold.with_columns(
49
+ pl.when( pl.col("EA_1") > pl.col("NEA_1") )
50
+ .then( pl.col("EA_1") + ":" + pl.col("NEA_1") )
51
+ .otherwise( pl.col("NEA_1") + ":" + pl.col("EA_1") )
52
+ .alias("ASET")
53
+ )
54
+
55
+ sumstats = sumstats.with_columns(
56
+ pl.when( pl.col("EA") > pl.col("NEA") )
57
+ .then( pl.col("EA") + ":" + pl.col("NEA") )
58
+ .otherwise( pl.col("NEA") + ":" + pl.col("EA") )
59
+ .alias("ASET"))
60
+
61
+ sumstats_len = len(sumstats)
62
+ mold_len = len(mold)
63
+ sumstats = sumstats.unique(subset=["CHR","POS","ASET"])
64
+ mold = mold.unique(subset=["CHR","POS","ASET"])
65
+
66
+ log.write(f' -Left: dropping duplicated variants based on CHR,POS,ASET: {sumstats_len - len(sumstats)}')
67
+ log.write(f' -Right: dropping duplicated variants based on CHR,POS,ASET: {mold_len - len(mold)}')
39
68
 
40
- ##################################################################################################################
69
+ mold = mold.with_columns(
70
+ pl.when( pl.col("NEA_1").str.len_chars() != pl.col("EA_1").str.len_chars() )
71
+ .then(
72
+ pl.when( pl.col("EAF_1")<0.5 ).then(
73
+ pl.col("ASET") + ":" + pl.col("EA_1")
74
+ ).otherwise( pl.col("ASET") + ":" + pl.col("NEA_1") )
75
+ .alias("ASET")
76
+ )
77
+ .otherwise( pl.col("ASET") )
78
+ .alias("ASET")
79
+ )
80
+
81
+ sumstats = sumstats.with_columns(
82
+ pl.when( pl.col("NEA").str.len_chars() != pl.col("EA").str.len_chars() )
83
+ .then(
84
+ pl.when( pl.col("EAF")<0.5 ).then(
85
+ pl.col("ASET") + ":" + pl.col("EA")
86
+ ).otherwise( pl.col("ASET") + ":" + pl.col("NEA") )
87
+ .alias("ASET")
88
+ )
89
+ .otherwise( pl.col("ASET"))
90
+ .alias("ASET")
91
+ )
92
+
93
+ mold_sumstats = mold.join(sumstats, on=["CHR","POS","ASET"], how=merge_mode, suffix="_", coalesce=True)
94
+
95
+ elif merge_by_id==True:
96
+
97
+ sumstats = sumstats.rename({
98
+ "_SNPID_RIGHT":"SNPID",
99
+ }, strict=False)
100
+
41
101
 
42
- # mold sumffix + mold
102
+ sumstats_len = len(sumstats)
103
+ mold_len = len(mold)
104
+ sumstats = sumstats.unique(subset=["SNPID","CHR","POS"])
105
+ mold = mold.unique(subset=["SNPID","CHR","POS"])
106
+ log.write(f' -Left: dropping duplicated variants based on CHR,POS,SNPID: {sumstats_len - len(sumstats)}')
107
+ log.write(f' -Right: dropping duplicated variants based on CHR,POS,SNPID: {mold_len - len(mold)}')
108
+ mold_sumstats = mold.join(sumstats, on=["SNPID","CHR","POS"], how=merge_mode, suffix="_", coalesce=True)
43
109
 
44
- mold_sumstats = mold.join(sumstats, on=["CHR","POS"], how=merge_mode, suffix="_", coalesce=True)
45
110
 
46
111
  if merge_mode=="full":
47
112
  is_temp_na = mold_sumstats["EA_1"].is_null()
@@ -50,13 +115,17 @@ def _merge_mold_with_sumstats_by_chrposp(mold, sumstats, ref_path=None,add_raw_i
50
115
  for i in ["EA_1","NEA_1","EA","NEA"]:
51
116
  mold_sumstats = mold_sumstats.with_columns(pl.col(i).cast(pl.String).alias(i))
52
117
 
53
- # for variants not in template, copy snp info
54
- mold_sumstats = mold_sumstats.with_columns(
118
+ if merge_by_id==False:
119
+ mold_sumstats = mold_sumstats.with_columns(
55
120
  pl.when( is_temp_na )
56
121
  .then( pl.col("_SNPID_RIGHT") )
57
122
  .otherwise( pl.col("SNPID") )
58
123
  .alias("SNPID")
59
- ).with_columns(
124
+ )
125
+ mold_sumstats = mold_sumstats.drop(["_SNPID_RIGHT"])
126
+
127
+ # for variants not in template, copy snp info
128
+ mold_sumstats = mold_sumstats.with_columns(
60
129
  pl.when( is_temp_na )
61
130
  .then( pl.col("EA") )
62
131
  .otherwise( pl.col("EA_1") )
@@ -66,12 +135,18 @@ def _merge_mold_with_sumstats_by_chrposp(mold, sumstats, ref_path=None,add_raw_i
66
135
  .then( pl.col("NEA") )
67
136
  .otherwise( pl.col("NEA_1") )
68
137
  .alias("NEA_1")
138
+ ).with_columns(
139
+ pl.when( is_temp_na )
140
+ .then( pl.col("EAF") )
141
+ .otherwise( pl.col("EAF_1"))
142
+ .alias("EAF_1")
69
143
  ).with_columns(
70
144
  pl.when( is_temp_na )
71
145
  .then( pl.col("STATUS") )
72
146
  .otherwise( pl.col("STATUS_1") )
73
147
  .alias("STATUS_1")
74
148
  )
149
+
75
150
  #
76
151
  if "_rsID_RIGHT" in mold_sumstats.columns:
77
152
  mold_sumstats = mold_sumstats.with_columns(
@@ -87,7 +162,7 @@ def _merge_mold_with_sumstats_by_chrposp(mold, sumstats, ref_path=None,add_raw_i
87
162
 
88
163
  mold_sumstats = mold_sumstats.with_columns(
89
164
  pl.when( is_temp_na_2 )
90
- .then( pl.col("EA_1") )
165
+ .then( pl.col("EA_1") )
91
166
  .otherwise( pl.col("EA") )
92
167
  .alias("EA")
93
168
  ).with_columns(
@@ -97,10 +172,14 @@ def _merge_mold_with_sumstats_by_chrposp(mold, sumstats, ref_path=None,add_raw_i
97
172
  .alias("NEA")
98
173
  )
99
174
 
100
- mold_sumstats = mold_sumstats.drop(["_SNPID_RIGHT"])
175
+
176
+ if merge_by_id==False:
177
+ mold_sumstats = mold_sumstats.unique(subset=["CHR","POS","ASET"])
178
+ log.write(" -After merging by CHR, POS and ASET:{}".format(len(mold_sumstats)), verbose=verbose)
179
+ else:
180
+ mold_sumstats = mold_sumstats.unique(subset=["SNPID","CHR","POS"])
181
+ log.write(" -After merging by SNPID, CHR and POS:{}".format(len(mold_sumstats)), verbose=verbose)
101
182
 
102
- log.write(" -After merging by CHR and POS:{}".format(len(mold_sumstats)), verbose=verbose)
103
-
104
183
  mold_sumstats = _keep_variants_with_same_allele_setp(mold_sumstats,suffixes=suffixes)
105
184
 
106
185
  log.write(" -Matched variants:{}".format(len(mold_sumstats)), verbose=verbose)
@@ -116,13 +195,9 @@ def _keep_variants_with_same_allele_setp(sumstats, log=Log(),verbose=True,suffix
116
195
 
117
196
  is_perfect_match = (sumstats[ea2] == sumstats[ea1]) & (sumstats[nea2] == sumstats[nea1])
118
197
  is_flipped_match = (sumstats[ea2] == sumstats[nea1]) & (sumstats[nea2] == sumstats[ea1])
119
- is_allele_set_match = is_flipped_match | is_perfect_match
120
198
 
121
- log.write(" -Matching alleles and keeping only variants with same allele set: ", verbose=verbose)
122
199
  log.write(" -Perfect match: {}".format(sum(is_perfect_match)), verbose=verbose)
123
200
  log.write(" -Flipped match: {}".format(sum(is_flipped_match)), verbose=verbose)
124
- log.write(" -Unmatched : {}".format(sum(~is_allele_set_match)), verbose=verbose)
125
- sumstats = sumstats.filter(is_allele_set_match)
126
201
  return sumstats
127
202
 
128
203
  def _align_with_moldp(sumstats, log=Log(),verbose=True, suffixes=("_MOLD","")):
gwaslab/io_load_ld.py CHANGED
@@ -233,9 +233,7 @@ def process_ld(sumstats,
233
233
  lead_snpid = matched_snpid.group(0).split(":")[1:]
234
234
  lead_snpid[0]= int(lead_snpid[0])
235
235
  lead_series = pd.Series(lead_snpid)
236
-
237
- print(sumstats)
238
- print(sumstats.loc[lead_id, "REFINDEX"])
236
+
239
237
  # if lead pos is available:
240
238
  if sumstats.loc[lead_id, "REFINDEX"] is not None:
241
239
  lead_snp_ref_index = sumstats.loc[lead_id, "REFINDEX"]
@@ -59,7 +59,8 @@ def preformat(sumstats,
59
59
  trait=None,
60
60
  build=None,
61
61
  other=None,
62
- usekeys=None,
62
+ exclude=None,
63
+ include=None,
63
64
  chrom_pat=None,
64
65
  snpid_pat=None,
65
66
  verbose=False,
@@ -71,6 +72,11 @@ def preformat(sumstats,
71
72
  usecols = list()
72
73
  if other is None:
73
74
  other = list()
75
+ if exclude is None:
76
+ exclude = list()
77
+ if include is None:
78
+ include = list()
79
+
74
80
  dtype_dictionary = {}
75
81
  if readargs is None:
76
82
  readargs={}
@@ -78,7 +84,7 @@ def preformat(sumstats,
78
84
  # workflow:
79
85
  # 1. formatbook
80
86
  # 2. user specified header
81
- # 3. usekeys
87
+ # 3. include & exclude
82
88
  if tab_fmt=="parquet":
83
89
  if type(sumstats) is str:
84
90
  log.write("Start to load data from parquet file....",verbose=verbose)
@@ -318,18 +324,36 @@ def preformat(sumstats,
318
324
  study = raw_cols[9]
319
325
  usecols = usecols + [study]
320
326
 
321
- if usekeys is not None:
327
+ if len(include)>0:
322
328
  # extract only specified keys
323
329
  usecols_new =[]
324
- for i in usekeys:
330
+ for i in include:
331
+ # rename_dictionary: sumstats to gwaslab
325
332
  for k, v in rename_dictionary.items():
326
333
  if i == v:
334
+ # get list of sumstats header
327
335
  usecols_new.append(k)
328
336
  usecols_valid =[]
329
337
  for i in usecols_new:
330
338
  if i in usecols:
331
339
  usecols_valid.append(i)
340
+ log.write(f' -Include columns :{",".join(usecols_valid)}' ,verbose=verbose)
332
341
  usecols = usecols_valid
342
+
343
+ if len(exclude)>0:
344
+ # exclude specified keys
345
+ exclude_cols =[]
346
+ for i in exclude:
347
+ # rename_dictionary: sumstats to gwaslab
348
+ for k, v in rename_dictionary.items():
349
+ if i == v:
350
+ # get list of sumstats header
351
+ exclude_cols.append(k)
352
+ log.write(f' -Exclude columns :{",".join(exclude_cols)}' ,verbose=verbose)
353
+ for i in exclude_cols:
354
+ if i in usecols:
355
+ usecols.remove(i)
356
+
333
357
  #loading data ##########################################################################################################
334
358
 
335
359
  try:
@@ -107,6 +107,7 @@ def fixID(sumstats,
107
107
  except:
108
108
  log.write(" -Force converting rsID to pd.string data type...",verbose=verbose)
109
109
  sumstats[rsid] = sumstats[rsid].astype("string")
110
+
110
111
  if snpid in sumstats.columns:
111
112
  # convert to string datatype
112
113
  try:
@@ -147,6 +148,7 @@ def fixID(sumstats,
147
148
  log.write(" -A look at the unrecognized rsID :",set(sumstats.loc[(~is_rsid)&(~is_rs_chrpos),rsid].head()),"...", verbose=verbose)
148
149
 
149
150
  ############################ fixing chr pos###################################################
151
+
150
152
  if fixchrpos == True:
151
153
  # from snpid or rsid, extract CHR:POS to fix CHR and POS
152
154
  if snpid in sumstats.columns:
@@ -199,7 +199,6 @@ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos=
199
199
  posdiccul[i]= posdiccul[i-1] + posdiccul[i] + sumstats[pos].max()*chrpad
200
200
  else:
201
201
  posdiccul = _posdiccul
202
-
203
202
  # convert base pair postion to x axis position using the cumulative sum dictionary
204
203
  sumstats["_ADD"]=sumstats[chrom].apply(lambda x : posdiccul[int(x)-1])
205
204
 
@@ -985,6 +985,7 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose
985
985
  log.write(" -Bonferroni correction applied...", verbose=verbose)
986
986
  df[rawpq] = df[pq]
987
987
  df[pq] = df[pq] * len(df[pq])
988
+ df.loc[df[pq]>1,pq] = 1
988
989
 
989
990
  df.loc[df[pq]<q_level,"Edge_color"]="black"
990
991
  df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
@@ -1330,17 +1331,20 @@ def configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legen
1330
1331
  # handle.set_edgecolor("white")
1331
1332
 
1332
1333
  ## Move titles to the left
1333
- for item, label in zip(L.legendHandles, L.texts):
1334
- if label._text in legend_elements:
1335
- item.set_edgecolor("white")
1336
- #item._legmarker.set_markersize(scatterargs["s"]*1.5)
1337
- item._sizes = [scatterargs["s"]*2]
1338
- if legend_mode == "full":
1339
- if label._text in [legend_title, legend_title2]:
1340
- width=item.get_window_extent(fig.canvas.get_renderer()).width
1341
- label.set_ha('left')
1342
- label.set_position((-8*width,0))
1343
-
1334
+ try:
1335
+ for item, label in zip(L.legendHandles, L.texts):
1336
+ if label._text in legend_elements:
1337
+ item.set_edgecolor("white")
1338
+ #item._legmarker.set_markersize(scatterargs["s"]*1.5)
1339
+ item._sizes = [scatterargs["s"]*2]
1340
+ if legend_mode == "full":
1341
+ if label._text in [legend_title, legend_title2]:
1342
+ width=item.get_window_extent(fig.canvas.get_renderer()).width
1343
+ label.set_ha('left')
1344
+ label.set_position((-8*width,0))
1345
+ except:
1346
+ pass
1347
+
1344
1348
  ax.tick_params(axis='both', labelsize=font_kwargs["fontsize"])
1345
1349
  plt.setp(L.texts,**font_kwargs)
1346
1350
  plt.setp(L.get_title(),**font_kwargs)
@@ -70,8 +70,9 @@ def plot_miami2(
70
70
  region_hspace = 0.1,
71
71
  dpi=100,
72
72
  fontsize = 10,
73
+ xtick_label_size = 10,
73
74
  font_family="Arial",
74
- xlabel_coords=(-0.01, -0.027),
75
+ xlabel_coords=None,
75
76
  xtick_label_pad=None,
76
77
  verbose=True,
77
78
  xtickpad=None,
@@ -142,9 +143,12 @@ def plot_miami2(
142
143
  scatter_args={}
143
144
 
144
145
  if fig_args is None:
145
- fig_args= {"figsize":(15,5),"dpi":100}
146
+ fig_args= {"figsize":(15,10),"dpi":100}
146
147
  if save_args is None:
147
148
  save_args={"dpi":100,"facecolor":"white"}
149
+
150
+ if xlabel_coords is None:
151
+ xlabel_coords = (-0.01,- region_hspace/2 )
148
152
 
149
153
  # figure out mqq args
150
154
  mqq_args1,mqq_args2 = _sort_args_to_12(mqq_args)
@@ -157,19 +161,10 @@ def plot_miami2(
157
161
 
158
162
  if dpi!=100:
159
163
  fig_args["dpi"] = dpi
160
- if xtickpad is None:
161
- if "figsize" not in fig_args.keys():
162
- fig_args["figsize"] = (15,5)
163
- xtickpad = fig_args["figsize"][1] * region_hspace *72 / 6
164
- if xtick_label_pad is None:
165
- if "figsize" not in fig_args.keys():
166
- fig_args["figsize"] = (15,5)
167
- xtick_label_pad = 72 * fig_args["figsize"][1] * region_hspace / 6
168
164
 
169
165
  if titles is None:
170
166
  titles=["",""]
171
167
 
172
-
173
168
  titles_pad_adjusted=[1,0]
174
169
  if titles_pad is None:
175
170
  titles_pad=[0.2,0.2]
@@ -242,11 +237,12 @@ def plot_miami2(
242
237
  # P_1 scaled_P_1 P_2 scaled_P_2 TCHR+POS CHR POS
243
238
  log.write(" -Columns in merged sumstats: {}".format(",".join(merged_sumstats.columns)), verbose=verbose)
244
239
 
245
-
246
240
  del(sumstats1)
247
241
  del(sumstats2)
248
242
  garbage_collect.collect()
249
243
  #####################################################################################################################
244
+
245
+
250
246
  ##plotting
251
247
  if figax is None:
252
248
  #fig_args["figsize"] = (15,10)
@@ -254,7 +250,30 @@ def plot_miami2(
254
250
  plt.subplots_adjust(hspace=region_hspace)
255
251
  else:
256
252
  fig, ax1, ax5 = figax
253
+ ##########################################################################################################################
254
+ bbox1 = ax1.get_position()
255
+ bbox5 = ax5.get_position()
256
+
257
257
 
258
+
259
+ fig_height_inches = fig.get_figheight()
260
+ ax_height_inches = (bbox1.height + bbox5.height) * fig_height_inches /2
261
+ ax_height_points = ax_height_inches * 72
262
+ tick = ax1.xaxis.get_major_ticks()[0].tick1line
263
+ # Tick length is determined by its 'markersize' (in points)
264
+ tick_length = tick.get_markersize()
265
+
266
+ tick_points_to_pixels = tick_length * fig.dpi / 72.0
267
+ ax_height_pixels = bbox1.height * fig.get_figheight() * fig.dpi
268
+ tick_axes_length = tick_points_to_pixels / ax_height_pixels
269
+
270
+ if xtick_label_pad is None:
271
+ if "figsize" not in fig_args.keys():
272
+ fig_args["figsize"] = (15,10)
273
+ # (total hsapce - tick label font size) / 2
274
+ xtick_label_pad = 0
275
+ #xtick_label_pad = ((ax_height_points * region_hspace) - 2*tick_length - xtick_label_size) / 2
276
+ ########################################################################################################################
258
277
  #if same_ylim==True:
259
278
  #maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
260
279
 
@@ -307,17 +326,23 @@ def plot_miami2(
307
326
  #ax5.set_xticks(chrom_df)
308
327
  ax5.set_xticklabels([])
309
328
  ax5.xaxis.set_ticks_position("top")
329
+ ax5.tick_params(axis='x', which='major', pad=0)
310
330
 
311
331
  # Ad#just the visibility for spines #######################################################
312
332
  ax1, ax5 = _set_spine_visibility(ax1, ax5)
313
333
  ######################################################################################################################
314
334
  #####################################################################################################################
315
335
  # set labels
316
- ax1.set_xlabel("Chromosome",fontsize=fontsize,family=font_family)
336
+ ax1.set_xlabel("Chromosome",fontsize=fontsize,family=font_family,labelpad=0, va="center",ha="center")
317
337
  ax1.xaxis.set_label_coords(xlabel_coords[0],xlabel_coords[1])
338
+
339
+ #ax1.tick_params(axis='x', which='major', pad=xtick_label_pad, labelsize = xtick_label_size)
318
340
 
319
- ax1.tick_params(axis='x', which='major', pad=xtick_label_pad)
320
-
341
+ for label in ax1.get_xticklabels():
342
+ label.set_y( xlabel_coords[1] + tick_axes_length )
343
+ ax1.tick_params(axis='x', which='major', pad=xtick_label_pad, labelsize = xtick_label_size)
344
+ plt.setp(ax1.get_xticklabels(), ha='center',va="center")
345
+
321
346
  ax1.set_ylabel("$\mathregular{-log_{10}(P)}$",fontsize=fontsize,family=font_family)
322
347
  ax5.set_ylabel("$\mathregular{-log_{10}(P)}$",fontsize=fontsize,family=font_family)
323
348
 
@@ -1178,22 +1178,33 @@ def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, l
1178
1178
 
1179
1179
  else:
1180
1180
  chrpad_to_remove = sumstats[pos].max()*chrpad
1181
+
1181
1182
  if ax1 is not None:
1182
- xmin, xmax = ax1.get_xlim()
1183
+ #xmin, xmax = ax1.get_xlim()
1184
+ xmax = sumstats["i"].max()
1185
+ xmin= sumstats["i"].min()
1186
+
1187
+ #length = xmax - xmin
1183
1188
  length = xmax - xmin
1184
1189
 
1185
1190
  if xpad is not None:
1186
1191
  log.write(" -Adjusting X padding on both side: {}".format(xpad), verbose=verbose)
1187
1192
  pad = xpad* length #sumstats["i"].max()
1188
1193
  ax1.set_xlim([xmin - pad + chrpad_to_remove, xmax + pad - chrpad_to_remove])
1194
+
1189
1195
  if xpad is None and xpadl is not None:
1190
1196
  log.write(" -Adjusting X padding on left side: {}".format(xpadl), verbose=verbose)
1191
- xmin, xmax = ax1.get_xlim()
1197
+
1198
+ xmax = ax1.get_xlim()[1]
1199
+
1192
1200
  pad = xpadl*length # sumstats["i"].max()
1193
1201
  ax1.set_xlim([xmin - pad + chrpad_to_remove ,xmax])
1202
+
1194
1203
  if xpad is None and xpadr is not None:
1195
1204
  log.write(" -Adjusting X padding on right side: {}".format(xpadr), verbose=verbose)
1196
- xmin, xmax = ax1.get_xlim()
1205
+
1206
+ xmin = ax1.get_xlim()[0]
1207
+
1197
1208
  pad = xpadr*length # sumstats["i"].max()
1198
1209
  ax1.set_xlim([xmin, xmax + pad - chrpad_to_remove])
1199
1210
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gwaslab
3
- Version: 3.6.3
3
+ Version: 3.6.4
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <gwaslab@gmail.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -26,6 +26,7 @@ Requires-Dist: pyensembl==2.2.3
26
26
  Requires-Dist: gtfparse==1.3.0
27
27
  Requires-Dist: h5py>=3.10.0
28
28
  Requires-Dist: pyarrow
29
+ Requires-Dist: polars>=1.27.0
29
30
  Dynamic: license-file
30
31
 
31
32
  # GWASLab
@@ -2,12 +2,12 @@ gwaslab/__init__.py,sha256=Lpa15i924wRu1jZ9_B2Tz5724FQkdb9O5Wg3tQPWR68,2683
2
2
  gwaslab/bd_common_data.py,sha256=jqkdKib-tc8RKyYMPy8z2KF3YwCWIr-6STMCi4xR7KY,13899
3
3
  gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
4
4
  gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
5
- gwaslab/bd_get_hapmap3.py,sha256=qWTvIRZsd7F3nT9sN2NSXUsxZJRf5k4HLgJ6kN0qaUc,4107
5
+ gwaslab/bd_get_hapmap3.py,sha256=1y_mt3Sj-D6Fm4NvVAYJKfE-BPaKPI-8wX93OkmxYbE,5402
6
6
  gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
7
7
  gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
8
8
  gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- gwaslab/g_Sumstats.py,sha256=VowMdFIwYfGABbhrpMRd3OV8HPRwWjYWTU4p6TAn4Q4,43781
10
- gwaslab/g_SumstatsMulti.py,sha256=2y8brnKAs5I9tUexQJiFwOsFLC4MyACn7IybvvXsRDU,13828
9
+ gwaslab/g_Sumstats.py,sha256=BKHF4YWPXZBW9XXU9KKkvkhInXZYducvDdC8q7kstbI,43930
10
+ gwaslab/g_SumstatsMulti.py,sha256=kYlbmvq3_PLnr4Car6DV5RIItmvCvznfUAjlYguu-2o,14016
11
11
  gwaslab/g_SumstatsPair.py,sha256=PmVPGU32degDuU5RDG9igyT1sFAbEDh6alrRulUphyk,13181
12
12
  gwaslab/g_SumstatsSet.py,sha256=AiTISWPfmu8NTGa8j9Yuts8DNw1pEUENYyPoS0HXp5I,29866
13
13
  gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
@@ -18,13 +18,13 @@ gwaslab/g_meta.py,sha256=pXtSsQfFPv2UJmOxKMmLtcsp_Ku_H73YP7PnlfMd0sg,6472
18
18
  gwaslab/g_meta_update.py,sha256=dWgz4kcq9bvsXycCjfhoYtcJHlEP4HRdYoQ8KFqMyTQ,2490
19
19
  gwaslab/g_vchange_status.py,sha256=w3zsYYOcCaI3PTeboonvkQjudzUAfVIgATzRdiPViZs,1939
20
20
  gwaslab/g_vchange_status_polars.py,sha256=kxyGQCur0ibVFBCyZghA-XNf_kLDXKK-l7VC-Om2IdA,1839
21
- gwaslab/g_version.py,sha256=Jop2Odt1nWgJsn3Dghjc76Mmetuwqm1xAqeFva3yVgk,1962
21
+ gwaslab/g_version.py,sha256=YdDG4Pd2xl9SjNiP8yq5slF3aQ4KCaQj83-AWW_edBk,1964
22
22
  gwaslab/hm_casting.py,sha256=xoq1E4Tp5VC4aLWfq9-_AfiQzb1WZAHrnZG33W4sCOE,14178
23
- gwaslab/hm_casting_polars.py,sha256=_3ZeMg3mRsa7vHpxObFErOledpaydhC_PxStcwlYO2c,8371
23
+ gwaslab/hm_casting_polars.py,sha256=CAPfCucj-ARtwP80EuHc9q2gLdj8TL_XQ7_sEC9LxA0,11440
24
24
  gwaslab/hm_harmonize_sumstats.py,sha256=2MeWnWmMHpLWjUMTVqExpg_3mE1VIaBsh6Mz3ffJCMc,84761
25
25
  gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
26
- gwaslab/io_load_ld.py,sha256=P4v-6WO3boamvc3i_iAfwZlo-z4JwBez81S5CkxpoS4,21916
27
- gwaslab/io_preformat_input.py,sha256=3C33kvtPpVfaidWWBDx6Z4Nu60Wj4L80SX80xD_beUY,25713
26
+ gwaslab/io_load_ld.py,sha256=pJemMmHn5GrEbIloqGPX3tBStjkA6DVrAoerGOUBbqc,21827
27
+ gwaslab/io_preformat_input.py,sha256=jIacIVymCfHVBlonUCFRmUJobLcxMWn2w_vWqXjESPg,26538
28
28
  gwaslab/io_preformat_input_polars.py,sha256=HGdi6rXPQnYjTW8fMUds-uF6Lt8uElL3Er_Afv3OjTc,24767
29
29
  gwaslab/io_process_args.py,sha256=TIv0DyaVEcHnLBGdOD52GBZiO6nAQJycAmmVdEYPmRE,1918
30
30
  gwaslab/io_read_ldsc.py,sha256=9tV4AfQZmCAyiNSR9uALmXWOytWhBdT0pfMAY5Mx_QQ,13407
@@ -44,7 +44,7 @@ gwaslab/prscs_parse_genet.py,sha256=N7UmyvdO--yTKvbjMNQCVucRIs4PbRGf93maGkh8PP4,
44
44
  gwaslab/qc_build.py,sha256=oeJqV-zbn-9FZ7SsmT4DPuYTt_IIeXw0XtmEkSzN5qA,2145
45
45
  gwaslab/qc_check_datatype.py,sha256=XuL4sg-OkdIiGaHg98JHZems0Uo-OyF5DlUTyE9gUd4,4580
46
46
  gwaslab/qc_check_datatype_polars.py,sha256=rJ1dIruxclHi37zd5xpS1ml30-200NI-FoEAVTe9_Eo,4218
47
- gwaslab/qc_fix_sumstats.py,sha256=dZcjMWFmRrH9gRFbt3YVS1t4xuR7zlMvMvbn400cF4s,98438
47
+ gwaslab/qc_fix_sumstats.py,sha256=qjIJxBr_6HpKQLkKRPrbLZEEOr5DzP8Go82AOxgDadY,98448
48
48
  gwaslab/qc_fix_sumstats_polars.py,sha256=5DY2PkWiZdfY-k8jXW69YnYPUmJiAw8uaolG8Oztr5g,9600
49
49
  gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  gwaslab/util_abf_finemapping.py,sha256=LRcopjtkT-iXtKPAJIzR4qjPdhD7nrS_BGit4EW89FM,3054
@@ -85,17 +85,17 @@ gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,175
85
85
  gwaslab/viz_aux_annotate_plot.py,sha256=IyGhqFQofrVW9A8UxJ5oNVH96czQ-2_i4s1lVvf2hww,25696
86
86
  gwaslab/viz_aux_chromatin.py,sha256=aWZaXOSvGyZY7wQcoFDaqHRYCSHZbi_K4Q70HruN9ts,4125
87
87
  gwaslab/viz_aux_property.py,sha256=UIaivghnLXYpTwkKnXRK0F28Jbn9L6OaICk3K73WZaU,33
88
- gwaslab/viz_aux_quickfix.py,sha256=cGX5i3WBmvKIiqck8V00caDg-pvKOO709Ux3DBXsUrM,18693
88
+ gwaslab/viz_aux_quickfix.py,sha256=v_ZYM2KO39qvk38t0SwopVG3yHjW5MB9DzHX6kvdh1k,18692
89
89
  gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
90
90
  gwaslab/viz_aux_save_figure.py,sha256=HwIRDMYpeXfkBgb7mqzLN7OVPMz163U-ZVOlQJABzeg,2811
91
91
  gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
92
- gwaslab/viz_plot_compare_effect.py,sha256=-v9S8kk06eqBSwny2l0hasqf2z7-rKieGUKnghqyhM0,69370
92
+ gwaslab/viz_plot_compare_effect.py,sha256=QC3TsxKLk1bpiNNO0xh5fQRVcTcETmG3Zr5PXSUX3QY,69480
93
93
  gwaslab/viz_plot_credible_sets.py,sha256=RC5ZsuW8-0w0S1h4c--PZrYwfHfgZ3clM-9JcoMLJWk,5841
94
94
  gwaslab/viz_plot_effect.py,sha256=qbM6c1IB2HlUlMNgFZlJ5G8ODQJ8-oSWD8t0Q8DDuz8,10653
95
95
  gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
96
96
  gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
97
- gwaslab/viz_plot_miamiplot2.py,sha256=eVS2v1YFEO0xZ-2zs_DCCvEx-Hqbt0wQkbgCHiRfqeE,16247
98
- gwaslab/viz_plot_mqqplot.py,sha256=mlcsA_wEnv7N9mP6CVL15WXWHJ0-8TwqHuznJXgPwCE,71103
97
+ gwaslab/viz_plot_miamiplot2.py,sha256=5O4Ho8_FsSkBcunFVlbAkYuLAMxNqE6swLVtmvkoR6c,17495
98
+ gwaslab/viz_plot_mqqplot.py,sha256=W0rKC8I0KrUvv5zakiTcPTEF_ttrUucrwoFS5MHLIyQ,71283
99
99
  gwaslab/viz_plot_phe_heatmap.py,sha256=qoXVeFTIm-n8IinNbDdPFVBSz2yGCGK6QzTstXv6aj4,9532
100
100
  gwaslab/viz_plot_qqplot.py,sha256=cB4vRlFv69zWY9NMLfSkfAbirYp3_EEW2kQiBTEMDoc,7483
101
101
  gwaslab/viz_plot_regional2.py,sha256=BoL1V56ww9B2_vFkABgln_f6OrzobiFjUISI5X6XXMM,43146
@@ -112,9 +112,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
112
112
  gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
113
113
  gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
114
114
  gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
115
- gwaslab-3.6.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
116
- gwaslab-3.6.3.dist-info/licenses/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
117
- gwaslab-3.6.3.dist-info/METADATA,sha256=tIgPWN8dJTS-x4WFwAhEMN3Ex-XJRqdTZYFd8qM7u3w,7043
118
- gwaslab-3.6.3.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
119
- gwaslab-3.6.3.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
120
- gwaslab-3.6.3.dist-info/RECORD,,
115
+ gwaslab-3.6.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
116
+ gwaslab-3.6.4.dist-info/licenses/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
117
+ gwaslab-3.6.4.dist-info/METADATA,sha256=0-B3gOWawfkr19AO_m-H8S-Mgtxe5o_yvCAGCJx59_Y,7073
118
+ gwaslab-3.6.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
119
+ gwaslab-3.6.4.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
120
+ gwaslab-3.6.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5