gwaslab 3.4.46__py3-none-any.whl → 3.4.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic; see the advisory linked from the registry page for more details.

gwaslab/g_Sumstats.py CHANGED
@@ -76,6 +76,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
76
76
  from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
77
77
  from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
78
78
  from gwaslab.bd_get_hapmap3 import gethapmap3
79
+ from gwaslab.util_abf_finemapping import abf_finemapping
80
+ from gwaslab.util_abf_finemapping import make_cs
79
81
  import gc
80
82
 
81
83
  #20220309
@@ -135,6 +137,7 @@ class Sumstats():
135
137
  self.data = pd.DataFrame()
136
138
  self.log = Log()
137
139
  self.ldsc_h2 = None
140
+ self.ldsc_h2_results = None
138
141
  self.ldsc_rg = None
139
142
  self.ldsc_h2_cts = None
140
143
  self.ldsc_partitioned_h2_summary = None
@@ -756,13 +759,20 @@ class Sumstats():
756
759
  else:
757
760
  output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
758
761
  self.meta["Genomic inflation factor"] = output
759
- return output
762
+ return output
763
+
764
+ def abf_finemapping(self, region=None, chrpos=None, snpid=None,**kwargs):
765
+ region_data = abf_finemapping(self.data.copy(),region=region,chrpos=chrpos,snpid=snpid,log=self.log, **kwargs)
766
+ credible_sets = make_cs(region_data,threshold=0.95,log=self.log)
767
+ return region_data, credible_sets
768
+
769
+
760
770
  ## LDSC ##############################################################################################
761
771
  def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
762
772
  if build is None:
763
773
  build = self.meta["gwaslab"]["genome_build"]
764
774
  insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
765
- self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
775
+ self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
766
776
 
767
777
  def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
768
778
  if build is None:
gwaslab/g_version.py CHANGED
@@ -15,16 +15,16 @@ def _get_version():
15
15
def gwaslab_info():
    """Return gwaslab release metadata as a dict with 'version' and 'release_date'."""
    # Version meta information for the 3.4.48 release.
    return {
        "version": "3.4.48",
        "release_date": "20240822",
    }
22
22
 
23
- def _checking_plink_version(v=2,log=Log(), verbose=True):
24
- if v==1:
25
- which_plink_script = "plink --version"
26
- elif v==2:
27
- which_plink_script = "plink2 --version"
23
def _checking_plink_version(plink=None, plink2=None, log=Log(), verbose=True):
    """Run `<executable> --version` and write the PLINK version to the log.

    Parameters
    ----------
    plink : str or None
        Path/name of the PLINK 1.9 executable; takes precedence over plink2.
    plink2 : str or None
        Path/name of the PLINK 2 executable, used when plink is None.
    log : Log
        gwaslab Log object the version line is written to.
    verbose : bool
        Accepted for interface compatibility; the version line is always
        written via log.write here.

    Returns
    -------
    Log
        The same log object, for chaining.

    Raises
    ------
    ValueError
        If neither plink nor plink2 is provided.
    """
    if plink is not None:
        which_plink_script = "{} --version".format(plink)
    elif plink2 is not None:
        which_plink_script = "{} --version".format(plink2)
    else:
        # Previously fell through to an unbound-variable NameError; fail clearly instead.
        raise ValueError("Please provide either plink or plink2 executable path.")
    # NOTE(review): shell=True with an interpolated path — the executable path
    # must come from trusted configuration, not untrusted input.
    output = subprocess.check_output(which_plink_script, stderr=subprocess.STDOUT, shell=True, text=True)
    log.write(" -PLINK version: {}".format(output.strip()))
    return log
gwaslab/io_read_ldsc.py CHANGED
@@ -198,16 +198,29 @@ def read_greml(filelist=[]):
198
198
  return summary
199
199
 
200
200
  def parse_ldsc_summary(ldsc_summary):
201
- summary = pd.DataFrame(columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se"])
201
+
202
202
  lines = ldsc_summary.split("\n")
203
+
204
+ columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se","Catagories"]
205
+
206
+ summary = pd.DataFrame(columns = columns)
207
+
203
208
  row={}
209
+
204
210
  try:
205
211
  objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[0])
206
212
  row["h2_obs"]=objects[1]
207
213
  row["h2_se"]=objects[2]
208
214
 
209
- ##next line lambda gc
215
+ ##check categories
216
+ if len(lines) == 6:
217
+ objects = re.compile(' -Categories:(.+)').findall(lines[1])
218
+ row["Catagories"] = objects[0].strip()
219
+ lines.pop(1)
220
+ else:
221
+ row["Catagories"] = "NA"
210
222
 
223
+ ##next line lambda gc
211
224
  objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[1])
212
225
  row["Lambda_gc"] = objects[1]
213
226
  ##next line Mean_chi2
@@ -240,6 +253,7 @@ def parse_ldsc_summary(ldsc_summary):
240
253
  row["Intercept_se"]="NA"
241
254
  row["Ratio"]="NA"
242
255
  row["Ratio_se"]="NA"
256
+ row["Catagories"] = "NA"
243
257
 
244
258
  #summary = summary.append(row,ignore_index=True)
245
259
  row = pd.DataFrame([row], columns = summary.columns)
gwaslab/io_to_formats.py CHANGED
@@ -212,8 +212,10 @@ def tofmt(sumstats,
212
212
  log.write(" -Start outputting sumstats in "+fmt+" format...")
213
213
 
214
214
  if "CHR" in sumstats.columns:
215
+ # output X,Y,MT instead of 23,24,25
215
216
  if xymt_number is False and pd.api.types.is_integer_dtype(sumstats["CHR"]):
216
217
  sumstats["CHR"]= sumstats["CHR"].map(get_number_to_chr(xymt=xymt,prefix=chr_prefix))
218
+ # add prefix to CHR
217
219
  elif chr_prefix is not None:
218
220
  sumstats["CHR"]= chr_prefix + sumstats["CHR"].astype("string")
219
221
 
@@ -437,17 +439,20 @@ def _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status
437
439
  ouput_cols.append(i)
438
440
 
439
441
  # + additional cols and remove duplicated
440
- ouput_cols = list(set(ouput_cols + cols))
442
+ ouput_cols_final = []
443
+ for i in ouput_cols + cols:
444
+ if i not in ouput_cols_final:
445
+ ouput_cols_final.append(i)
441
446
 
442
447
  # remove STATUS
443
448
  try:
444
449
  if no_status == True:
445
- ouput_cols.remove("STATUS")
450
+ ouput_cols_final.remove("STATUS")
446
451
  except:
447
452
  pass
448
453
 
449
454
  #filter and rename to target fromat headers
450
- sumstats = sumstats[ouput_cols]
455
+ sumstats = sumstats[ouput_cols_final]
451
456
  sumstats = sumstats.rename(columns=rename_dictionary)
452
457
 
453
458
  # configure target format args and reorder columns
@@ -1497,7 +1497,11 @@ def liftover_variant(sumstats,
1497
1497
  status="STATUS",
1498
1498
  from_build="19",
1499
1499
  to_build="38"):
1500
- converter = get_lifter("hg"+from_build,"hg"+to_build)
1500
+ try:
1501
+ converter = get_lifter("hg"+from_build,"hg"+to_build,one_based=True)
1502
+ except:
1503
+ converter = get_lifter("hg"+from_build,"hg"+to_build)
1504
+
1501
1505
  dic= get_number_to_chr(in_chr=False,xymt=["X","Y","M"])
1502
1506
  dic2= get_chr_to_number(out_chr=False)
1503
1507
  for i in sumstats[chrom].unique():
@@ -1549,7 +1553,6 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
1549
1553
  pool.close()
1550
1554
  pool.join()
1551
1555
  ############################################################################
1552
-
1553
1556
  unmap_num = len(sumstats.loc[sumstats[pos].isna(),:])
1554
1557
 
1555
1558
  if remove is True:
@@ -0,0 +1,67 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from gwaslab.g_Log import Log
4
+ from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
5
+ from gwaslab.util_in_filter_value import _get_flanking_by_id
6
+
7
+ # Calculate PIP based on approximate Bayesian factor (ABF)
8
+ # Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
9
+
10
+
11
def calc_abf(insumstats, w=0.2, log=Log(), verbose=True, **kwargs):
    """Compute the per-variant log approximate Bayes factor (ABF).

    Implements Wakefield (2007): under a N(0, W^2) prior on the effect size,
    the (reciprocal of the) ABF depends only on BETA, its standard error SE,
    and the prior standard deviation W.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must contain numeric "BETA" and "SE" columns.
    w : float
        Prior standard deviation W of the effect size parameter beta.
        Suggested values: 0.2 for binary traits, 0.15 for quantitative traits.
    log : Log
        gwaslab Log object.
    verbose : bool
        Verbosity flag forwarded to log.write.

    Returns
    -------
    pandas.DataFrame
        A copy of insumstats with a new "log_ABF" column.
    """
    log.write("Start to calculate approximate Bayesian factor for {} variants".format(len(insumstats)), verbose=verbose)
    # BUG FIX: the citation previously read "akefield, J." (missing "W").
    log.write(" - Reference: Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).", verbose=verbose)
    log.write(" - Priors for the standard deviation W of the effect size parameter β : {} ".format(w), verbose=verbose)
    omega = w**2
    se = insumstats["SE"]
    v = se**2
    # Shrinkage factor r = W^2 / (W^2 + V); z is the Wald statistic.
    r = omega / (omega + v)
    beta = insumstats["BETA"]
    z = beta / se
    insumstats = insumstats.copy()
    # Log of the reciprocal of Wakefield's ABF (his equation 6), so larger
    # values favor association.
    insumstats.loc[:, "log_ABF"] = 1 / 2 * (np.log(1 - r) + (r * z**2))
    return insumstats
32
+
33
def calc_PIP(insumstats, log=Log(), verbose=True, **kwargs):
    """Convert per-variant log ABFs into posterior inclusion probabilities (PIP).

    PIP_i = ABF_i / sum_j(ABF_j), computed entirely in log space with the
    log-sum-exp trick for numerical stability.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must contain a "log_ABF" column (e.g. from calc_abf).

    Returns
    -------
    pandas.DataFrame
        A copy of insumstats with "log_PIP" and "PIP" columns added.
    """
    log_abf = insumstats["log_ABF"]
    max_log_abf = np.max(log_abf)
    # log(sum_j ABF_j) via log-sum-exp: shift by the maximum before exponentiating.
    log_total_abf = np.log(np.sum(np.exp(log_abf - max_log_abf))) + max_log_abf
    insumstats = insumstats.copy()
    log.write("Start to calculate PIP for {} variants".format(len(insumstats)), verbose=verbose)
    # Normalize in log space, then map back to the probability scale.
    insumstats.loc[:, "log_PIP"] = insumstats["log_ABF"] - log_total_abf
    insumstats.loc[:, "PIP"] = np.exp(insumstats["log_PIP"])
    return insumstats
43
+
44
def abf_finemapping(insumstats, region=None, chrpos=None, snpid=None, log=Log(), **kwargs):
    """Extract a locus and compute ABF and PIP for every variant in it.

    Exactly one of region / chrpos / snpid must be provided.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Sumstats with at least "CHR", "POS", "BETA" and "SE" columns.
    region : tuple or None
        (CHR, start, end); variants with matching CHR and POS in [start, end]
        are selected.
    chrpos : tuple or None
        Passed to _get_flanking_by_chrpos to select a flanking window.
    snpid : str or None
        Passed to _get_flanking_by_id to select a flanking window.
    log : Log
        gwaslab Log object forwarded to the helpers.
    **kwargs
        Forwarded to the extraction helpers and to calc_abf / calc_PIP.

    Returns
    -------
    pandas.DataFrame
        Locus variants with "log_ABF", "log_PIP" and "PIP" columns.

    Raises
    ------
    ValueError
        If none of region, chrpos or snpid is given.
    """
    if region is not None:
        region_data = insumstats[(insumstats["CHR"] == region[0]) & (insumstats["POS"] >= region[1]) & (insumstats["POS"] <= region[2])]
    elif chrpos is not None:
        region_data = _get_flanking_by_chrpos(insumstats, chrpos=chrpos, **kwargs)
    elif snpid is not None:
        region_data = _get_flanking_by_id(insumstats, snpid=snpid, **kwargs)
    else:
        # BUG FIX: previously fell through with region_data unbound -> NameError.
        raise ValueError("Please specify the target locus via region, chrpos, or snpid.")

    region_data = calc_abf(region_data, log=log, **kwargs)
    region_data = calc_PIP(region_data, log=log, **kwargs)
    return region_data
56
+
57
def make_cs(insumstats, threshold=0.95, log=Log(), verbose=True):
    """Construct a credible set from per-variant PIPs.

    Variants are ranked by PIP (descending) and included until the cumulative
    PIP first exceeds `threshold` (the crossing variant is included). If the
    total PIP never exceeds the threshold, all variants are returned.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must contain a "PIP" column (e.g. from calc_PIP).
    threshold : float
        Cumulative-PIP cutoff for the credible set (default 0.95).

    Returns
    -------
    pandas.DataFrame
        The credible-set rows, sorted by PIP descending. Column dtypes are
        preserved (the previous row-by-row concat cast them to object, and
        was O(n^2) in the number of variants).
    """
    sorted_sumstats = insumstats.sort_values(by="PIP", ascending=False)
    cum_pip = sorted_sumstats["PIP"].cumsum().to_numpy()
    # 0-based index of the first row where the running PIP sum exceeds the
    # threshold; keep everything up to and including it.
    exceeded = np.flatnonzero(cum_pip > threshold)
    n_keep = int(exceeded[0]) + 1 if len(exceeded) > 0 else len(sorted_sumstats)
    cs = sorted_sumstats.iloc[:n_keep]
    log.write("Finished constructing a {}% credible set with {} variant(s)".format(str(threshold * 100), str(len(cs))), verbose=verbose)
    return cs
@@ -17,6 +17,8 @@ def tofinemapping(sumstats,
17
17
  vcf=None,
18
18
  loci=None,
19
19
  out="./",
20
+ plink="plink",
21
+ plink2="plink2",
20
22
  windowsizekb=1000,
21
23
  n_cores=1,
22
24
  mode="r",
@@ -56,6 +58,9 @@ def tofinemapping(sumstats,
56
58
  else:
57
59
  sig_df = sumstats.loc[sumstats["SNPID"].isin(loci),:]
58
60
 
61
+ log.write(" -plink1.9 path: {}".format(plink),verbose=verbose)
62
+ log.write(" -plink2 path: {}".format(plink2),verbose=verbose)
63
+
59
64
  # Drop duplicate!!!!
60
65
  log.write(" -Dropping duplicated SNPIDs...",verbose=verbose)
61
66
  sumstats = sumstats.drop_duplicates(subset=["SNPID"]).copy()
@@ -68,11 +73,13 @@ def tofinemapping(sumstats,
68
73
  if exclude_hla==True:
69
74
  sig_df = _exclude_hla(sig_df, log=log, verbose=verbose)
70
75
 
76
+ sig_df = sig_df.reset_index()
77
+
71
78
  ## for each lead variant
72
79
  for index, row in sig_df.iterrows():
73
80
  # extract snplist in each locus
74
81
  gc.collect()
75
-
82
+ log.write(" -Locus #{}---------------------------------------------------------------".format(index+1))
76
83
  log.write(" -Processing locus with lead variant {} at CHR {} POS {} ...".format(row["SNPID"],row["CHR"],row["POS"]))
77
84
  locus_sumstats = _extract_variants_in_locus(sumstats, windowsizekb, locus = (row["CHR"],row["POS"]))
78
85
 
@@ -84,7 +91,10 @@ def tofinemapping(sumstats,
84
91
  n_cores=n_cores,
85
92
  log=log,
86
93
  load_bim=True,
87
- overwrite=overwrite,**kwargs)
94
+ overwrite=overwrite,
95
+ plink=plink,
96
+ plink2=plink2,
97
+ **kwargs)
88
98
 
89
99
  ## check available snps with reference file
90
100
  matched_sumstats = _align_sumstats_with_bim(row=row,
@@ -114,7 +124,10 @@ def tofinemapping(sumstats,
114
124
  windowsizekb=windowsizekb,
115
125
  out=out,
116
126
  plink_log=plink_log,
117
- log=log,filetype=filetype,
127
+ log=log,
128
+ filetype=filetype,
129
+ plink=plink,
130
+ plink2=plink2,
118
131
  verbose=verbose)
119
132
 
120
133
 
@@ -143,12 +156,12 @@ def tofinemapping(sumstats,
143
156
 
144
157
 
145
158
 
146
- def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,verbose=True):
159
+ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,plink,plink2,verbose=True):
147
160
  '''
148
161
  Calculate LD r matrix by calling PLINK; return file name and log
149
162
  '''
150
163
  log.write(" -Start to calculate LD r matrix...",verbose=verbose)
151
- log = _checking_plink_version(v=1, log=log)
164
+ log = _checking_plink_version(plink=plink, log=log)
152
165
  if "@" in bfile_prefix:
153
166
  bfile_to_use = bfile_prefix.replace("@",str(row["CHR"]))
154
167
  else:
@@ -165,7 +178,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
165
178
  raise ValueError("Please use bfile instead of pfile for PLINK1.")
166
179
 
167
180
  script_vcf_to_bfile = """
168
- plink \
181
+ {} \
169
182
  --bfile {} \
170
183
  --keep-allele-order \
171
184
  --extract {} \
@@ -175,7 +188,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
175
188
  --threads {} {}\
176
189
  --write-snplist \
177
190
  --out {}
178
- """.format(bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
191
+ """.format(plink, bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
179
192
 
180
193
  try:
181
194
  output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)
@@ -18,6 +18,8 @@ def _calculate_prs(sumstats,
18
18
  memory=None,
19
19
  overwrite=False,
20
20
  mode=None,delete=True,
21
+ plink="plink",
22
+ plink2="plink2",
21
23
  log=Log(),**kwargs):
22
24
 
23
25
  #matching_alleles
@@ -30,14 +32,18 @@ def _calculate_prs(sumstats,
30
32
  chrlist.sort()
31
33
  plink_log = ""
32
34
  #process reference fileWWW
33
- bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files( chrlist=chrlist,
35
+ bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(
36
+ chrlist=chrlist,
34
37
  bfile=bfile,
35
38
  vcf=vcf,
36
39
  plink_log=plink_log,
37
40
  n_cores=n_cores,
38
41
  log=log,
39
42
  load_bim=False,
40
- overwrite=overwrite,**kwargs)
43
+ overwrite=overwrite,
44
+ plink=plink,
45
+ plink2=plink2,
46
+ **kwargs)
41
47
  score_file_path_list =[]
42
48
  for index, chrom in enumerate(chrlist):
43
49
  chr_sumstats = sumstats.loc[sumstats["CHR"]==chrom,:].copy()
@@ -61,7 +67,7 @@ def _calculate_prs(sumstats,
61
67
  plink_log=plink_log,
62
68
  log=log,
63
69
  memory=memory,
64
- mode=mode,filetype=filetype)
70
+ mode=mode,filetype=filetype,plink2=plink2)
65
71
  score_file_path_list.append(score_file_path)
66
72
  if delete == True:
67
73
  os.remove(model_path)
@@ -71,10 +77,10 @@ def _calculate_prs(sumstats,
71
77
 
72
78
 
73
79
 
74
- def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, mode=None):
80
+ def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, plink2,mode=None):
75
81
 
76
82
  log.write(" -Start to calculate PRS for Chr {}...".format(chrom))
77
- _checking_plink_version(v=2, log=log)
83
+ _checking_plink_version(plink2=plink2, log=log)
78
84
 
79
85
  if "@" in bfile_prefix:
80
86
  bpfile_to_use = bfile_prefix.replace("@",str(chrom))
@@ -92,13 +98,13 @@ def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, pl
92
98
  memory_flag = "--memory {}".format(memory)
93
99
 
94
100
  script_vcf_to_bfile = """
95
- plink2 \
101
+ {} \
96
102
  {} \
97
103
  --score {} 1 2 3 header {} cols=+scoresums,+denom ignore-dup-ids \
98
104
  --chr {} \
99
105
  --threads {} {}\
100
106
  --out {}
101
- """.format(file_flag, model_path , mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
107
+ """.format(plink2, file_flag, model_path , mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
102
108
 
103
109
  try:
104
110
  output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)
gwaslab/util_ex_ldsc.py CHANGED
@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
304
304
  log.write(" -LDSC log:", verbose=verbose)
305
305
  summary = estimate_h2(sumstats, default_args, log)
306
306
 
307
+ results_table = None
308
+ if type(summary) is tuple:
309
+ results_table = summary[1]
310
+ summary = summary[0]
311
+ log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
312
+
313
+
307
314
  log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
308
315
  finished(log=log,verbose=verbose,end_line=_end_line)
309
- return parse_ldsc_summary(summary)
316
+ return parse_ldsc_summary(summary), results_table
310
317
 
311
318
 
312
319
  ####################################################################################################################
@@ -20,7 +20,9 @@ def _process_plink_input_files(chrlist,
20
20
  bgen_mode="ref-first",
21
21
  convert="bfile",
22
22
  memory=None,
23
- load_bim=False):
23
+ load_bim=False,
24
+ plink="plink",
25
+ plink2="plink2"):
24
26
  """
25
27
  Process input files (bfile,pfile,vcf,bgen) to either PLINK1 bed/bim/fam or PLINK2 pgen/psam/pvar.
26
28
 
@@ -66,7 +68,9 @@ def _process_plink_input_files(chrlist,
66
68
  convert=convert,
67
69
  memory=memory,
68
70
  overwrite=overwrite,
69
- load_bim=load_bim)
71
+ load_bim=load_bim,
72
+ plink=plink,
73
+ plink2=plink2)
70
74
  filetype = convert
71
75
  elif filetype == "bgen":
72
76
  ref_file_prefix, plink_log, ref_bims = _process_bgen(ref_file_prefix=ref_file_prefix,
@@ -81,7 +85,9 @@ def _process_plink_input_files(chrlist,
81
85
  convert=convert,
82
86
  memory=memory,
83
87
  overwrite=overwrite,
84
- load_bim=load_bim)
88
+ load_bim=load_bim,
89
+ plink=plink,
90
+ plink2=plink2)
85
91
  filetype = convert
86
92
  return ref_file_prefix, plink_log, ref_bims, filetype
87
93
 
@@ -199,11 +205,13 @@ def _process_vcf(ref_file_prefix,
199
205
  convert="bfile",
200
206
  memory=None,
201
207
  overwrite=False,
202
- load_bim=False):
208
+ load_bim=False,
209
+ plink="plink",
210
+ plink2="plink2"):
203
211
  log.write(" -Processing VCF : {}...".format(ref_file_prefix))
204
212
 
205
213
  #check plink version
206
- log = _checking_plink_version(v=2,log=log)
214
+ log = _checking_plink_version(plink2=plink2,log=log)
207
215
 
208
216
  # file path prefix to return
209
217
  if is_wild_card==True:
@@ -243,14 +251,15 @@ def _process_vcf(ref_file_prefix,
243
251
  #if not existing or overwrite is True
244
252
  if (not is_file_exist) or overwrite:
245
253
  script_vcf_to_bfile = """
246
- plink2 \
254
+ {} \
247
255
  --vcf {} \
248
256
  --chr {} \
249
257
  {} \
250
258
  --rm-dup force-first \
251
259
  --threads {}{}\
252
260
  --out {}
253
- """.format(vcf_to_load,
261
+ """.format(plink2,
262
+ vcf_to_load,
254
263
  i,
255
264
  make_flag,
256
265
  n_cores, memory_flag,
@@ -288,11 +297,13 @@ def _process_bgen(ref_file_prefix,
288
297
  convert="bfile",
289
298
  memory=None,
290
299
  overwrite=False,
291
- load_bim=False):
300
+ load_bim=False,
301
+ plink="plink",
302
+ plink2="plink2"):
292
303
  log.write(" -Processing BGEN files : {}...".format(ref_file_prefix))
293
304
 
294
305
  #check plink version
295
- log = _checking_plink_version(v=2,log=log)
306
+ log = _checking_plink_version(log=log,plink2=plink2)
296
307
 
297
308
  # file path prefix to return
298
309
  if is_wild_card==True:
@@ -338,14 +349,14 @@ def _process_bgen(ref_file_prefix,
338
349
  #if not existing or overwrite is True
339
350
  if (not is_file_exist) or overwrite:
340
351
  script_vcf_to_bfile = """
341
- plink2 \
352
+ {} \
342
353
  --bgen {} {} {}\
343
354
  --chr {} \
344
355
  {} \
345
356
  --rm-dup force-first \
346
357
  --threads {}{}\
347
358
  --out {}
348
- """.format(bgen_to_load, bgen_mode, sample_flag,
359
+ """.format(plink2,bgen_to_load, bgen_mode, sample_flag,
349
360
  i,
350
361
  make_flag,
351
362
  n_cores, memory_flag,
@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
11
11
  def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
12
12
  p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
13
13
  n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
14
- log=Log(),verbose=True):
14
+ log=Log(),verbose=True,plink="plink",plink2="plink2"):
15
15
  ##start function with col checking##########################################################
16
16
  _start_line = "perfrom clumping"
17
17
  _end_line = "clumping"
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
111
111
  bfile_to_use = bfile
112
112
 
113
113
  log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
114
- log = _checking_plink_version(v=2, log=log)
114
+ log = _checking_plink_version(plink2=plink2, log=log)
115
115
  if memory is not None:
116
116
  memory_flag = "--memory {}".format(memory)
117
117
 
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
123
123
  if scaled == True:
124
124
  # clumping using LOG10P
125
125
  script = """
126
- plink2 \
126
+ {} \
127
127
  {}\
128
128
  --chr {} \
129
129
  --clump {} \
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
136
136
  --clump-kb {} \
137
137
  --threads {} {}\
138
138
  --out {}
139
- """.format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
139
+ """.format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
140
140
  else:
141
141
  # clumping using P
142
142
  script = """
143
- plink2 \
143
+ {} \
144
144
  {}\
145
145
  --chr {} \
146
146
  --clump {} \
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
152
152
  --clump-kb {} \
153
153
  --threads {} {}\
154
154
  --out {}
155
- """.format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
155
+ """.format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
156
156
 
157
157
  try:
158
158
  output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)
@@ -38,6 +38,7 @@ def annotate_single(
38
38
  region,
39
39
  region_anno_bbox_args,
40
40
  skip,
41
+ anno_height=1,
41
42
  amode="int",
42
43
  snpid="SNPID",
43
44
  chrom="CHR",
@@ -131,7 +132,7 @@ def annotate_single(
131
132
 
132
133
  #xy=(row["i"],row["scaled_P"]+0.2)
133
134
  xy=(row["i"],row["scaled_P"]+0.01*maxy)
134
- xytext=(last_pos,1.15*maxy*arm_scale)
135
+ xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
135
136
 
136
137
  if anno_fixed_arm_length is not None:
137
138
  armB_length_in_point = anno_fixed_arm_length
@@ -63,6 +63,8 @@ def _plot_chromatin_state(region_chromatin_files,
63
63
  fig,
64
64
  ax,
65
65
  xlim_i,
66
+ fontsize = 12,
67
+ font_family = "Arial",
66
68
  log=Log(),
67
69
  verbose=True):
68
70
  '''
@@ -101,11 +103,10 @@ def _plot_chromatin_state(region_chromatin_files,
101
103
 
102
104
  ## add stripe label
103
105
  if len(region_chromatin_labels) == len(region_chromatin_files):
104
- ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
105
- region_chromatin_labels)
106
+ ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))], region_chromatin_labels, fontsize=fontsize, family=font_family)
106
107
  else:
107
108
  ax.set_yticks(ticks=[])
108
109
 
109
- ax.set_xticks(ticks=[])
110
+ #ax.set_xticks(ticks=[])
110
111
  ax.invert_yaxis()
111
112
  return fig
@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
286
286
  log.write(" -Converting data above cut line...",verbose=verbose)
287
287
  if ylabels is not None:
288
288
  ylabels = pd.Series(ylabels)
289
- maxy = series.max()
290
289
  series = series.copy()
290
+
291
+ maxy = series.max()
291
292
  if "b" not in mode:
292
293
  log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
293
294
  elif "b" in mode:
@@ -77,8 +77,10 @@ def compare_effect(path1,
77
77
  scaled2 = True
78
78
  if is_q_mc=="fdr" or is_q_mc=="bon":
79
79
  is_q = True
80
- else:
81
- raise ValueError("Please select either fdr or bon for is_q_mc.")
80
+
81
+ if is_q == True:
82
+ if is_q_mc not in [False,"fdr","bon","non"]:
83
+ raise ValueError("Please select either fdr or bon or non for is_q_mc.")
82
84
  if save_args is None:
83
85
  save_args = {"dpi":300,"facecolor":"white"}
84
86
  if reg_box is None:
@@ -247,7 +247,10 @@ def plot_miami2(
247
247
  plt.subplots_adjust(hspace=region_hspace)
248
248
  else:
249
249
  fig, ax1, ax5 = figax
250
-
250
+
251
+ #if same_ylim==True:
252
+ #maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
253
+
251
254
  log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
252
255
  fig,log = mqqplot(merged_sumstats,
253
256
  chrom="CHR",
@@ -284,14 +287,8 @@ def plot_miami2(
284
287
  _if_quick_qc=False,
285
288
  **mqq_args2)
286
289
  log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
290
+
287
291
 
288
- if same_ylim==True:
289
- ylim1_converted = ax1.get_ylim()
290
- ylim2_converted = ax5.get_ylim()
291
- if ylim1_converted > ylim2_converted:
292
- ax5.set_ylim(ylim1_converted)
293
- else:
294
- ax1.set_ylim(ylim2_converted)
295
292
  #####################################################################################################################
296
293
 
297
294
  ax5.set_xlabel("")