gwaslab 3.5.3__py3-none-any.whl → 3.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of gwaslab might be problematic.

gwaslab/__init__.py CHANGED
@@ -46,4 +46,5 @@ from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
  from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
  from gwaslab.io_read_tabular import _read_tabular as read_tabular
  from gwaslab.util_in_meta import meta_analyze
- from gwaslab.viz_plot_scatter_with_reg import scatter
+ from gwaslab.viz_plot_scatter_with_reg import scatter
+ from gwaslab.util_in_fill_data import rank_based_int
gwaslab/g_Sumstats.py CHANGED
@@ -80,6 +80,8 @@ from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
  from gwaslab.bd_get_hapmap3 import gethapmap3
  from gwaslab.util_abf_finemapping import abf_finemapping
  from gwaslab.util_abf_finemapping import make_cs
+ from gwaslab.io_read_pipcs import _read_pipcs
+ from gwaslab.viz_plot_credible_sets import _plot_cs
  import gc
  from gwaslab.viz_plot_phe_heatmap import _gwheatmap
 
@@ -88,6 +90,7 @@ class Sumstats():
  def __init__(self,
  sumstats,
  fmt=None,
+ tab_fmt="tsv",
  snpid=None,
  rsid=None,
  chrom=None,
@@ -154,10 +157,17 @@ class Sumstats():
  self.meta["gwaslab"]["species"] = species

  # initialize attributes for clumping and finmapping
- self.to_finemapping_file_path = ""
- self.to_finemapping_file = pd.DataFrame()
- self.plink_log = ""
- self.clumps = pd.DataFrame()
+ #self.to_finemapping_file_path = ""
+ #self.to_finemapping_file = pd.DataFrame()
+ #self.plink_log = ""
+
+ # path / file / plink_log
+ self.finemapping = dict()
+
+ # clumps / clumps_raw / plink_log
+ self.clumps = dict()
+
+ #
  self.pipcs = pd.DataFrame()

  # print gwaslab version information
@@ -167,6 +177,7 @@ class Sumstats():
  self.data = preformat(
  sumstats=sumstats,
  fmt=fmt,
+ tab_fmt = tab_fmt,
  snpid=snpid,
  rsid=rsid,
  chrom=chrom,
@@ -822,21 +833,31 @@ class Sumstats():
  # external ################################################################################################

  def calculate_ld_matrix(self,**kwargs):
- self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
+ self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"]= tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
+ #self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)

  def run_susie_rss(self,**kwargs):
- self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**kwargs)
+ self.pipcs=_run_susie_rss(self.finemapping["path"],**kwargs)
+ #self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**kwargs)

  def clump(self,**kwargs):
- self.clumps,self.plink_log = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
+ self.clumps["clumps"], self.clumps["clumps_raw"], self.clumps["plink_log"] = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)

  def calculate_prs(self,**kwargs):
  combined_results_summary = _calculate_prs(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
  return combined_results_summary
-
+
+ # loading aux data
+ def read_pipcs(self,prefix,**kwargs):
+ self.pipcs = _read_pipcs(self.data[["SNPID","CHR","POS"]],prefix, **kwargs)
+
+ def plot_pipcs(self, region,**kwargs):
+ _plot_cs(self.pipcs, region, **kwargs)
  # to_format ###############################################################################################

  def to_format(self, path, build=None, verbose=True, **kwargs):
  if build is None:
  build = self.meta["gwaslab"]["genome_build"]
  _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
+
+
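
The net effect of these g_Sumstats.py changes: clumping and finemapping results move from flat attributes into the self.clumps and self.finemapping dictionaries, and the new read_pipcs/plot_pipcs methods load and plot SuSiE PIP/credible-set output. A minimal usage sketch, with file paths, reference-panel options and the region tuple as hypothetical placeholders rather than values taken from this diff:

    # hedged sketch based only on the method names and signatures shown above
    import gwaslab as gl

    mysumstats = gl.Sumstats("mysumstats.tsv.gz", fmt="gwaslab")   # placeholder input

    mysumstats.clump()                          # pass PLINK/reference options via **kwargs (not shown here)
    lead_variants = mysumstats.clumps["clumps"]       # was a flat attribute in 3.5.3; now a dict entry
    raw_clumps = mysumstats.clumps["clumps_raw"]      # new in 3.5.5

    mysumstats.calculate_ld_matrix()            # fills finemapping["path"], ["file"], ["plink_log"]
    mysumstats.run_susie_rss()                  # PIP / credible sets end up in mysumstats.pipcs

    # reload previously written finemapping output ({prefix}.pipcs) and plot one region
    mysumstats.read_pipcs("finemap_output/STUDY1")      # placeholder prefix
    mysumstats.plot_pipcs(region=(1, 100000, 500000))   # (chrom, start, end) tuple is an assumed format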
gwaslab/g_SumstatsPair.py CHANGED
@@ -23,6 +23,7 @@ from gwaslab.viz_plot_compare_af import plotdaf
  from gwaslab.util_ex_run_2samplemr import _run_two_sample_mr
  from gwaslab.util_ex_run_clumping import _clump
  from gwaslab.util_ex_ldproxyfinder import _extract_with_ld_proxy
+ from gwaslab.g_headers import _get_headers

  class SumstatsPair( ):
  def __init__(self, sumstatsObject1, sumstatsObject2, study=None, suffixes = ("_1","_2") ,verbose=True ):
@@ -35,6 +36,7 @@ class SumstatsPair( ):
  self.study_name = "{}_{}".format(sumstatsObject1.meta["gwaslab"]["study_name"], sumstatsObject2.meta["gwaslab"]["study_name"])
  else:
  self.study_name = "{}_{}".format("STUDY1", "STUDY2")
+
  self.snp_info_cols = []
  self.stats_cols =[]
  self.stats_cols2 =[]
@@ -45,11 +47,13 @@ class SumstatsPair( ):
  self.colocalization=pd.DataFrame()
  self.sumstats1 = pd.DataFrame()
  self.sumstats2 = pd.DataFrame()
- self.mr = {}
- self.clumps ={}
+
+ self.mr =dict()
+ self.clumps =dict()
  self.ns = None
- self.to_finemapping_file_path = ""
- self.plink_log = ""
+ self.finemapping = dict()
+ #self.to_finemapping_file_path = ""
+ #self.plink_log = ""

  self.log.write( "Start to create SumstatsPair object..." )
 
@@ -66,16 +70,17 @@ class SumstatsPair( ):
  verbose=verbose)

  for i in sumstatsObject1.data.columns:
- if i in ["SNPID","rsID","CHR","POS","EA","NEA","STATUS"]:
+ if i in _get_headers(mode="info"):
+ # extract SNP info columns from sumstats1
  self.snp_info_cols.append(i)
- elif i in ["BETA","SE","P","MLOG10P","N","N_CASE","N_CONTROL","Z","T","F","OR","OR_95L","OR_95U","HR","HR_95L","HR_95U","MAF","EAF","BETA_95L","BETA_95U"]:
+ elif i in _get_headers(mode="stats"):
  self.stats_cols.append(i)
  else:
  self.other_cols.append(i)
  for i in sumstatsObject2.data.columns:
- if i in ["SNPID","rsID","CHR","POS","EA","NEA","STATUS"]:
+ if i in _get_headers(mode="info"):
  continue
- elif i in ["BETA","SE","P","MLOG10P","N","N_CASE","N_CONTROL","Z","T","F","OR","OR_95L","OR_95U","HR","HR_95L","HR_95U","MAF","EAF","BETA_95L","BETA_95U"]:
+ elif i in _get_headers(mode="stats"):
  self.stats_cols2.append(i)
  else:
  self.other_cols2.append(i)
@@ -136,14 +141,13 @@


  def clump(self,**kwargs):
- self.clumps["clumps"], self.clumps["plink_log"] = _clump(self.data, log=self.log, p="P_1",mlog10p="MLOG10P_1", study = self.study_name, **kwargs)
+ self.clumps["clumps"],self.clumps["clumps_raw"],self.clumps["plink_log"] = _clump(self.data, log=self.log, p="P_1",mlog10p="MLOG10P_1", study = self.study_name, **kwargs)

  def to_coloc(self,**kwargs):
- self.to_finemapping_file_path, output_file_list, self.plink_log = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)
+ self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"] = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)

  def run_coloc_susie(self,**kwargs):
-
- self.colocalization = _run_coloc_susie(self.to_finemapping_file_path,log=self.log,ncols=self.ns,**kwargs)
+ self.colocalization = _run_coloc_susie(self.finemapping["path"],log=self.log,ncols=self.ns,**kwargs)

  def run_two_sample_mr(self, clump=False, **kwargs):
  exposure1 = self.study_name.split("_")[0]
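
SumstatsPair gets the same treatment: mr, clumps and finemapping are now dictionaries, and run_coloc_susie reads the region files from finemapping["path"]. A hedged sketch of the colocalization flow, assuming SumstatsPair is exposed at the package top level and with placeholder inputs:

    # hedged sketch; constructor inputs and any PLINK/reference kwargs are placeholders
    import gwaslab as gl

    gwas = gl.Sumstats("gwas.tsv.gz", fmt="gwaslab")
    eqtl = gl.Sumstats("eqtl.tsv.gz", fmt="gwaslab")
    pair = gl.SumstatsPair(gwas, eqtl, suffixes=("_1", "_2"))   # assumes a top-level export

    pair.clump()              # fills pair.clumps["clumps"], ["clumps_raw"], ["plink_log"]
    pair.to_coloc()           # fills pair.finemapping["path"], ["file"], ["plink_log"]
    pair.run_coloc_susie()    # coloc-SuSiE results stored in pair.colocalization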
gwaslab/g_headers.py ADDED
@@ -0,0 +1,131 @@
+ dtype_dic={
+ 'SNPID' : 'string' ,
+ 'rsID' : 'string' ,
+ 'CHR' : 'Int64' ,
+ 'POS' : 'Int64' ,
+ 'EA' : 'category' ,
+ 'NEA' : 'category' ,
+ 'STATUS' : 'category' ,
+ 'REF' : 'category' ,
+ 'ALT' : 'category' ,
+ 'EAF' : 'float64' ,
+ 'NEAF' : 'float64' ,
+ 'MAF' : 'float64' ,
+ 'INFO' : 'float32' ,
+ 'BETA' : 'float64' ,
+ 'SE' : 'float64' ,
+ 'BETA_95U' : 'float64' ,
+ 'BETA_95L' : 'float64' ,
+ 'OR' : 'float64' ,
+ 'OR_95U' : 'float64' ,
+ 'OR_95L' : 'float64' ,
+ 'HR' : 'float64' ,
+ 'HR_95U' : 'float64' ,
+ 'HR_95L' : 'float64' ,
+ 'CHISQ' : 'float64' ,
+ 'Z' : 'float64' ,
+ 'T' : 'float64' ,
+ 'F' : 'float64' ,
+ 'P' : 'float64' ,
+ 'P_MANTISSA' : 'float64' ,
+ 'P_EXPONENT' : 'float64' ,
+ 'MLOG10P' : 'float64' ,
+ 'SNPR2' : 'float64' ,
+ 'DOF' : 'Int64' ,
+ 'P_HET' : 'float64' ,
+ 'I2_HET' : 'float64' ,
+ 'DENSITY' : 'Int64' ,
+ 'N' : 'Int64' ,
+ 'N_CASE' : 'Int64' ,
+ 'N_CONTROL' : 'Int64' ,
+ 'GENENAME' : 'string' ,
+ 'CIS/TRANS' : 'string' ,
+ 'DISTANCE_TO_KNOWN' : 'Int64' ,
+ 'LOCATION_OF_KNOWN' : 'string' ,
+ 'KNOWN_ID' : 'string' ,
+ 'KNOWN_PUBMED_ID' : 'string' ,
+ 'KNOWN_AUTHOR' : 'string' ,
+ 'KNOWN_SET_VARIANT' : 'string' ,
+ 'KNOWN_VARIANT' : 'string' ,
+ 'KNOWN_SET' : 'string' ,
+ 'NOVEL' : 'string' ,
+ 'PIP' :' float64 ',
+ 'CREDIBLE_SET_INDEX': 'Int64' ,
+ 'N_SNP' : 'Int64' ,
+ 'LOCUS' : 'string' ,
+ 'STUDY' : 'string' ,
+ }
+
+
+ description_dic={
+ 'SNPID' :' variant ID (CHR:POS:NEA:EA) ',
+ 'rsID' :' dbSNP rsID ',
+ 'CHR' :' chromosome number (X 23, Y 24, MT 25) ',
+ 'POS' :' base pair position ',
+ 'EA' :' effect allele ',
+ 'NEA' :' non-effect allele ',
+ 'STATUS' :' variant standardization & harmonization status ',
+ 'REF' :' reference allele in reference genome ',
+ 'ALT' :' alternative allele ',
+ 'EAF' :' effect allele frequency ',
+ 'NEAF' :' non-effect allele frequency ',
+ 'MAF' :' minor allele frequency ',
+ 'INFO' :' imputation INFO/RSQ ',
+ 'BETA' :' effect size beta ',
+ 'SE' :' standard error of beta ',
+ 'BETA_95U' :' upper bound of beta 95% condidence interval ',
+ 'BETA_95L' :' lower bound of beta 95% condidence interval ',
+ 'OR' :' odds ratio ',
+ 'OR_95U' :' upper bound of OR 95% condidence interval ',
+ 'OR_95L' :' lower bound of OR 95% condidence interval ',
+ 'HR' :' hazard ratio ',
+ 'HR_95U' :' upper bound of HR 95% condidence interval ',
+ 'HR_95L' :' lower bound of HR 95% condidence interval ',
+ 'CHISQ' :' chi square ',
+ 'Z' :' z score ',
+ 'T' :' t statistics ',
+ 'F' :' F statistics ',
+ 'P' :' P value ',
+ 'P_MANTISSA' :' P mantissa ',
+ 'P_EXPONENT' :' P exponent ',
+ 'MLOG10P' :' $-log_{10}(P)$ ',
+ 'SNPR2' :' per variant R2 ',
+ 'DOF' :' degree of freedom ',
+ 'P_HET' :' heterogeneity test P value ',
+ 'I2_HET' :' heterogeneity I2 ',
+ 'DENSITY' :' signal density ',
+ 'N' :' total sample size ',
+ 'N_CASE' :' number of cases ',
+ 'N_CONTROL' :' number of controls ',
+ 'GENENAME' :' nearest gene symbol ',
+ 'CIS/TRANS' :' whether the variant is in cis or trans region ',
+ 'DISTANCE_TO_KNOWN' :' distance to nearest known variants ',
+ 'LOCATION_OF_KNOWN' :' relative location to nearest known variants ',
+ 'KNOWN_ID' :' nearest known variant ID ',
+ 'KNOWN_PUBMED_ID' :' pubmed ID of the known variant ',
+ 'KNOWN_AUTHOR' :' author of the study ',
+ 'KNOWN_SET_VARIANT' :' known set and overlapping variant ',
+ 'KNOWN_VARIANT' :' known variant overlapping with the variant ',
+ 'KNOWN_SET' :' variant set of the known variant ',
+ 'PIP' :' Posterior Inclusion Probability ',
+ 'CREDIBLE_SET_INDEX':' credible sets index ',
+ 'N_SNP' :' number of variants included in this locus for finemapping ',
+ 'LOCUS' :' locus name, usually the lead variant of the locus ',
+ 'STUDY' :' study name '}
+
+ def _get_headers(mode="all"):
+ if mode=="info":
+ return ["SNPID","rsID","CHR","POS","EA","NEA","STATUS"]
+ elif mode=="stats":
+ return ["BETA","SE","P","MLOG10P","N","N_CASE","N_CONTROL","Z","T","F","OR","OR_95L","OR_95U","HR","HR_95L","HR_95U","MAF","EAF","BETA_95L","BETA_95U"]
+ else:
+ return description_dic.keys()
+
+ def _check_overlap_with_reserved_keys(other):
+ overlapped=[]
+ for i in other:
+ if i in _get_headers():
+ overlapped.append(i)
+ return overlapped
+
+
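
Because g_headers.py is added in full above, its behaviour can be illustrated directly; the column list passed in below is made up:

    # illustrative call against the helpers defined above (the input list is made up)
    from gwaslab.g_headers import _get_headers, _check_overlap_with_reserved_keys

    _get_headers(mode="info")
    # ['SNPID', 'rsID', 'CHR', 'POS', 'EA', 'NEA', 'STATUS']

    _check_overlap_with_reserved_keys(["BETA", "MY_ANNOTATION", "LOCUS"])
    # ['BETA', 'LOCUS']  -> these collide with reserved keywords; 'MY_ANNOTATION' does not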
gwaslab/g_meta.py CHANGED
@@ -55,4 +55,5 @@ def _append_meta_record(old, new):
  if old == "Unknown" or old== "Unchecked":
  return new
  else:
- return "{}, {}".format(old, new)
+ return "{}, {}".format(old, new)
+
gwaslab/g_version.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
  def _show_version(log=Log(), verbose=True):
  # show version when loading sumstats
  log.write("GWASLab v{} https://cloufield.github.io/gwaslab/".format(gwaslab_info()["version"]),verbose=verbose)
- log.write("(C) 2022-2024, Yunye He, Kamatani Lab, MIT License, gwaslab@gmail.com",verbose=verbose)
+ log.write("(C) 2022-2025, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com",verbose=verbose)

  def _get_version():
  # return short version string like v3.4.33
@@ -15,8 +15,8 @@ def _get_version():
  def gwaslab_info():
  # version meta information
  dic={
- "version":"3.5.3",
- "release_date":"20241217"
+ "version":"3.5.5",
+ "release_date":"20250102"
  }
  return dic
 
@@ -8,11 +8,13 @@ from gwaslab.bd_common_data import get_format_dict
  from gwaslab.qc_fix_sumstats import sortcolumn
  from gwaslab.qc_fix_sumstats import _process_build
  from gwaslab.qc_check_datatype import check_datatype
+ from gwaslab.qc_check_datatype import quick_convert_datatype
  from gwaslab.qc_check_datatype import check_dataframe_memory_usage
-
+ from gwaslab.g_headers import _check_overlap_with_reserved_keys
  #20221030
  def preformat(sumstats,
  fmt=None,
+ tab_fmt="tsv",
  snpid=None,
  rsid=None,
  chrom=None,
@@ -66,12 +68,21 @@ def preformat(sumstats,
  rename_dictionary = {}
  usecols = []
  dtype_dictionary ={}
-
+ if readargs is None:
+ readargs={}
  #######################################################################################################################################################
  # workflow:
  # 1. formatbook
  # 2. user specified header
  # 3. usekeys
+ if tab_fmt=="parquet":
+ if type(sumstats) is str:
+ log.write("Start to load data from parquet file....",verbose=verbose)
+ log.write(" -path: {}".format(sumstats),verbose=verbose)
+ sumstats = pd.read_parquet(sumstats,**readargs)
+ log.write("Finished loading parquet file into pd.DataFrame....",verbose=verbose)
+ else:
+ raise ValueError("Please input a path for parquet file.")

  if fmt is not None:
  # loading format parameters
@@ -145,9 +156,11 @@ def preformat(sumstats,
  if key in raw_cols:
  usecols.append(key)
  if value in ["EA","NEA"]:
- dtype_dictionary[value]="category"
- if value in ["CHR","STATUS"]:
- dtype_dictionary[value]="string"
+ dtype_dictionary[key]="category"
+ if value in ["STATUS"]:
+ dtype_dictionary[key]="string"
+ if value in ["CHR"]:
+ dtype_dictionary[key]="string"

  except ValueError:
  raise ValueError("Please input a path or a pd.DataFrame, and make sure the separator is correct and the columns you specified are in the file.")
@@ -276,6 +289,8 @@ def preformat(sumstats,
  rename_dictionary[status]="STATUS"
  dtype_dictionary[status]="string"
  if other:
+ overlapped = _check_overlap_with_reserved_keys(other)
+ log.warning("Columns with headers overlapping with GWASLab reserved keywords:{}".format(overlapped),verbose=verbose)
  usecols = usecols + other
  for i in other:
  rename_dictionary[i] = i
@@ -359,8 +374,13 @@ def preformat(sumstats,
  sumstats = sumstats[usecols].copy()
  for key,value in dtype_dictionary.items():
  if key in usecols:
- sumstats[key] = sumstats[key].astype(value)
-
+ astype = value
+ if rename_dictionary[key]=="CHR":
+ astype ="Int64"
+ try:
+ sumstats[key] = sumstats[key].astype(astype)
+ except:
+ sumstats[key] = sumstats[key].astype("string")
  except ValueError:
  raise ValueError("Please input a path or a pd.DataFrame, and make sure it contain the columns.")
 
@@ -400,6 +420,8 @@ def preformat(sumstats,

  ## reodering ###################################################################################################
  sumstats = sortcolumn(sumstats=sumstats,log=log,verbose=verbose)
+ sumstats = quick_convert_datatype(sumstats,log=log,verbose=verbose)
+
  check_datatype(sumstats,log=log,verbose=verbose)
  gc.collect()
  check_dataframe_memory_usage(sumstats,log=log,verbose=verbose)
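
These preformat changes add a parquet input path guarded by the new tab_fmt argument, default chromosome dtype handling, and a reserved-keyword warning for extra columns. A hedged sketch of loading a parquet file (the path is a placeholder):

    # hedged sketch of the new tab_fmt="parquet" branch; the file path is a placeholder
    import gwaslab as gl

    mysumstats = gl.Sumstats(
        "mysumstats.parquet",   # must be a path string; a DataFrame raises ValueError in this branch
        fmt="gwaslab",
        tab_fmt="parquet",      # routes loading through pd.read_parquet(path, **readargs)
    )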
gwaslab/io_read_pipcs.py ADDED
@@ -0,0 +1,23 @@
+ import pandas as pd
+ from gwaslab.g_Log import Log
+ from gwaslab.qc_check_datatype import check_datatype
+ from gwaslab.qc_check_datatype import check_dataframe_memory_usage
+
+ def _read_pipcs(data, output_prefix, log=Log(),verbose=True):
+ log.write("Start to load PIP and CREDIBLE_SET_INDEX from file...",verbose=verbose)
+ log.write(" -File:{}.pipcs".format(output_prefix),verbose=verbose)
+
+ pipcs = pd.read_csv("{}.pipcs".format(output_prefix))
+
+ log.write(" -Merging CHR and POS from main dataframe...",verbose=verbose)
+ pipcs = _merge_chrpos(data,pipcs)
+
+ log.write(" -Current pipcs Dataframe shape :",len(pipcs)," x ", len(pipcs.columns),verbose=verbose)
+ check_datatype(pipcs,log=log,verbose=verbose)
+ check_dataframe_memory_usage(pipcs,log=log,verbose=verbose)
+ log.write("Finished loading PIP and CREDIBLE_SET_INDEX from file!",verbose=verbose)
+ return pipcs
+
+ def _merge_chrpos(data,pipcs):
+ df = pd.merge(pipcs, data,on="SNPID",how="left")
+ return df
gwaslab/io_to_formats.py CHANGED
@@ -114,48 +114,49 @@ def _to_format(sumstats,

  #######################################################################################################
  #formatting float statistics
- onetime_log.write(" -Formatting statistics ...",verbose=verbose)

- formats = {
- 'EAF': '{:.4g}',
- 'MAF': '{:.4g}',
- 'BETA': '{:.4f}',
- 'SE': '{:.4f}',
- 'BETA_95U': '{:.4f}',
- 'BETA_95L': '{:.4f}',
- 'Z': '{:.4f}',
- 'CHISQ': '{:.4f}',
- 'F': '{:.4f}',
- 'OR': '{:.4f}',
- 'OR_95U': '{:.4f}',
- 'OR_95L': '{:.4f}',
- 'HR': '{:.4f}',
- 'HR_95U': '{:.4f}',
- 'HR_95L': '{:.4f}',
- 'INFO': '{:.4f}',
- 'P': '{:.4e}',
- 'MLOG10P': '{:.4f}',
- 'DAF': '{:.4f}'}
-
- for col, f in float_formats.items():
- if col in output.columns:
- formats[col]=f
-
- for col, f in formats.items():
- if col in output.columns:
- if str(output[col].dtype) in ["Float32","Float64","float64","float32","float16","float"]:
- output[col] = output[col].map(f.format)
-
- onetime_log.write(" -Float statistics formats:",verbose=verbose)
- keys=[]
- values=[]
- for key,value in formats.items():
- if key in output.columns:
- keys.append(key)
- values.append(value)
-
- onetime_log.write(" - Columns :",keys,verbose=verbose)
- onetime_log.write(" - Output formats:",values,verbose=verbose)
+ if tab_fmt!="parquet":
+ onetime_log.write(" -Formatting statistics ...",verbose=verbose)
+ formats = {
+ 'EAF': '{:.4g}',
+ 'MAF': '{:.4g}',
+ 'BETA': '{:.4f}',
+ 'SE': '{:.4f}',
+ 'BETA_95U': '{:.4f}',
+ 'BETA_95L': '{:.4f}',
+ 'Z': '{:.4f}',
+ 'CHISQ': '{:.4f}',
+ 'F': '{:.4f}',
+ 'OR': '{:.4f}',
+ 'OR_95U': '{:.4f}',
+ 'OR_95L': '{:.4f}',
+ 'HR': '{:.4f}',
+ 'HR_95U': '{:.4f}',
+ 'HR_95L': '{:.4f}',
+ 'INFO': '{:.4f}',
+ 'P': '{:.4e}',
+ 'MLOG10P': '{:.4f}',
+ 'DAF': '{:.4f}'}
+
+ for col, f in float_formats.items():
+ if col in output.columns:
+ formats[col]=f
+
+ for col, f in formats.items():
+ if col in output.columns:
+ if str(output[col].dtype) in ["Float32","Float64","float64","float32","float16","float"]:
+ output[col] = output[col].map(f.format)
+
+ onetime_log.write(" -Float statistics formats:",verbose=verbose)
+ keys=[]
+ values=[]
+ for key,value in formats.items():
+ if key in output.columns:
+ keys.append(key)
+ values.append(value)
+
+ onetime_log.write(" - Columns :",keys,verbose=verbose)
+ onetime_log.write(" - Output formats:",values,verbose=verbose)

  ##########################################################################################################
  # output, mapping column names
@@ -233,7 +234,7 @@ def tofmt(sumstats,
  if xymt_number is False and pd.api.types.is_integer_dtype(sumstats["CHR"]):
  sumstats["CHR"]= sumstats["CHR"].map(get_number_to_chr(xymt=xymt,prefix=chr_prefix))
  # add prefix to CHR
- elif chr_prefix is not None:
+ elif len(chr_prefix)>0:
  sumstats["CHR"]= chr_prefix + sumstats["CHR"].astype("string")

  ####################################################################################################################
@@ -409,7 +410,7 @@ def _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tab
  log.write(f" -@ detected: writing each chromosome to a single file...",verbose=verbose)
  log.write(" -Chromosomes:{}...".format(list(sumstats["CHR"].unique())),verbose=verbose)
  for single_chr in list(sumstats["CHR"].unique()):
- single_path = path.replace("@",single_chr)
+ single_path = path.replace("@","{}".format(single_chr))

  fast_to_csv(sumstats.loc[sumstats[chr_header]==single_chr,:],
  single_path,
@@ -422,7 +423,7 @@ def _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tab
  log.write(f" -@ detected: writing each chromosome to a single file...",verbose=verbose)
  log.write(" -Chromosomes:{}...".format(list(sumstats["CHR"].unique())),verbose=verbose)
  for single_chr in list(sumstats["CHR"].unique()):
- single_path = path.replace("@",single_chr)
+ single_path = path.replace("@","{}".format(single_chr))

  sumstats.loc[sumstats[chr_header]==single_chr,:].to_csv(path, index=None, **to_csvargs)
  else:
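
On the writing side, float formatting is now skipped when tab_fmt is parquet, so numeric columns keep their dtypes instead of being rendered as fixed-precision strings. A hedged sketch (whether tab_fmt is forwarded from Sumstats.to_format through **kwargs to _to_format is an assumption, not something this diff shows):

    # hedged sketch; assumes to_format(**kwargs) forwards tab_fmt to _to_format/_write_tabular
    mysumstats.to_format("./mysumstats_out", fmt="gwaslab", tab_fmt="parquet")
    # numeric columns keep their original dtypes in the parquet output,
    # per the tab_fmt!="parquet" guard shown above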