gwaslab 3.4.37__py3-none-any.whl → 3.4.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/data/formatbook.json +722 -721
- gwaslab/g_Log.py +8 -0
- gwaslab/g_Sumstats.py +26 -147
- gwaslab/g_SumstatsPair.py +6 -2
- gwaslab/g_Sumstats_summary.py +3 -3
- gwaslab/g_version.py +2 -2
- gwaslab/hm_casting.py +29 -15
- gwaslab/hm_harmonize_sumstats.py +291 -163
- gwaslab/hm_rsid_to_chrpos.py +1 -1
- gwaslab/io_preformat_input.py +43 -37
- gwaslab/io_to_formats.py +428 -295
- gwaslab/qc_check_datatype.py +3 -3
- gwaslab/qc_fix_sumstats.py +793 -682
- gwaslab/util_ex_calculate_ldmatrix.py +29 -11
- gwaslab/util_ex_gwascatalog.py +1 -1
- gwaslab/util_ex_ldproxyfinder.py +1 -1
- gwaslab/util_ex_process_ref.py +3 -3
- gwaslab/util_ex_run_coloc.py +26 -4
- gwaslab/util_in_convert_h2.py +1 -1
- gwaslab/util_in_fill_data.py +2 -2
- gwaslab/util_in_filter_value.py +122 -34
- gwaslab/util_in_get_density.py +2 -2
- gwaslab/util_in_get_sig.py +41 -9
- gwaslab/viz_aux_quickfix.py +24 -19
- gwaslab/viz_aux_reposition_text.py +7 -4
- gwaslab/viz_aux_save_figure.py +6 -5
- gwaslab/viz_plot_compare_af.py +5 -5
- gwaslab/viz_plot_miamiplot2.py +28 -20
- gwaslab/viz_plot_mqqplot.py +109 -72
- gwaslab/viz_plot_qqplot.py +11 -8
- gwaslab/viz_plot_regionalplot.py +3 -1
- gwaslab/viz_plot_trumpetplot.py +15 -6
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/METADATA +2 -2
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/RECORD +37 -37
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0
gwaslab/g_Log.py
CHANGED
|
@@ -2,6 +2,7 @@ import time
|
|
|
2
2
|
class Log():
|
|
3
3
|
def __init__(self):
|
|
4
4
|
self.log_text=str(time.ctime(time.time()))+ " " + "Sumstats Object created."+ "\n"
|
|
5
|
+
|
|
5
6
|
def write(self,*message,end="\n",show_time=True, verbose=True):
|
|
6
7
|
if show_time is True:
|
|
7
8
|
if verbose: print(str(time.ctime(time.time())),*message,end=end)
|
|
@@ -9,6 +10,13 @@ class Log():
|
|
|
9
10
|
else:
|
|
10
11
|
if verbose: print(*message,end=end)
|
|
11
12
|
self.log_text = self.log_text + " ".join(map(str,message)) + end
|
|
13
|
+
|
|
14
|
+
def warning(self,*message,end="\n",show_time=True, verbose=True):
|
|
15
|
+
self.write(" #WARNING! {}".format(" ".join(map(str,message))),
|
|
16
|
+
end=end,
|
|
17
|
+
show_time=show_time,
|
|
18
|
+
verbose=verbose)
|
|
19
|
+
|
|
12
20
|
def show(self):
|
|
13
21
|
print(self.log_text)
|
|
14
22
|
def save(self,path,verbose=True):
|
gwaslab/g_Sumstats.py
CHANGED
|
@@ -5,7 +5,7 @@ import copy
|
|
|
5
5
|
from gwaslab.g_Sumstats_summary import summarize
|
|
6
6
|
from gwaslab.g_Sumstats_summary import lookupstatus
|
|
7
7
|
from gwaslab.io_preformat_input import preformat
|
|
8
|
-
from gwaslab.io_to_formats import
|
|
8
|
+
from gwaslab.io_to_formats import _to_format
|
|
9
9
|
from gwaslab.g_Log import Log
|
|
10
10
|
from gwaslab.qc_fix_sumstats import fixID
|
|
11
11
|
from gwaslab.qc_fix_sumstats import removedup
|
|
@@ -35,6 +35,8 @@ from gwaslab.util_in_filter_value import filterregionout
|
|
|
35
35
|
from gwaslab.util_in_filter_value import inferbuild
|
|
36
36
|
from gwaslab.util_in_filter_value import sampling
|
|
37
37
|
from gwaslab.util_in_filter_value import _get_flanking
|
|
38
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
|
|
39
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_id
|
|
38
40
|
from gwaslab.util_in_calculate_gc import lambdaGC
|
|
39
41
|
from gwaslab.util_in_convert_h2 import _get_per_snp_r2
|
|
40
42
|
from gwaslab.util_in_get_sig import getsig
|
|
@@ -449,7 +451,7 @@ class Sumstats():
|
|
|
449
451
|
|
|
450
452
|
# utilities ############################################################################################################
|
|
451
453
|
# filter series ######################################################################
|
|
452
|
-
def
|
|
454
|
+
def filter_flanking(self, inplace=False,**args):
|
|
453
455
|
if inplace is False:
|
|
454
456
|
new_Sumstats_object = copy.deepcopy(self)
|
|
455
457
|
new_Sumstats_object.data = _get_flanking(new_Sumstats_object.data, **args)
|
|
@@ -457,6 +459,22 @@ class Sumstats():
|
|
|
457
459
|
else:
|
|
458
460
|
self.data = _get_flanking(self.data, **args)
|
|
459
461
|
|
|
462
|
+
def filter_flanking_by_chrpos(self, chrpos, inplace=False,**args):
|
|
463
|
+
if inplace is False:
|
|
464
|
+
new_Sumstats_object = copy.deepcopy(self)
|
|
465
|
+
new_Sumstats_object.data = _get_flanking_by_chrpos(new_Sumstats_object.data, chrpos, **args)
|
|
466
|
+
return new_Sumstats_object
|
|
467
|
+
else:
|
|
468
|
+
self.data = _get_flanking_by_chrpos(self.data, chrpos,**args)
|
|
469
|
+
|
|
470
|
+
def filter_flanking_by_id(self, snpid, inplace=False,**args):
|
|
471
|
+
if inplace is False:
|
|
472
|
+
new_Sumstats_object = copy.deepcopy(self)
|
|
473
|
+
new_Sumstats_object.data = _get_flanking_by_id(new_Sumstats_object.data, snpid, **args)
|
|
474
|
+
return new_Sumstats_object
|
|
475
|
+
else:
|
|
476
|
+
self.data = _get_flanking_by_id(self.data, snpid, **args)
|
|
477
|
+
|
|
460
478
|
def filter_value(self, expr, inplace=False, **args):
|
|
461
479
|
if inplace is False:
|
|
462
480
|
new_Sumstats_object = copy.deepcopy(self)
|
|
@@ -548,8 +566,10 @@ class Sumstats():
|
|
|
548
566
|
|
|
549
567
|
return plot
|
|
550
568
|
|
|
551
|
-
def plot_trumpet(self, **args):
|
|
552
|
-
|
|
569
|
+
def plot_trumpet(self, build=None, **args):
|
|
570
|
+
if build is None:
|
|
571
|
+
build = self.meta["gwaslab"]["genome_build"]
|
|
572
|
+
fig = plottrumpet(self.data,build = build, **args)
|
|
553
573
|
return fig
|
|
554
574
|
|
|
555
575
|
def get_lead(self, build=None, gls=False, **args):
|
|
@@ -670,148 +690,7 @@ class Sumstats():
|
|
|
670
690
|
|
|
671
691
|
# to_format ###############################################################################################
|
|
672
692
|
|
|
673
|
-
def to_format(self,
|
|
674
|
-
path="./sumstats",
|
|
675
|
-
fmt="gwaslab",
|
|
676
|
-
extract=None,
|
|
677
|
-
exclude=None,
|
|
678
|
-
cols=None,
|
|
679
|
-
id_use="rsID",
|
|
680
|
-
hapmap3=False,
|
|
681
|
-
exclude_hla=False,
|
|
682
|
-
hla_range=(25,34),
|
|
683
|
-
build=None,
|
|
684
|
-
n=None,
|
|
685
|
-
verbose=True,
|
|
686
|
-
no_status=False,
|
|
687
|
-
output_log=True,
|
|
688
|
-
to_csvargs=None,
|
|
689
|
-
float_formats=None,
|
|
690
|
-
xymt_number=False,
|
|
691
|
-
xymt=None,
|
|
692
|
-
chr_prefix="",
|
|
693
|
-
ssfmeta=False,
|
|
694
|
-
md5sum=False,
|
|
695
|
-
bgzip=False,
|
|
696
|
-
tabix=False,
|
|
697
|
-
tabix_indexargs={}):
|
|
693
|
+
def to_format(self, path, build=None, **args):
|
|
698
694
|
if build is None:
|
|
699
695
|
build = self.meta["gwaslab"]["genome_build"]
|
|
700
|
-
|
|
701
|
-
if to_csvargs is None:
|
|
702
|
-
to_csvargs = {}
|
|
703
|
-
if float_formats is None:
|
|
704
|
-
float_formats={}
|
|
705
|
-
if cols is None:
|
|
706
|
-
cols=[]
|
|
707
|
-
if xymt is None:
|
|
708
|
-
xymt = ["X","Y","MT"]
|
|
709
|
-
|
|
710
|
-
formatlist= get_formats_list() + ["vep","bed","annovar","vcf"]
|
|
711
|
-
if fmt in formatlist:
|
|
712
|
-
if verbose: onetime_log.write("Start to format the output sumstats in: ",fmt, " format")
|
|
713
|
-
else:
|
|
714
|
-
raise ValueError("Please select a format to output")
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
#######################################################################################################
|
|
718
|
-
# filter
|
|
719
|
-
output = self.data.copy()
|
|
720
|
-
if extract is not None:
|
|
721
|
-
output = output.loc[output[id_use].isin(extract),:]
|
|
722
|
-
|
|
723
|
-
if exclude is not None:
|
|
724
|
-
output = output.loc[~output[id_use].isin(exclude),:]
|
|
725
|
-
|
|
726
|
-
#hla and hapmap3 #######################################################################################
|
|
727
|
-
suffix=fmt
|
|
728
|
-
|
|
729
|
-
#exclude hla
|
|
730
|
-
if exclude_hla is True:
|
|
731
|
-
if verbose: onetime_log.write(" -Excluding variants in MHC (HLA) region ...")
|
|
732
|
-
before = len(output)
|
|
733
|
-
is_hla = (output["CHR"].astype("string") == "6") & (output["POS"].astype("Int64") > hla_range[0]*1000000) & (output["POS"].astype("Int64") < hla_range[1]*1000000)
|
|
734
|
-
output = output.loc[~is_hla,:]
|
|
735
|
-
after = len(output)
|
|
736
|
-
if verbose: onetime_log.write(" -Exclude "+ str(before - after) + " variants in MHC (HLA) region : {}Mb - {}Mb.".format(hla_range[0], hla_range[1]))
|
|
737
|
-
suffix = "noMHC."+suffix
|
|
738
|
-
|
|
739
|
-
#extract hapmap3 SNPs
|
|
740
|
-
if hapmap3 is True:
|
|
741
|
-
output = gethapmap3(output,build=build,verbose=True)
|
|
742
|
-
after = len(output)
|
|
743
|
-
if verbose: onetime_log.write(" -Extract "+ str(after) + " variants in Hapmap3 datasets for build "+build+".")
|
|
744
|
-
suffix = "hapmap3."+suffix
|
|
745
|
-
|
|
746
|
-
# add a n column
|
|
747
|
-
if n is not None:
|
|
748
|
-
output["N"] = n
|
|
749
|
-
|
|
750
|
-
#######################################################################################################
|
|
751
|
-
#formatting float statistics
|
|
752
|
-
if verbose: onetime_log.write(" -Formatting statistics ...")
|
|
753
|
-
|
|
754
|
-
formats = {'EAF': '{:.4g}',
|
|
755
|
-
'BETA': '{:.4f}',
|
|
756
|
-
'Z': '{:.4f}',
|
|
757
|
-
'CHISQ': '{:.4f}',
|
|
758
|
-
'SE': '{:.4f}',
|
|
759
|
-
'OR': '{:.4f}',
|
|
760
|
-
'OR_95U': '{:.4f}',
|
|
761
|
-
'OR_95L': '{:.4f}',
|
|
762
|
-
'INFO': '{:.4f}',
|
|
763
|
-
'P': '{:.4e}',
|
|
764
|
-
'MLOG10P': '{:.4f}',
|
|
765
|
-
'DAF': '{:.4f}'
|
|
766
|
-
}
|
|
767
|
-
|
|
768
|
-
for col, f in float_formats.items():
|
|
769
|
-
if col in output.columns:
|
|
770
|
-
formats[col]=f
|
|
771
|
-
for col, f in formats.items():
|
|
772
|
-
if col in output.columns:
|
|
773
|
-
if output[col].dtype in ["float64","float32","float16","float"]:
|
|
774
|
-
output[col] = output[col].map(f.format)
|
|
775
|
-
if verbose:
|
|
776
|
-
onetime_log.write(" - Float statistics formats:")
|
|
777
|
-
keys=[]
|
|
778
|
-
values=[]
|
|
779
|
-
for key,value in formats.items():
|
|
780
|
-
if key in output.columns:
|
|
781
|
-
keys.append(key)
|
|
782
|
-
values.append(value)
|
|
783
|
-
onetime_log.write(" - Columns:",keys)
|
|
784
|
-
onetime_log.write(" - Output formats:",values)
|
|
785
|
-
|
|
786
|
-
##########################################################################################################
|
|
787
|
-
# output, mapping column names
|
|
788
|
-
|
|
789
|
-
if fmt in get_formats_list() + ["vep","bed","annovar","vcf"]:
|
|
790
|
-
tofmt(output,
|
|
791
|
-
path=path,
|
|
792
|
-
fmt=fmt,
|
|
793
|
-
cols=cols,
|
|
794
|
-
suffix=suffix,
|
|
795
|
-
build=build,
|
|
796
|
-
verbose=True,
|
|
797
|
-
no_status=no_status,
|
|
798
|
-
log=onetime_log,
|
|
799
|
-
to_csvargs=to_csvargs,
|
|
800
|
-
chr_prefix=chr_prefix,
|
|
801
|
-
meta = self.meta,
|
|
802
|
-
ssfmeta=ssfmeta,
|
|
803
|
-
bgzip=bgzip,
|
|
804
|
-
tabix=tabix,
|
|
805
|
-
tabix_indexargs=tabix_indexargs,
|
|
806
|
-
md5sum=md5sum,
|
|
807
|
-
xymt_number=xymt_number,
|
|
808
|
-
xymt=xymt)
|
|
809
|
-
if output_log is True:
|
|
810
|
-
log_path = path + "."+ suffix + ".log"
|
|
811
|
-
if verbose: onetime_log.write(" -Saving log file to: {}".format(log_path))
|
|
812
|
-
if verbose: onetime_log.write("Finished outputting successfully!")
|
|
813
|
-
try:
|
|
814
|
-
onetime_log.save(log_path, verbose=False)
|
|
815
|
-
except:
|
|
816
|
-
pass
|
|
817
|
-
|
|
696
|
+
_to_format(self.data, path, log=self.log, meta=self.meta, build=build, **args)
|
gwaslab/g_SumstatsPair.py
CHANGED
|
@@ -28,8 +28,10 @@ class SumstatsPair( ):
|
|
|
28
28
|
raise ValueError("Please provide GWASLab Sumstats Object #1.")
|
|
29
29
|
if not isinstance(sumstatsObject2, Sumstats):
|
|
30
30
|
raise ValueError("Please provide GWASLab Sumstats Object #2.")
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
if sumstatsObject1.meta["gwaslab"]["study_name"]!=sumstatsObject2.meta["gwaslab"]["study_name"]:
|
|
32
|
+
self.study_name = "{}_{}".format(sumstatsObject1.meta["gwaslab"]["study_name"], sumstatsObject2.meta["gwaslab"]["study_name"])
|
|
33
|
+
else:
|
|
34
|
+
self.study_name = "{}_{}".format("STUDY1", "STUDY2")
|
|
33
35
|
self.snp_info_cols = []
|
|
34
36
|
self.stats_cols =[]
|
|
35
37
|
self.other_cols=[]
|
|
@@ -42,6 +44,8 @@ class SumstatsPair( ):
|
|
|
42
44
|
self.clumps ={}
|
|
43
45
|
self.ns = None
|
|
44
46
|
|
|
47
|
+
self.log.write( "Start to create SumstatsPair object..." )
|
|
48
|
+
|
|
45
49
|
for i in sumstatsObject1.data.columns:
|
|
46
50
|
if i in ["SNPID","rsID","CHR","POS","EA","NEA","STATUS"]:
|
|
47
51
|
self.snp_info_cols.append(i)
|
gwaslab/g_Sumstats_summary.py
CHANGED
|
@@ -15,7 +15,7 @@ def summarize(insumstats,
|
|
|
15
15
|
for i in [snpid,rsid,eaf,p,n,status]:
|
|
16
16
|
if i in insumstats.columns:
|
|
17
17
|
cols.append(i)
|
|
18
|
-
sumstats= insumstats
|
|
18
|
+
sumstats= insumstats[cols].copy()
|
|
19
19
|
###############################################################################
|
|
20
20
|
numeric_cols=[]
|
|
21
21
|
output = {}
|
|
@@ -68,7 +68,7 @@ def summarize(insumstats,
|
|
|
68
68
|
sumstats.drop(columns='uniq_index',inplace=True)
|
|
69
69
|
status_dic = {}
|
|
70
70
|
for index,row in status_summary.iterrows():
|
|
71
|
-
status_dic[str(index)]=row[0]
|
|
71
|
+
status_dic[str(index)]=row.iloc[0]
|
|
72
72
|
output["STATUS"]=status_dic
|
|
73
73
|
numeric_cols.append("STATUS")
|
|
74
74
|
df = pd.DataFrame.from_dict({(i,j): output[i][j]
|
|
@@ -84,7 +84,7 @@ def summarize(insumstats,
|
|
|
84
84
|
return df
|
|
85
85
|
|
|
86
86
|
def sum_status(id_to_use, sumstats):
|
|
87
|
-
results = sumstats.groupby("STATUS").count()
|
|
87
|
+
results = sumstats.groupby("STATUS",observed=True).count()
|
|
88
88
|
results = results.loc[results[id_to_use]>0,:].sort_values(id_to_use,ascending=False)
|
|
89
89
|
return results
|
|
90
90
|
|
gwaslab/g_version.py
CHANGED
gwaslab/hm_casting.py
CHANGED
|
@@ -14,9 +14,11 @@ def _merge_mold_with_sumstats(mold, sumstats, ref_path=None, windowsizeb=10, log
|
|
|
14
14
|
for i in sumstats.columns:
|
|
15
15
|
if i in ["SNPID","rsID"]:
|
|
16
16
|
cols_to_drop.append(i)
|
|
17
|
+
|
|
18
|
+
log.write("Start to merge sumstats...", verbose=verbose)
|
|
17
19
|
|
|
18
20
|
if len(cols_to_drop)>0:
|
|
19
|
-
log.write("Dropping old IDs:{}".format(cols_to_drop))
|
|
21
|
+
log.write(" -Dropping old IDs:{}".format(cols_to_drop), verbose=verbose)
|
|
20
22
|
sumstats = sumstats.drop(columns=cols_to_drop)
|
|
21
23
|
|
|
22
24
|
if ref_path is not None :
|
|
@@ -30,17 +32,18 @@ def _merge_mold_with_sumstats(mold, sumstats, ref_path=None, windowsizeb=10, log
|
|
|
30
32
|
mold["_IDENTIFIER_FOR_VARIANT"] = range(len(mold))
|
|
31
33
|
|
|
32
34
|
mold_sumstats = pd.merge(mold, sumstats, on=["CHR","POS"], how="inner",suffixes=suffixes)
|
|
33
|
-
log.write("After merging by CHR and POS:{}".format(len(mold_sumstats)))
|
|
35
|
+
log.write(" -After merging by CHR and POS:{}".format(len(mold_sumstats)), verbose=verbose)
|
|
34
36
|
|
|
35
37
|
mold_sumstats = _keep_variants_with_same_allele_set(mold_sumstats,suffixes=suffixes)
|
|
36
|
-
|
|
38
|
+
|
|
39
|
+
log.write(" -Matched variants:{}".format(len(mold_sumstats)), verbose=verbose)
|
|
37
40
|
|
|
38
|
-
if ref_path is not None:
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
#if ref_path is not None:
|
|
42
|
+
# # match removed sumstats
|
|
43
|
+
# mold_removed = mold.loc[~mold[index1].isin(mold_sumstats[index1]),:]
|
|
44
|
+
# iron_removed = sumstats.loc[~sumstats[index2].isin(mold_sumstats[index2]),:]
|
|
45
|
+
# _match_two_sumstats(mold_removed,iron_removed,ref_path,windowsizeb=windowsizeb)
|
|
46
|
+
# mold_sumstats.drop(columns=["_INDEX",""])
|
|
44
47
|
|
|
45
48
|
if return_not_matched_mold == True:
|
|
46
49
|
sumstats1 = mold.loc[~mold["_IDENTIFIER_FOR_VARIANT"].isin(mold_sumstats["_IDENTIFIER_FOR_VARIANT"]),:]
|
|
@@ -59,14 +62,17 @@ def _keep_variants_with_same_allele_set(sumstats, log=Log(),verbose=True,suffixe
|
|
|
59
62
|
|
|
60
63
|
all_alleles = set(list(sumstats[ea1].unique())+list(sumstats[nea1].unique())+list(sumstats[ea2].unique())+list(sumstats[nea2].unique()))
|
|
61
64
|
allele_type = CategoricalDtype(categories=all_alleles, ordered=False)
|
|
62
|
-
sumstats
|
|
65
|
+
sumstats[[nea1,ea1,nea2,ea2]] = sumstats[[nea1,ea1,nea2,ea2]].astype(allele_type)
|
|
63
66
|
|
|
64
67
|
is_perfect_match = (sumstats[ea2] == sumstats[ea1]) & (sumstats[nea2] == sumstats[nea1])
|
|
65
68
|
is_flipped_match = (sumstats[ea2] == sumstats[nea1]) & (sumstats[nea2] == sumstats[ea1])
|
|
66
69
|
is_allele_set_match = is_flipped_match | is_perfect_match
|
|
67
70
|
|
|
68
|
-
|
|
69
|
-
|
|
71
|
+
log.write(" -Matching alleles and keeping only variants with same allele set: ", verbose=verbose)
|
|
72
|
+
log.write(" -Perfect match: {}".format(sum(is_perfect_match)), verbose=verbose)
|
|
73
|
+
log.write(" -Flipped match: {}".format(sum(is_flipped_match)), verbose=verbose)
|
|
74
|
+
log.write(" -Unmatched : {}".format(sum(~is_allele_set_match)), verbose=verbose)
|
|
75
|
+
|
|
70
76
|
return sumstats.loc[is_allele_set_match,:]
|
|
71
77
|
|
|
72
78
|
def _align_with_mold(sumstats, log=Log(),verbose=True, suffixes=("_MOLD","")):
|
|
@@ -77,10 +83,18 @@ def _align_with_mold(sumstats, log=Log(),verbose=True, suffixes=("_MOLD","")):
|
|
|
77
83
|
nea2="NEA"+suffixes[1]
|
|
78
84
|
status1="STATUS"+suffixes[0]
|
|
79
85
|
status2="STATUS"+suffixes[1]
|
|
86
|
+
|
|
80
87
|
is_perfect_match = (sumstats[ea2] == sumstats[ea1]) & (sumstats[nea2] == sumstats[nea1])
|
|
81
88
|
is_flipped_match = (sumstats[ea2] == sumstats[nea1]) & (sumstats[nea2] == sumstats[ea1])
|
|
82
89
|
|
|
90
|
+
log.write(" -Aligning alleles with reference: ", verbose=verbose)
|
|
91
|
+
log.write(" -Perfect match: {}".format(sum(is_perfect_match)), verbose=verbose)
|
|
92
|
+
log.write(" -Flipped match: {}".format(sum(is_flipped_match)), verbose=verbose)
|
|
93
|
+
|
|
94
|
+
log.write(" -For perfect match: copy STATUS from reference...", verbose=verbose)
|
|
83
95
|
sumstats.loc[is_perfect_match,status2] = copy_status(sumstats.loc[is_perfect_match,status1], sumstats.loc[is_perfect_match,status2],6)
|
|
96
|
+
|
|
97
|
+
log.write(" -For Flipped match: convert STATUS xxxxx[456789]x to xxxxx3x...", verbose=verbose)
|
|
84
98
|
sumstats.loc[is_flipped_match,status2] = vchange_status(sumstats.loc[is_flipped_match,status2],6,"456789","333333")
|
|
85
99
|
|
|
86
100
|
return sumstats
|
|
@@ -119,9 +133,9 @@ def _sort_pair_cols(molded_sumstats, verbose=True, log=Log(), order=None, stats_
|
|
|
119
133
|
if i not in order:
|
|
120
134
|
output_columns.append(i)
|
|
121
135
|
|
|
122
|
-
if verbose: log.write(" -Reordering columns to :", ",".join(output_columns))
|
|
123
|
-
molded_sumstats = molded_sumstats
|
|
124
|
-
if verbose: log.write("Finished sorting columns successfully!")
|
|
136
|
+
if verbose: log.write(" -Reordering columns to :", ",".join(output_columns), verbose=verbose)
|
|
137
|
+
molded_sumstats = molded_sumstats[ output_columns]
|
|
138
|
+
if verbose: log.write("Finished sorting columns successfully!", verbose=verbose)
|
|
125
139
|
|
|
126
140
|
return molded_sumstats
|
|
127
141
|
|