gwaslab-3.4.45-py3-none-any.whl → gwaslab-3.4.47-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gwaslab/__init__.py +2 -1
- gwaslab/bd_common_data.py +22 -0
- gwaslab/g_Sumstats.py +2 -0
- gwaslab/g_version.py +7 -7
- gwaslab/hm_harmonize_sumstats.py +3 -2
- gwaslab/io_preformat_input.py +22 -1
- gwaslab/io_to_formats.py +8 -3
- gwaslab/qc_fix_sumstats.py +8 -1
- gwaslab/util_ex_calculate_ldmatrix.py +20 -7
- gwaslab/util_ex_calculate_prs.py +13 -7
- gwaslab/util_ex_process_ref.py +22 -11
- gwaslab/util_in_filter_value.py +38 -2
- gwaslab/util_in_get_sig.py +32 -8
- gwaslab/util_in_meta.py +234 -0
- gwaslab/util_in_snphwe.py +58 -0
- gwaslab/viz_aux_chromatin.py +112 -0
- gwaslab/viz_plot_compare_effect.py +4 -1
- gwaslab/viz_plot_mqqplot.py +82 -42
- gwaslab/viz_plot_regional2.py +792 -0
- gwaslab/viz_plot_regionalplot.py +4 -0
- gwaslab/viz_plot_stackedregional.py +97 -22
- {gwaslab-3.4.45.dist-info → gwaslab-3.4.47.dist-info}/METADATA +5 -5
- {gwaslab-3.4.45.dist-info → gwaslab-3.4.47.dist-info}/RECORD +27 -23
- {gwaslab-3.4.45.dist-info → gwaslab-3.4.47.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.45.dist-info → gwaslab-3.4.47.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.45.dist-info → gwaslab-3.4.47.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.45.dist-info → gwaslab-3.4.47.dist-info}/top_level.txt +0 -0
gwaslab/util_in_meta.py
ADDED
@@ -0,0 +1,234 @@
+
+import pandas as pd
+import numpy as np
+from scipy.stats.distributions import chi2
+from scipy.stats import norm
+from gwaslab.g_Log import Log
+from gwaslab.io_to_pickle import load_data_from_pickle
+from gwaslab.g_Sumstats import Sumstats
+import gc
+
+def meta_analyze(sumstats_list, random_effects=False, match_allele=True, log=Log()):
+
+    ###########################################################################
+    columns = ["SNPID","CHR","POS","EA","NEA"]
+    results_df = pd.DataFrame(columns=columns)
+
+    log.write("Start to perform meta-analysis...")
+    log.write(" -Datasets:")
+    for index, sumstats_path in enumerate(sumstats_list):
+        if isinstance(sumstats_path, pd.DataFrame):
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+        elif isinstance(sumstats_path, Sumstats):
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+        else:
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+
+
+    # extract all variant information
+    log.write(" -Iterating through {} datasets to determine variant list...".format(len(sumstats_list)))
+
+    for index, sumstats_path in enumerate(sumstats_list):
+        sumstats = get_sumstats(sumstats_path, usekeys=["SNPID","CHR","POS","EA","NEA"])
+        new_rows = sumstats.loc[~sumstats["SNPID"].isin(results_df["SNPID"]), ["SNPID","CHR","POS","EA","NEA"]]
+        log.write(" -Sumstats #{}: {} new variants (out of {}) are being added to analysis...".format(index, len(new_rows), len(sumstats)))
+
+        if len(new_rows) > 0:
+            if len(results_df) == 0:
+                results_df = new_rows
+            else:
+                results_df = pd.concat([results_df, new_rows], ignore_index=True)
+        del sumstats
+        del new_rows
+        gc.collect()
+
+
+
+    ###########################################################################
+    log.write(" -Initiating result DataFrame...")
+    columns = ["SNPID","CHR","POS","EA","NEA","_BETAW_SUM","_EA_N","_NEA_N","_BETA2W_SUM","_W_SUM","EAF","N","DIRECTION","BETA","SE","DOF"]
+    results_df = results_df.set_index("SNPID")
+    results_df["N"] = 0
+    results_df["_BETAW_SUM"] = 0.0
+    results_df["_BETA2W_SUM"] = 0.0
+    results_df["_W_SUM"] = 0.0
+    results_df["_W2_SUM"] = 0.0
+    results_df["_EA_N"] = 0.0
+    results_df["_NEA_N"] = 0.0
+    results_df["N"] = 0
+    results_df["DIRECTION"] = ""
+    results_df["BETA"] = 0.0
+    results_df["SE"] = 0.0
+    results_df["DOF"] = -1
+
+    dtype_dict = {
+        "_BETAW_SUM":"float64",
+        "_EA_N":"float64",
+        "_NEA_N":"float64",
+        "_BETA2W_SUM":"float64",
+        "_W_SUM":"float64",
+        "BETA":"float64",
+        "SE":"float64",
+        "N":"Int64",
+        "DOF":"Int64"
+    }
+    results_df = results_df.astype(dtype_dict)
+    ###########################################################################
+
+    log.write(" -Iterating through {} datasets to compute statistics for fixed-effect model...".format(len(sumstats_list)))
+    for index, sumstats_path in enumerate(sumstats_list):
+        to_use_sumstats = process_sumstats(sumstats_path,
+                                           results_df[["EA","NEA"]],
+                                           index=index,
+                                           match_allele=match_allele)
+        sumstats_index = to_use_sumstats.index
+        results_df_not_in_sumstat_index = results_df.index[~results_df.index.isin(to_use_sumstats.index)]
+
+        # N and DOF
+        results_df.loc[sumstats_index, "N"] += to_use_sumstats["N"]
+        results_df.loc[sumstats_index, "DOF"] += 1
+
+        # BETA and SE
+        results_df.loc[sumstats_index,"_BETA2W_SUM"] += to_use_sumstats["BETA"]**2 * (1/(to_use_sumstats["SE"]**2))
+        results_df.loc[sumstats_index,"_BETAW_SUM"] += to_use_sumstats["BETA"] * (1/(to_use_sumstats["SE"]**2))
+        results_df.loc[sumstats_index,"_W_SUM"] += 1/(to_use_sumstats["SE"]**2)
+        results_df.loc[sumstats_index,"_W2_SUM"] += results_df.loc[sumstats_index,"_W_SUM"]**2
+
+        # EAF
+        results_df.loc[sumstats_index,"_EA_N"] += to_use_sumstats["N"] * to_use_sumstats["EAF"]
+        results_df.loc[sumstats_index,"_NEA_N"] += to_use_sumstats["N"] * (1 - to_use_sumstats["EAF"])
+
+        # DIRECTION
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]>0].index
+        results_df.loc[beta_index, "DIRECTION"] += "+"
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]==0].index
+        results_df.loc[beta_index, "DIRECTION"] += "0"
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]<0].index
+        results_df.loc[beta_index, "DIRECTION"] += "-"
+        results_df.loc[results_df_not_in_sumstat_index, "DIRECTION"] += "?"
+
+        del to_use_sumstats
+        gc.collect()
+
+    ##############################################################################
+    # fixed-effect statistics
+    results_df["BETA"] = results_df["_BETAW_SUM"] / results_df["_W_SUM"]
+    results_df["EAF"] = results_df["_EA_N"] / (results_df["_EA_N"] + results_df["_NEA_N"])
+    results_df["SE"] = np.sqrt(1/results_df["_W_SUM"])
+    results_df["Z"] = results_df["BETA"] / results_df["SE"]
+    results_df["P"] = norm.sf(abs(results_df["Z"]))*2
+    results_df["Q"] = results_df["_BETA2W_SUM"] - (results_df["_BETAW_SUM"]**2 / results_df["_W_SUM"])
+
+    for dof in results_df["DOF"].unique():
+        results_df_dof_index = results_df["DOF"] == dof
+        results_df.loc[results_df_dof_index,"P_HET"] = chi2.sf(results_df.loc[results_df_dof_index, "Q"].values, dof)
+        gc.collect()
+
+    results_df["I2_HET"] = (results_df["Q"] - results_df["DOF"]) / results_df["Q"]
+    results_df.loc[results_df["I2_HET"]<0, "I2_HET"] = 0
+
+    results_df = results_df.drop(columns=["_EA_N","_NEA_N"])
+    gc.collect()
+
+    ###########################################################################
+    if random_effects==True:
+        log.write(" -Iterating through {} datasets to compute statistics for random-effects model...".format(len(sumstats_list)))
+        results_df["_R2"] = (results_df["Q"] - results_df["DOF"]) / (results_df["_W_SUM"] - (results_df["_W2_SUM"]/results_df["_W_SUM"]))
+        results_df.loc[results_df["_R2"]<0, "_R2"] = 0
+        variant_index_random = results_df[results_df["_R2"]>0].index
+
+        results_df["_BETAW_SUM_R"] = 0.0
+        results_df["_W_SUM_R"] = 0.0
+        results_df["BETA_RANDOM"] = results_df["BETA"]
+        results_df["SE_RANDOM"] = results_df["SE"]
+
+        for index, sumstats_path in enumerate(sumstats_list):
+            to_use_sumstats = process_sumstats(sumstats_path,
+                                               results_df.loc[variant_index_random, ["EA","NEA"]],
+                                               index=index,
+                                               match_allele=match_allele,
+                                               extract_index=variant_index_random)
+
+            sumstats_index = to_use_sumstats.index
+
+            # BETA and SE
+            results_df.loc[sumstats_index,"_BETAW_SUM_R"] += to_use_sumstats["BETA"] * (1/(to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index,"_R2"]))
+            results_df.loc[sumstats_index,"_W_SUM_R"] += 1/(to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index,"_R2"])
+
+            del to_use_sumstats
+            del sumstats_index
+            gc.collect()
+
+        results_df.loc[variant_index_random,"BETA_RANDOM"] = results_df.loc[variant_index_random,"_BETAW_SUM_R"] / results_df.loc[variant_index_random,"_W_SUM_R"]
+        results_df.loc[variant_index_random,"SE_RANDOM"] = np.sqrt(1/results_df.loc[variant_index_random,"_W_SUM_R"])
+        results_df["Z_RANDOM"] = results_df["BETA_RANDOM"] / results_df["SE_RANDOM"]
+        results_df["P_RANDOM"] = norm.sf(abs(results_df["Z_RANDOM"]))*2
+        results_df = results_df.drop(columns=["_BETAW_SUM_R","_W_SUM_R"])
+
+    gc.collect()
+    ###########################################################################
+    results_df = results_df.drop(columns=["_BETAW_SUM","_BETA2W_SUM","_W_SUM","_R2","_W2_SUM"]).sort_values(by=["CHR","POS"])
+    gc.collect()
+    log.write("Finished meta-analysis successfully!")
+
+    return results_df
+
+def process_sumstats(sumstats_path, results_df, index, extract_index=None, match_allele=True, log=Log()):
+
+    if extract_index is None:
+        extract_index = results_df.index
+
+    sumstats = get_sumstats(sumstats_path)
+
+    to_use_sumstats = sumstats.loc[sumstats["SNPID"].isin(extract_index.values), ["SNPID","EA","NEA","BETA","N","SE","EAF"]]
+
+    if len(to_use_sumstats) > 0:
+        n_pre_dup = len(to_use_sumstats)
+        log.write(" -Processing {} variants from sumstats #{}".format(len(to_use_sumstats), index))
+
+        to_use_sumstats = to_use_sumstats.drop_duplicates(subset="SNPID").set_index("SNPID")
+        n_post_dup = len(to_use_sumstats)
+
+        if n_pre_dup - n_post_dup > 0:
+            log.write(" -Dropping {} duplicated variants from sumstats #{}".format(n_pre_dup - n_post_dup, index))
+
+        if match_allele==True:
+            sumstats_index = to_use_sumstats.index
+            # drop variants whose alleles match in neither orientation
+            is_match = (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "EA"]) & (to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "NEA"])
+            is_flip = (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "NEA"]) & (to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "EA"])
+            is_flip = is_flip | ((to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "EA"]) & (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "NEA"]))
+            is_to_use = is_match | is_flip
+
+            if sum(~is_to_use) > 0:
+                log.write(" -Dropping {} variants with unmatched alleles from sumstats #{}".format(sum(~is_to_use), index))
+
+            to_use_sumstats.loc[is_flip[is_flip].index, "BETA"] = -to_use_sumstats.loc[is_flip[is_flip].index, "BETA"]
+            to_use_sumstats.loc[is_flip[is_flip].index, "EAF"] = 1 - to_use_sumstats.loc[is_flip[is_flip].index, "EAF"]
+            to_use_sumstats = to_use_sumstats.loc[is_to_use[is_to_use].index, :]
+
+    gc.collect()
+
+    return to_use_sumstats
+
+def get_sumstats(input_path, usekeys=None):
+    if isinstance(input_path, tuple):
+        path = input_path[0]
+        path_args = input_path[1]
+    else:
+        path = input_path
+        path_args = {}
+
+    if isinstance(path, pd.DataFrame):
+        sumstats = Sumstats(path, fmt="auto", verbose=False, usekeys=usekeys, **path_args).data
+    elif isinstance(path, Sumstats):
+        sumstats = path.data
+        if usekeys is not None:
+            sumstats = sumstats[usekeys]
+    elif path[-6:] == "pickle":
+        sumstats = load_data_from_pickle(path)
+        if usekeys is not None:
+            sumstats = sumstats[usekeys]
+    else:
+        sumstats = Sumstats(path, fmt="auto", verbose=False, usekeys=usekeys, **path_args).data
+    return sumstats
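For orientation: each study contributes an inverse-variance weight w_i = 1/SE_i^2, so the pooled fixed effect is sum(w_i * BETA_i) / sum(w_i) with SE = sqrt(1 / sum(w_i)), and Cochran's Q with I2_HET quantifies between-study heterogeneity. A minimal usage sketch, assuming two sumstats files that the Sumstats(fmt="auto") loader can parse; the file names below are placeholders:

from gwaslab.util_in_meta import meta_analyze

# Placeholder paths; per get_sumstats(), a (path, reader_kwargs) tuple,
# a pandas DataFrame, or a loaded gwaslab Sumstats object also works.
meta = meta_analyze(
    ["study1.sumstats.tsv.gz", "study2.sumstats.tsv.gz"],
    random_effects=True,   # also compute DerSimonian-Laird-style estimates
    match_allele=True,     # flip BETA/EAF when EA/NEA are swapped
)
# Fixed-effect columns: BETA, SE, Z, P; heterogeneity: Q, P_HET, I2_HET;
# with random_effects=True: BETA_RANDOM, SE_RANDOM, Z_RANDOM, P_RANDOM.
print(meta[["CHR","POS","EA","NEA","BETA","SE","P","P_HET","I2_HET"]].head())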
gwaslab/util_in_snphwe.py
ADDED
@@ -0,0 +1,58 @@
+
+import numpy as np
+import pandas as pd
+
+def snphwe(obs_hets, obs_hom1, obs_hom2):
+    # Converted from the C++ code by Jeremy McRae to Python
+    # https://github.com/jeremymcrae/snphwe/blob/master/src/snp_hwe.cpp
+    #/* (original comments)
+    #// This code implements an exact SNP test of Hardy-Weinberg Equilibrium as
+    #// described in Wigginton, JE, Cutler, DJ, and Abecasis, GR (2005) A Note on
+    #// Exact Tests of Hardy-Weinberg Equilibrium. AJHG 76: 887-893
+    #//
+    #// Written by Jan Wigginton
+    #*/
+
+    obs_homr = min(obs_hom1, obs_hom2)
+    obs_homc = max(obs_hom1, obs_hom2)
+
+    rare = 2 * obs_homr + obs_hets
+    genotypes = obs_hets + obs_homc + obs_homr
+
+    probs = np.array([0.0 for i in range(rare + 1)])
+
+    mid = rare * (2 * genotypes - rare) // (2 * genotypes)
+
+    if mid % 2 != rare % 2:
+        mid += 1
+
+    probs[mid] = 1.0
+
+    sum_p = 1  # probs[mid]
+    curr_homr = (rare - mid) // 2
+    curr_homc = genotypes - mid - curr_homr
+
+
+    for curr_hets in range(mid, 1, -2):
+        probs[curr_hets - 2] = probs[curr_hets] * curr_hets * (curr_hets - 1.0) / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0))
+        sum_p += probs[curr_hets - 2]
+        curr_homr += 1
+        curr_homc += 1
+
+    curr_homr = (rare - mid) // 2
+    curr_homc = genotypes - mid - curr_homr
+
+    for curr_hets in range(mid, rare - 1, 2):
+        probs[curr_hets + 2] = probs[curr_hets] * 4.0 * curr_homr * curr_homc / ((curr_hets + 2.0) * (curr_hets + 1.0))
+        sum_p += probs[curr_hets + 2]
+        curr_homr -= 1
+        curr_homc -= 1
+
+    target = probs[obs_hets]
+    p_hwe = 0.0
+
+    for p in probs:
+        if p <= target:
+            p_hwe += p / sum_p
+
+    return min(p_hwe, 1)
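snphwe takes the observed heterozygote count and the two homozygote counts and returns the exact two-sided Hardy-Weinberg p-value, built by recursing over the conditional distribution of heterozygote counts. A quick sketch with made-up genotype counts:

from gwaslab.util_in_snphwe import snphwe

# Made-up counts for one SNP: 57 AB, 495 AA, 13 BB genotypes.
p = snphwe(obs_hets=57, obs_hom1=495, obs_hom2=13)
print(p)  # a small p-value suggests departure from Hardy-Weinberg equilibrium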
gwaslab/viz_aux_chromatin.py
ADDED
@@ -0,0 +1,112 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from gwaslab.g_Log import Log
+
+#STATE NO.  MNEMONIC   DESCRIPTION                  COLOR NAME          COLOR CODE
+#1    TssA       Active TSS                   Red                 255,0,0
+#2    TssAFlnk   Flanking Active TSS          Orange Red          255,69,0
+#3    TxFlnk     Transcr. at gene 5' and 3'   LimeGreen           50,205,50
+#4    Tx         Strong transcription         Green               0,128,0
+#5    TxWk       Weak transcription           DarkGreen           0,100,0
+#6    EnhG       Genic enhancers              GreenYellow         194,225,5
+#7    Enh        Enhancers                    Yellow              255,255,0
+#8    ZNF/Rpts   ZNF genes & repeats          Medium Aquamarine   102,205,170
+#9    Het        Heterochromatin              PaleTurquoise       138,145,208
+#10   TssBiv     Bivalent/Poised TSS          IndianRed           205,92,92
+#11   BivFlnk    Flanking Bivalent TSS/Enh    DarkSalmon          233,150,122
+#12   EnhBiv     Bivalent Enhancer            DarkKhaki           189,183,107
+#13   ReprPC     Repressed PolyComb           Silver              128,128,128
+#14   ReprPCWk   Weak Repressed PolyComb      Gainsboro           192,192,192
+#15   Quies      Quiescent/Low                White               255,255,255
+
+color_dict={
+    "E1": np.array([255,0,0]),
+    "E2": np.array([255,69,0]),
+    "E3": np.array([50,205,50]),
+    "E4": np.array([0,128,0]),
+    "E5": np.array([0,100,0]),
+    "E6": np.array([194,225,5]),
+    "E7": np.array([255,255,0]),
+    "E8": np.array([102,205,170]),
+    "E9": np.array([138,145,208]),
+    "E10":np.array([205,92,92]),
+    "E11":np.array([233,150,122]),
+    "E12":np.array([189,183,107]),
+    "E13":np.array([128,128,128]),
+    "E14":np.array([192,192,192]),
+    "E15":np.array([255,255,255])
+}
+
+color_dict_i={
+    1: np.array([255,0,0]),
+    2: np.array([255,69,0]),
+    3: np.array([50,205,50]),
+    4: np.array([0,128,0]),
+    5: np.array([0,100,0]),
+    6: np.array([194,225,5]),
+    7: np.array([255,255,0]),
+    8: np.array([102,205,170]),
+    9: np.array([138,145,208]),
+    10:np.array([205,92,92]),
+    11:np.array([233,150,122]),
+    12:np.array([189,183,107]),
+    13:np.array([128,128,128]),
+    14:np.array([192,192,192]),
+    15:np.array([255,255,255])
+}
+
+
+def _plot_chromatin_state(region_chromatin_files,
+                          region_chromatin_labels,
+                          region,
+                          fig,
+                          ax,
+                          xlim_i,
+                          fontsize=12,
+                          font_family="Arial",
+                          log=Log(),
+                          verbose=True):
+    '''
+    region_chromatin_files : a list of file paths
+    '''
+    target_chr = region[0]
+    target_start = region[1]
+    target_end = region[2]
+
+    offset_i = xlim_i[0] - region[1]
+
+    ax.set_ylim([-0.05, 0.1*len(region_chromatin_files)-0.05])
+    ax.set_xlim([offset_i+target_start, offset_i+target_end])
+
+    # height of a 0.1 data-unit stripe, in pixels and then in points
+    px_for_01 = ax.transData.transform([0,0])[1] - ax.transData.transform([0,0.1])[1]
+
+    point = fig.dpi/72
+    points_for_01 = px_for_01*72 / fig.dpi
+
+    # each tissue
+    for i, file in enumerate(region_chromatin_files):
+        log.write(" -Loading : {}".format(file), verbose=verbose)
+        enh = pd.read_csv(file, sep="\t", header=None)
+        enh.columns = ["ID","START","END","STATE"]
+        enh["CHR"] = enh["ID"].str.extract(r"chr([0-9]+)").astype("float").astype("Int64")
+        enh["STATE_i"] = enh["STATE"].str.extract(r"([0-9]+)_*").astype("float").astype("Int64")
+        enh_in_region = (enh["CHR"] == target_chr) & ((enh["END"] > target_start) & (enh["START"] < target_end))
+        df = enh.loc[enh_in_region, ["STATE_i","START","END"]].sort_values("STATE_i", ascending=False)
+        log.write(" -Number of records in specified region: {}".format(len(df)), verbose=verbose)
+        # each block
+        for index, row in df.iterrows():
+            color = color_dict_i[row["STATE_i"]]
+            ax.plot([offset_i + row["START"], offset_i + row["END"]],
+                    [i*0.1, i*0.1],
+                    c=color/255, linewidth=points_for_01, solid_capstyle="butt")
+
+    ## add stripe label
+    if len(region_chromatin_labels) == len(region_chromatin_files):
+        ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))], region_chromatin_labels, fontsize=fontsize, family=font_family)
+    else:
+        ax.set_yticks(ticks=[])
+
+    #ax.set_xticks(ticks=[])
+    ax.invert_yaxis()
+    return fig
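_plot_chromatin_state draws one horizontal stripe per input file onto an existing Axes, coloring each interval by the Roadmap 15-state palette above. It expects tab-separated, header-less files with columns ID (e.g. "chr2"), START, END, STATE (e.g. "7_Enh"). A sketch of standalone use; gwaslab normally calls this helper from its regional plot, and the file path and label here are placeholders:

import matplotlib.pyplot as plt
from gwaslab.viz_aux_chromatin import _plot_chromatin_state

fig, ax = plt.subplots(figsize=(10, 1), dpi=200)
_plot_chromatin_state(
    region_chromatin_files=["E073_15_coreMarks_chr2.bed"],  # placeholder path
    region_chromatin_labels=["Brain DLPFC"],                # placeholder label
    region=(2, 55000000, 56000000),   # CHR, START, END
    fig=fig,
    ax=ax,
    xlim_i=(55000000, 56000000),      # x-limits shared with the main panel
)
fig.savefig("chromatin_track.png")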
gwaslab/viz_plot_compare_effect.py
@@ -75,7 +75,10 @@ def compare_effect(path1,
     if scaled == True:
         scaled1 = True
         scaled2 = True
-
+    if is_q_mc=="fdr" or is_q_mc=="bon":
+        is_q = True
+    else:
+        raise ValueError("Please select either fdr or bon for is_q_mc.")
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:
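The compare_effect change adds fail-fast validation of the is_q_mc option, which selects the multiple-comparison correction applied to the heterogeneity-Q p-values: "fdr" or "bon" (presumably FDR and Bonferroni) now enables is_q, and any other value raises a ValueError. The same guard in isolation, as a sketch rather than gwaslab's own code:

# Hypothetical standalone mirror of the added validation logic.
def validate_is_q_mc(is_q_mc):
    # Accept only the two supported correction modes.
    if is_q_mc == "fdr" or is_q_mc == "bon":
        return True
    raise ValueError("Please select either fdr or bon for is_q_mc.")

print(validate_is_q_mc("fdr"))   # True
# validate_is_q_mc("holm")       # would raise ValueError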