gwaslab 3.4.40__py3-none-any.whl → 3.4.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +14 -1
- gwaslab/bd_get_hapmap3.py +7 -3
- gwaslab/g_Sumstats.py +156 -138
- gwaslab/g_SumstatsPair.py +15 -15
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +365 -12
- gwaslab/io_read_tabular.py +7 -7
- gwaslab/io_to_formats.py +96 -21
- gwaslab/io_to_pickle.py +1 -1
- gwaslab/ldsc_ldscore.py +1 -1
- gwaslab/qc_fix_sumstats.py +2 -2
- gwaslab/util_ex_calculate_ldmatrix.py +2 -2
- gwaslab/util_ex_calculate_prs.py +2 -2
- gwaslab/util_ex_ldsc.py +163 -110
- gwaslab/util_ex_plink_filter.py +2 -2
- gwaslab/util_ex_run_clumping.py +2 -2
- gwaslab/util_in_filter_value.py +27 -9
- gwaslab/viz_plot_regionalplot.py +2 -2
- gwaslab/viz_plot_trumpetplot.py +115 -4
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/METADATA +33 -5
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/RECORD +25 -25
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/top_level.txt +0 -0
gwaslab/util_ex_ldsc.py
CHANGED
|
@@ -7,244 +7,249 @@ from gwaslab.qc_fix_sumstats import finished
|
|
|
7
7
|
from gwaslab.qc_fix_sumstats import skipped
|
|
8
8
|
from gwaslab.io_read_ldsc import parse_ldsc_summary
|
|
9
9
|
from gwaslab.io_read_ldsc import parse_partitioned_ldsc_summary
|
|
10
|
+
from gwaslab.util_in_filter_value import filtervalues
|
|
11
|
+
from gwaslab.util_in_filter_value import _filter_palindromic
|
|
12
|
+
from gwaslab.util_in_filter_value import _exclude_hla
|
|
13
|
+
from gwaslab.util_in_filter_value import _exclude_sexchr
|
|
14
|
+
|
|
10
15
|
class ARGS():
|
|
11
|
-
def __init__(self, **
|
|
16
|
+
def __init__(self, **kwargs):
|
|
12
17
|
|
|
13
18
|
self.out = "ldsc"
|
|
14
19
|
|
|
15
|
-
if "bfile" in
|
|
16
|
-
self.bfile =
|
|
20
|
+
if "bfile" in kwargs.keys():
|
|
21
|
+
self.bfile = kwargs["bfile"]
|
|
17
22
|
else:
|
|
18
23
|
self.bfile = None
|
|
19
24
|
|
|
20
|
-
if "l2" in
|
|
21
|
-
self.l2 =
|
|
25
|
+
if "l2" in kwargs.keys():
|
|
26
|
+
self.l2 = kwargs["l2"]
|
|
22
27
|
else:
|
|
23
28
|
self.l2 = None
|
|
24
29
|
|
|
25
|
-
if "extract" in
|
|
26
|
-
self.extract =
|
|
30
|
+
if "extract" in kwargs.keys():
|
|
31
|
+
self.extract = kwargs["extract"]
|
|
27
32
|
else:
|
|
28
33
|
self.extract = None
|
|
29
34
|
|
|
30
|
-
if "keep" in
|
|
31
|
-
self.keep =
|
|
35
|
+
if "keep" in kwargs.keys():
|
|
36
|
+
self.keep = kwargs["keep"]
|
|
32
37
|
else:
|
|
33
38
|
self.keep = None
|
|
34
39
|
|
|
35
|
-
if "ld_wind_snps" in
|
|
36
|
-
self.ld_wind_snps =
|
|
40
|
+
if "ld_wind_snps" in kwargs.keys():
|
|
41
|
+
self.ld_wind_snps = kwargs["ld_wind_snps"]
|
|
37
42
|
else:
|
|
38
43
|
self.ld_wind_snps = None
|
|
39
44
|
|
|
40
|
-
if "ld_wind_kb" in
|
|
41
|
-
self.ld_wind_kb =
|
|
45
|
+
if "ld_wind_kb" in kwargs.keys():
|
|
46
|
+
self.ld_wind_kb = kwargs["ld_wind_kb"]
|
|
42
47
|
else:
|
|
43
48
|
self.ld_wind_kb = None
|
|
44
49
|
|
|
45
|
-
if "ld_wind_cm" in
|
|
46
|
-
self.ld_wind_cm =
|
|
50
|
+
if "ld_wind_cm" in kwargs.keys():
|
|
51
|
+
self.ld_wind_cm = kwargs["ld_wind_cm"]
|
|
47
52
|
else:
|
|
48
53
|
self.ld_wind_cm = None
|
|
49
54
|
|
|
50
|
-
if "print_snps" in
|
|
51
|
-
self.print_snps =
|
|
55
|
+
if "print_snps" in kwargs.keys():
|
|
56
|
+
self.print_snps = kwargs["print_snps"]
|
|
52
57
|
else:
|
|
53
58
|
self.print_snps = None
|
|
54
59
|
|
|
55
|
-
if "annot" in
|
|
56
|
-
self.annot =
|
|
60
|
+
if "annot" in kwargs.keys():
|
|
61
|
+
self.annot = kwargs["annot"]
|
|
57
62
|
else:
|
|
58
63
|
self.annot = None
|
|
59
64
|
|
|
60
|
-
if "thin_annot" in
|
|
61
|
-
self.thin_annot =
|
|
65
|
+
if "thin_annot" in kwargs.keys():
|
|
66
|
+
self.thin_annot = kwargs["thin_annot"]
|
|
62
67
|
else:
|
|
63
68
|
self.thin_annot = None
|
|
64
69
|
|
|
65
|
-
if "cts_bin" in
|
|
66
|
-
self.cts_bin =
|
|
70
|
+
if "cts_bin" in kwargs.keys():
|
|
71
|
+
self.cts_bin = kwargs["cts_bin"]
|
|
67
72
|
else:
|
|
68
73
|
self.cts_bin = None
|
|
69
74
|
|
|
70
|
-
if "cts_breaks" in
|
|
71
|
-
self.cts_breaks =
|
|
75
|
+
if "cts_breaks" in kwargs.keys():
|
|
76
|
+
self.cts_breaks = kwargs["cts_breaks"]
|
|
72
77
|
else:
|
|
73
78
|
self.cts_breaks = None
|
|
74
79
|
|
|
75
|
-
if "cts_names" in
|
|
76
|
-
self.cts_names =
|
|
80
|
+
if "cts_names" in kwargs.keys():
|
|
81
|
+
self.cts_names = kwargs["cts_names"]
|
|
77
82
|
else:
|
|
78
83
|
self.cts_names = None
|
|
79
84
|
|
|
80
|
-
if "per_allele" in
|
|
81
|
-
self.per_allele =
|
|
85
|
+
if "per_allele" in kwargs.keys():
|
|
86
|
+
self.per_allele = kwargs["per_allele"]
|
|
82
87
|
else:
|
|
83
88
|
self.per_allele = None
|
|
84
89
|
|
|
85
|
-
if "pq_exp" in
|
|
86
|
-
self.pq_exp =
|
|
90
|
+
if "pq_exp" in kwargs.keys():
|
|
91
|
+
self.pq_exp = kwargs["pq_exp"]
|
|
87
92
|
else:
|
|
88
93
|
self.pq_exp = None
|
|
89
94
|
|
|
90
|
-
if "no_print_annot" in
|
|
91
|
-
self.no_print_annot =
|
|
95
|
+
if "no_print_annot" in kwargs.keys():
|
|
96
|
+
self.no_print_annot = kwargs["no_print_annot"]
|
|
92
97
|
else:
|
|
93
98
|
self.no_print_annot = None
|
|
94
99
|
|
|
95
|
-
if "h2" in
|
|
96
|
-
self.h2 =
|
|
100
|
+
if "h2" in kwargs.keys():
|
|
101
|
+
self.h2 = kwargs["h2"]
|
|
97
102
|
else:
|
|
98
103
|
self.h2 = None
|
|
99
104
|
|
|
100
|
-
if "h2_cts" in
|
|
101
|
-
self.h2_cts =
|
|
105
|
+
if "h2_cts" in kwargs.keys():
|
|
106
|
+
self.h2_cts = kwargs["h2_cts"]
|
|
102
107
|
else:
|
|
103
108
|
self.h2_cts = None
|
|
104
109
|
|
|
105
|
-
if "rg" in
|
|
106
|
-
self.rg =
|
|
110
|
+
if "rg" in kwargs.keys():
|
|
111
|
+
self.rg = kwargs["rg"]
|
|
107
112
|
else:
|
|
108
113
|
self.rg = None
|
|
109
114
|
|
|
110
|
-
if "ref_ld" in
|
|
111
|
-
self.ref_ld =
|
|
115
|
+
if "ref_ld" in kwargs.keys():
|
|
116
|
+
self.ref_ld = kwargs["ref_ld"]
|
|
112
117
|
else:
|
|
113
118
|
self.ref_ld = None
|
|
114
119
|
|
|
115
|
-
if "ref_ld_chr" in
|
|
116
|
-
self.ref_ld_chr =
|
|
120
|
+
if "ref_ld_chr" in kwargs.keys():
|
|
121
|
+
self.ref_ld_chr = kwargs["ref_ld_chr"]
|
|
117
122
|
else:
|
|
118
123
|
self.ref_ld_chr = None
|
|
119
124
|
|
|
120
|
-
if "w_ld" in
|
|
121
|
-
self.w_ld =
|
|
125
|
+
if "w_ld" in kwargs.keys():
|
|
126
|
+
self.w_ld = kwargs["w_ld"]
|
|
122
127
|
else:
|
|
123
128
|
self.w_ld = None
|
|
124
129
|
|
|
125
|
-
if "w_ld_chr" in
|
|
126
|
-
self.w_ld_chr =
|
|
130
|
+
if "w_ld_chr" in kwargs.keys():
|
|
131
|
+
self.w_ld_chr = kwargs["w_ld_chr"]
|
|
127
132
|
else:
|
|
128
133
|
self.w_ld_chr = None
|
|
129
134
|
|
|
130
|
-
if "overlap_annot" in
|
|
131
|
-
self.overlap_annot =
|
|
135
|
+
if "overlap_annot" in kwargs.keys():
|
|
136
|
+
self.overlap_annot = kwargs["overlap_annot"]
|
|
132
137
|
else:
|
|
133
138
|
self.overlap_annot = None
|
|
134
139
|
|
|
135
|
-
if "print_coefficients" in
|
|
136
|
-
self.print_coefficients =
|
|
140
|
+
if "print_coefficients" in kwargs.keys():
|
|
141
|
+
self.print_coefficients = kwargs["print_coefficients"]
|
|
137
142
|
else:
|
|
138
143
|
self.print_coefficients = "ldsc"
|
|
139
144
|
|
|
140
|
-
if "frqfile" in
|
|
141
|
-
self.frqfile =
|
|
145
|
+
if "frqfile" in kwargs.keys():
|
|
146
|
+
self.frqfile = kwargs["frqfile"]
|
|
142
147
|
else:
|
|
143
148
|
self.frqfile = None
|
|
144
149
|
|
|
145
|
-
if "frqfile_chr" in
|
|
146
|
-
self.frqfile_chr =
|
|
150
|
+
if "frqfile_chr" in kwargs.keys():
|
|
151
|
+
self.frqfile_chr = kwargs["frqfile_chr"]
|
|
147
152
|
else:
|
|
148
153
|
self.frqfile_chr = None
|
|
149
154
|
|
|
150
|
-
if "no_intercept" in
|
|
151
|
-
self.no_intercept =
|
|
155
|
+
if "no_intercept" in kwargs.keys():
|
|
156
|
+
self.no_intercept = kwargs["no_intercept"]
|
|
152
157
|
else:
|
|
153
158
|
self.no_intercept = None
|
|
154
159
|
|
|
155
|
-
if "intercept_h2" in
|
|
156
|
-
self.intercept_h2 =
|
|
160
|
+
if "intercept_h2" in kwargs.keys():
|
|
161
|
+
self.intercept_h2 = kwargs["intercept_h2"]
|
|
157
162
|
else:
|
|
158
163
|
self.intercept_h2 = None
|
|
159
164
|
|
|
160
|
-
if "intercept_gencov" in
|
|
161
|
-
self.intercept_gencov =
|
|
165
|
+
if "intercept_gencov" in kwargs.keys():
|
|
166
|
+
self.intercept_gencov = kwargs["intercept_gencov"]
|
|
162
167
|
else:
|
|
163
168
|
self.intercept_gencov = None
|
|
164
169
|
|
|
165
|
-
if "M" in
|
|
166
|
-
self.M =
|
|
170
|
+
if "M" in kwargs.keys():
|
|
171
|
+
self.M = kwargs["M"]
|
|
167
172
|
else:
|
|
168
173
|
self.M = None
|
|
169
174
|
|
|
170
|
-
if "two_step" in
|
|
171
|
-
self.two_step =
|
|
175
|
+
if "two_step" in kwargs.keys():
|
|
176
|
+
self.two_step = kwargs["two_step"]
|
|
172
177
|
else:
|
|
173
178
|
self.two_step = None
|
|
174
179
|
|
|
175
|
-
if "chisq_max" in
|
|
176
|
-
self.chisq_max =
|
|
180
|
+
if "chisq_max" in kwargs.keys():
|
|
181
|
+
self.chisq_max = kwargs["chisq_max"]
|
|
177
182
|
else:
|
|
178
183
|
self.chisq_max= None
|
|
179
184
|
|
|
180
|
-
if "ref_ld_chr_cts" in
|
|
181
|
-
self.ref_ld_chr_cts =
|
|
185
|
+
if "ref_ld_chr_cts" in kwargs.keys():
|
|
186
|
+
self.ref_ld_chr_cts = kwargs["ref_ld_chr_cts"]
|
|
182
187
|
else:
|
|
183
188
|
self.ref_ld_chr_cts = None
|
|
184
189
|
|
|
185
|
-
if "print_all_cts" in
|
|
186
|
-
self.print_all_cts =
|
|
190
|
+
if "print_all_cts" in kwargs.keys():
|
|
191
|
+
self.print_all_cts = kwargs["print_all_cts"]
|
|
187
192
|
else:
|
|
188
193
|
self.print_all_cts = False
|
|
189
194
|
|
|
190
|
-
if "print_cov" in
|
|
191
|
-
self.print_cov =
|
|
195
|
+
if "print_cov" in kwargs.keys():
|
|
196
|
+
self.print_cov = kwargs["print_cov"]
|
|
192
197
|
else:
|
|
193
198
|
self.print_cov = None
|
|
194
199
|
|
|
195
200
|
self.print_delete_vals = False
|
|
196
|
-
if "print_delete_vals" in
|
|
197
|
-
self.print_delete_vals =
|
|
201
|
+
if "print_delete_vals" in kwargs.keys():
|
|
202
|
+
self.print_delete_vals = kwargs["print_delete_vals"]
|
|
198
203
|
else:
|
|
199
204
|
self.print_delete_vals = False
|
|
200
205
|
|
|
201
|
-
if "chunk_size" in
|
|
202
|
-
self.chunk_size =
|
|
206
|
+
if "chunk_size" in kwargs.keys():
|
|
207
|
+
self.chunk_size = kwargs["chunk_size"]
|
|
203
208
|
else:
|
|
204
209
|
self.chunk_size = 50
|
|
205
210
|
|
|
206
|
-
if "pickle" in
|
|
207
|
-
self.pickle =
|
|
211
|
+
if "pickle" in kwargs.keys():
|
|
212
|
+
self.pickle = kwargs["pickle"]
|
|
208
213
|
else:
|
|
209
214
|
self.pickle = False
|
|
210
215
|
|
|
211
|
-
if "yes_really" in
|
|
212
|
-
self.yes_really =
|
|
216
|
+
if "yes_really" in kwargs.keys():
|
|
217
|
+
self.yes_really = kwargs["yes_really"]
|
|
213
218
|
else:
|
|
214
219
|
self.yes_really = False
|
|
215
220
|
|
|
216
|
-
if "invert_anyway" in
|
|
217
|
-
self.invert_anyway =
|
|
221
|
+
if "invert_anyway" in kwargs.keys():
|
|
222
|
+
self.invert_anyway = kwargs["invert_anyway"]
|
|
218
223
|
else:
|
|
219
224
|
self.invert_anyway = False
|
|
220
225
|
|
|
221
|
-
if "n_blocks" in
|
|
222
|
-
self.n_blocks =
|
|
226
|
+
if "n_blocks" in kwargs.keys():
|
|
227
|
+
self.n_blocks = kwargs["n_blocks"]
|
|
223
228
|
else:
|
|
224
229
|
self.n_blocks = 200
|
|
225
230
|
|
|
226
|
-
if "not_M_5_50" in
|
|
227
|
-
self.not_M_5_50 =
|
|
231
|
+
if "not_M_5_50" in kwargs.keys():
|
|
232
|
+
self.not_M_5_50 = kwargs["not_M_5_50"]
|
|
228
233
|
else:
|
|
229
234
|
self.not_M_5_50 = False
|
|
230
235
|
|
|
231
|
-
if "no_check_alleles" in
|
|
232
|
-
self.no_check_alleles =
|
|
236
|
+
if "no_check_alleles" in kwargs.keys():
|
|
237
|
+
self.no_check_alleles = kwargs["no_check_alleles"]
|
|
233
238
|
else:
|
|
234
239
|
self.no_check_alleles = False
|
|
235
240
|
|
|
236
|
-
if "return_silly_things" in
|
|
237
|
-
self.return_silly_things =
|
|
241
|
+
if "return_silly_things" in kwargs.keys():
|
|
242
|
+
self.return_silly_things = kwargs["return_silly_things"]
|
|
238
243
|
else:
|
|
239
244
|
self.return_silly_things = False
|
|
240
245
|
|
|
241
|
-
if "samp_prev" in
|
|
242
|
-
self.samp_prev =
|
|
246
|
+
if "samp_prev" in kwargs.keys():
|
|
247
|
+
self.samp_prev = kwargs["samp_prev"]
|
|
243
248
|
else:
|
|
244
249
|
self.samp_prev = None
|
|
245
250
|
|
|
246
|
-
if "pop_prev" in
|
|
247
|
-
self.pop_prev =
|
|
251
|
+
if "pop_prev" in kwargs.keys():
|
|
252
|
+
self.pop_prev = kwargs["pop_prev"]
|
|
248
253
|
else:
|
|
249
254
|
self.pop_prev = None
|
|
250
255
|
|
|
@@ -252,8 +257,16 @@ class ARGS():
|
|
|
252
257
|
####################################################################################################################
|
|
253
258
|
|
|
254
259
|
|
|
255
|
-
def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **
|
|
260
|
+
def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=None, **kwargs):
|
|
256
261
|
sumstats = insumstats.copy()
|
|
262
|
+
|
|
263
|
+
if munge:
|
|
264
|
+
if munge_args is None:
|
|
265
|
+
munge_args={}
|
|
266
|
+
log.write("Start to munge sumstats.")
|
|
267
|
+
sumstats = _munge_sumstats(sumstats, log=log, verbose=verbose,**munge_args)
|
|
268
|
+
log.write("Finished munging sumstats.")
|
|
269
|
+
|
|
257
270
|
##start function with col checking##########################################################
|
|
258
271
|
_start_line = "run LD score regression"
|
|
259
272
|
_end_line = "running LD score regression"
|
|
@@ -274,12 +287,14 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **args):
|
|
|
274
287
|
log.write(" -Run single variate LD score regression:", verbose=verbose)
|
|
275
288
|
log.write(" -Adopted from LDSC source code: https://github.com/bulik/ldsc", verbose=verbose)
|
|
276
289
|
log.write(" -Please cite LDSC: Bulik-Sullivan, et al. LD Score Regression Distinguishes Confounding from Polygenicity in Genome-Wide Association Studies. Nature Genetics, 2015.", verbose=verbose)
|
|
277
|
-
log.write(" -Arguments:", verbose=verbose)
|
|
278
290
|
|
|
279
|
-
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
log.write(" -Arguments:", verbose=verbose)
|
|
295
|
+
for key, value in kwargs.items():
|
|
280
296
|
log.write(" -{}:{}".format(key, value), verbose=verbose)
|
|
281
|
-
|
|
282
|
-
default_args = ARGS(**args)
|
|
297
|
+
default_args = ARGS(**kwargs)
|
|
283
298
|
|
|
284
299
|
if "Z" not in sumstats.columns:
|
|
285
300
|
sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
|
|
@@ -296,7 +311,7 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **args):
|
|
|
296
311
|
|
|
297
312
|
####################################################################################################################
|
|
298
313
|
|
|
299
|
-
def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **
|
|
314
|
+
def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **kwargs):
|
|
300
315
|
sumstats = insumstats.copy()
|
|
301
316
|
##start function with col checking##########################################################
|
|
302
317
|
_start_line = "run LD score regression"
|
|
@@ -320,10 +335,10 @@ def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **args):
|
|
|
320
335
|
log.write(" -Please cite LDSC: Bulik-Sullivan, et al. LD Score Regression Distinguishes Confounding from Polygenicity in Genome-Wide Association Studies. Nature Genetics, 2015.", verbose=verbose)
|
|
321
336
|
log.write(" -Arguments:", verbose=verbose)
|
|
322
337
|
|
|
323
|
-
for key, value in
|
|
338
|
+
for key, value in kwargs.items():
|
|
324
339
|
log.write(" -{}:{}".format(key, value), verbose=verbose)
|
|
325
340
|
|
|
326
|
-
default_args = ARGS(**
|
|
341
|
+
default_args = ARGS(**kwargs)
|
|
327
342
|
|
|
328
343
|
if "Z" not in sumstats.columns:
|
|
329
344
|
sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
|
|
@@ -342,7 +357,7 @@ def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **args):
|
|
|
342
357
|
|
|
343
358
|
|
|
344
359
|
|
|
345
|
-
def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **
|
|
360
|
+
def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **kwargs):
|
|
346
361
|
sumstats = insumstats.copy()
|
|
347
362
|
##start function with col checking##########################################################
|
|
348
363
|
_start_line = "run LD score regression for genetic correlation"
|
|
@@ -366,10 +381,10 @@ def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **args):
|
|
|
366
381
|
log.write(" -Please cite LDSC: Bulik-Sullivan, B., et al. An Atlas of Genetic Correlations across Human Diseases and Traits. Nature Genetics, 2015.", verbose=verbose)
|
|
367
382
|
log.write(" -Arguments:", verbose=verbose)
|
|
368
383
|
|
|
369
|
-
for key, value in
|
|
384
|
+
for key, value in kwargs.items():
|
|
370
385
|
log.write(" -{}:{}".format(key, value), verbose=verbose)
|
|
371
386
|
|
|
372
|
-
default_args = ARGS(**
|
|
387
|
+
default_args = ARGS(**kwargs)
|
|
373
388
|
|
|
374
389
|
if "Z" not in sumstats.columns:
|
|
375
390
|
sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
|
|
@@ -402,7 +417,7 @@ def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **args):
|
|
|
402
417
|
####################################################################################################################
|
|
403
418
|
|
|
404
419
|
|
|
405
|
-
def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **
|
|
420
|
+
def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **kwargs):
|
|
406
421
|
sumstats = insumstats.copy()
|
|
407
422
|
##start function with col checking##########################################################
|
|
408
423
|
_start_line = "run LD score regression"
|
|
@@ -426,10 +441,10 @@ def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **args):
|
|
|
426
441
|
log.write(" -Please cite LDSC: Finucane, H. K., Reshef, Y. A., Anttila, V., Slowikowski, K., Gusev, A., Byrnes, A., ... & Price, A. L. (2018). Heritability enrichment of specifically expressed genes identifies disease-relevant tissues and cell types. Nature genetics, 50(4), 621-629.", verbose=verbose)
|
|
427
442
|
log.write(" -Arguments:", verbose=verbose)
|
|
428
443
|
|
|
429
|
-
for key, value in
|
|
444
|
+
for key, value in kwargs.items():
|
|
430
445
|
log.write(" -{}:{}".format(key, value), verbose=verbose)
|
|
431
446
|
|
|
432
|
-
default_args = ARGS(**
|
|
447
|
+
default_args = ARGS(**kwargs)
|
|
433
448
|
|
|
434
449
|
if "Z" not in sumstats.columns:
|
|
435
450
|
sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
|
|
@@ -441,4 +456,42 @@ def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **args):
|
|
|
441
456
|
|
|
442
457
|
log.write(" -Results have been stored in .ldsc_partitioned_h2", verbose=verbose)
|
|
443
458
|
finished(log=log,verbose=verbose,end_line=_end_line)
|
|
444
|
-
return summary
|
|
459
|
+
return summary
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _munge_sumstats(sumstats, log,
|
|
464
|
+
info=0.9, maf=0.01,
|
|
465
|
+
n=None, nopalindromic=True,
|
|
466
|
+
exclude_hla=True, exclude_sexchr=True,
|
|
467
|
+
verbose=True, **kwargs):
|
|
468
|
+
if "CHR" in sumstats.columns and "POS" in sumstats.columns:
|
|
469
|
+
if exclude_hla == True:
|
|
470
|
+
sumstats = _exclude_hla(sumstats, verbose=verbose, log=log)
|
|
471
|
+
|
|
472
|
+
if "CHR" in sumstats.columns:
|
|
473
|
+
if exclude_sexchr == True:
|
|
474
|
+
sumstats = _exclude_sexchr(sumstats, verbose=verbose, log=log)
|
|
475
|
+
|
|
476
|
+
# filter_info
|
|
477
|
+
if "INFO" in sumstats.columns:
|
|
478
|
+
sumstats = filtervalues(sumstats, 'INFO >={}'.format(info) ,verbose=verbose, log=log)
|
|
479
|
+
|
|
480
|
+
# frequency
|
|
481
|
+
if "EAF" in sumstats.columns:
|
|
482
|
+
sumstats = filtervalues(sumstats,'EAF>={} and EAF<={}'.format(maf, 1-maf),verbose=verbose, log=log)
|
|
483
|
+
|
|
484
|
+
# N
|
|
485
|
+
if "N" in sumstats.columns:
|
|
486
|
+
if n is None:
|
|
487
|
+
min_n = sumstats.N.quantile(0.9) / 1.5
|
|
488
|
+
else:
|
|
489
|
+
min_n = n
|
|
490
|
+
sumstats = filtervalues(sumstats,'N>={}'.format(min_n),verbose=verbose, log=log)
|
|
491
|
+
|
|
492
|
+
# remove strand-ambiguous (palindromic) SNPs
|
|
493
|
+
if "EA" in sumstats.columns and "NEA" in sumstats.columns:
|
|
494
|
+
if nopalindromic==True:
|
|
495
|
+
sumstats = _filter_palindromic(sumstats, mode="out", verbose=verbose, log=log)
|
|
496
|
+
|
|
497
|
+
return sumstats
|
gwaslab/util_ex_plink_filter.py
CHANGED
|
@@ -16,10 +16,10 @@ def _run_plink_filter(filter_flag, out_prefix):
|
|
|
16
16
|
--out {}
|
|
17
17
|
'''.format(filter_flag, out_prefix)
|
|
18
18
|
|
|
19
|
-
def _plink2_filter_to_flag(tmpdir="./",**
|
|
19
|
+
def _plink2_filter_to_flag(tmpdir="./",**kwargs):
|
|
20
20
|
combined_flag=""
|
|
21
21
|
temp_file_list=[]
|
|
22
|
-
for flag_with_underbar,value in
|
|
22
|
+
for flag_with_underbar,value in kwargs.items():
|
|
23
23
|
if isinstance(value, pd.DataFrame) or isinstance(value, pd.Series):
|
|
24
24
|
formated_flag, temp_file = _process_df_to_file(flag_with_underbar=flag_with_underbar,
|
|
25
25
|
df=value,
|
gwaslab/util_ex_run_clumping.py
CHANGED
|
@@ -15,11 +15,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
15
15
|
##start function with col checking##########################################################
|
|
16
16
|
_start_line = "perfrom clumping"
|
|
17
17
|
_end_line = "clumping"
|
|
18
|
-
_start_cols =["SNPID","CHR","POS"
|
|
18
|
+
_start_cols =["SNPID","CHR","POS"]
|
|
19
19
|
_start_function = ".clump()"
|
|
20
20
|
_must_args ={}
|
|
21
21
|
|
|
22
|
-
is_enough_info = start_to(sumstats=
|
|
22
|
+
is_enough_info = start_to(sumstats=insumstats,
|
|
23
23
|
log=log,
|
|
24
24
|
verbose=verbose,
|
|
25
25
|
start_line=_start_line,
|
gwaslab/util_in_filter_value.py
CHANGED
|
@@ -274,7 +274,7 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS", ea="EA", nea="NE
|
|
|
274
274
|
finished(log,verbose,_end_line)
|
|
275
275
|
return sumstats, inferred_build
|
|
276
276
|
|
|
277
|
-
def sampling(sumstats,n=1, p=None, verbose=True,log=Log(),**
|
|
277
|
+
def sampling(sumstats,n=1, p=None, verbose=True,log=Log(),**kwargs):
|
|
278
278
|
|
|
279
279
|
log.write("Start to randomly select variants from the sumstats...", verbose=verbose)
|
|
280
280
|
if p is None:
|
|
@@ -289,17 +289,17 @@ def sampling(sumstats,n=1, p=None, verbose=True,log=Log(),**args):
|
|
|
289
289
|
else:
|
|
290
290
|
raise ValueError("Please input a number in (0,1)")
|
|
291
291
|
|
|
292
|
-
if "random_state" in
|
|
293
|
-
log.write(" -Random state (seed): {}".format(
|
|
292
|
+
if "random_state" in kwargs.keys():
|
|
293
|
+
log.write(" -Random state (seed): {}".format(kwargs["random_state"]), verbose=verbose)
|
|
294
294
|
else:
|
|
295
|
-
|
|
296
|
-
log.write(" -Random state (seed): {}".format(
|
|
297
|
-
sampled = sumstats.sample(n=n,**
|
|
295
|
+
kwargs["random_state"] = np.random.randint(0,4294967295)
|
|
296
|
+
log.write(" -Random state (seed): {}".format(kwargs["random_state"]), verbose=verbose)
|
|
297
|
+
sampled = sumstats.sample(n=n,**kwargs)
|
|
298
298
|
log.write("Finished sampling...", verbose=verbose)
|
|
299
299
|
gc.collect()
|
|
300
300
|
return sampled
|
|
301
301
|
|
|
302
|
-
def _get_flanking(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**
|
|
302
|
+
def _get_flanking(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**kwargs):
|
|
303
303
|
|
|
304
304
|
log.write("Start to extract variants in the flanking regions:",verbose=verbose)
|
|
305
305
|
log.write(" - Central variant: {}".format(snpid))
|
|
@@ -320,7 +320,7 @@ def _get_flanking(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**ar
|
|
|
320
320
|
|
|
321
321
|
return flanking
|
|
322
322
|
|
|
323
|
-
def _get_flanking_by_id(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**
|
|
323
|
+
def _get_flanking_by_id(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**kwargs):
|
|
324
324
|
|
|
325
325
|
log.write("Start to extract variants in the flanking regions using rsID or SNPID...",verbose=verbose)
|
|
326
326
|
log.write(" - Central variants: {}".format(snpid), verbose=verbose)
|
|
@@ -359,7 +359,7 @@ def _get_flanking_by_id(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(
|
|
|
359
359
|
|
|
360
360
|
return flanking
|
|
361
361
|
|
|
362
|
-
def _get_flanking_by_chrpos(sumstats, chrpos, windowsizekb=500, verbose=True,log=Log(),**
|
|
362
|
+
def _get_flanking_by_chrpos(sumstats, chrpos, windowsizekb=500, verbose=True,log=Log(),**kwargs):
|
|
363
363
|
|
|
364
364
|
log.write("Start to extract variants in the flanking regions using CHR and POS...",verbose=verbose)
|
|
365
365
|
log.write(" - Central positions: {}".format(chrpos), verbose=verbose)
|
|
@@ -447,6 +447,24 @@ def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=25000000 ,upper=3400000
|
|
|
447
447
|
|
|
448
448
|
return sumstats
|
|
449
449
|
|
|
450
|
+
def _exclude_sexchr(sumstats, chrom="CHR", pos="POS", sexchrs=[23,24,25], log=Log(), verbose=True):
|
|
451
|
+
|
|
452
|
+
raw_len = len(sumstats)
|
|
453
|
+
|
|
454
|
+
if str(sumstats[chrom].dtype) == "string":
|
|
455
|
+
sexchrs_string = [str(i) for i in sexchrs]
|
|
456
|
+
is_in_sexchr = sumstats[chrom].astype("string").isin(sexchrs_string)
|
|
457
|
+
else:
|
|
458
|
+
is_in_sexchr = sumstats[chrom].isin(sexchrs)
|
|
459
|
+
|
|
460
|
+
sumstats = sumstats.loc[~is_in_sexchr, : ]
|
|
461
|
+
|
|
462
|
+
after_len = len(sumstats)
|
|
463
|
+
|
|
464
|
+
log.write(" -Excluded {} variants on sex chromosomes ({})...".format(raw_len - after_len,sexchrs),verbose=verbose)
|
|
465
|
+
|
|
466
|
+
return sumstats
|
|
467
|
+
|
|
450
468
|
def _extract(sumstats, extract=None, id_use="SNPID", log=Log(), verbose=True ):
|
|
451
469
|
if extract is not None:
|
|
452
470
|
log.write(" -Extracting {} variants from sumstats...".format(len(extract)),verbose=verbose)
|
gwaslab/viz_plot_regionalplot.py
CHANGED
|
@@ -650,8 +650,8 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
|
|
|
650
650
|
#else:
|
|
651
651
|
# to_change_color = sumstats["RSQ2"]>ld_threshold
|
|
652
652
|
# sumstats.loc[to_change_color,"LD2"] = index+1
|
|
653
|
-
to_change_color = sumstats["
|
|
654
|
-
sumstats.loc[to_change_color,"
|
|
653
|
+
to_change_color = sumstats["RSQ2"]>ld_threshold
|
|
654
|
+
sumstats.loc[to_change_color,"LD2"] = index+2
|
|
655
655
|
|
|
656
656
|
sumstats.loc[lead_id,"LD2"] = len(region_ld_threshold)+2
|
|
657
657
|
sumstats["LEAD2"]="Other variants"
|