gwaslab 3.4.40__py3-none-any.whl → 3.4.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

gwaslab/util_ex_ldsc.py CHANGED
@@ -7,244 +7,249 @@ from gwaslab.qc_fix_sumstats import finished
7
7
  from gwaslab.qc_fix_sumstats import skipped
8
8
  from gwaslab.io_read_ldsc import parse_ldsc_summary
9
9
  from gwaslab.io_read_ldsc import parse_partitioned_ldsc_summary
10
+ from gwaslab.util_in_filter_value import filtervalues
11
+ from gwaslab.util_in_filter_value import _filter_palindromic
12
+ from gwaslab.util_in_filter_value import _exclude_hla
13
+ from gwaslab.util_in_filter_value import _exclude_sexchr
14
+
10
15
class ARGS():
    """Plain attribute container for LDSC options.

    The ``_estimate_*_by_ldsc`` functions in this module build an instance
    with ``ARGS(**kwargs)``.  Every option listed in ``_DEFAULTS`` becomes
    an instance attribute: the caller's value when the keyword was
    supplied, otherwise the listed default.

    Notes
    -----
    * ``out`` is always set to ``"ldsc"``; an ``out`` keyword is not
      consulted.
    * Keywords that are not listed in ``_DEFAULTS`` are ignored and do not
      become attributes.
    """

    # (attribute name, value used when the keyword is absent), listed in
    # the order in which the attributes are assigned.
    _DEFAULTS = (
        ("bfile", None),
        ("l2", None),
        ("extract", None),
        ("keep", None),
        ("ld_wind_snps", None),
        ("ld_wind_kb", None),
        ("ld_wind_cm", None),
        ("print_snps", None),
        ("annot", None),
        ("thin_annot", None),
        ("cts_bin", None),
        ("cts_breaks", None),
        ("cts_names", None),
        ("per_allele", None),
        ("pq_exp", None),
        ("no_print_annot", None),
        ("h2", None),
        ("h2_cts", None),
        ("rg", None),
        ("ref_ld", None),
        ("ref_ld_chr", None),
        ("w_ld", None),
        ("w_ld_chr", None),
        ("overlap_annot", None),
        ("print_coefficients", "ldsc"),
        ("frqfile", None),
        ("frqfile_chr", None),
        ("no_intercept", None),
        ("intercept_h2", None),
        ("intercept_gencov", None),
        ("M", None),
        ("two_step", None),
        ("chisq_max", None),
        ("ref_ld_chr_cts", None),
        ("print_all_cts", False),
        ("print_cov", None),
        ("print_delete_vals", False),
        ("chunk_size", 50),
        ("pickle", False),
        ("yes_really", False),
        ("invert_anyway", False),
        ("n_blocks", 200),
        ("not_M_5_50", False),
        ("no_check_alleles", False),
        ("return_silly_things", False),
        ("samp_prev", None),
        ("pop_prev", None),
    )

    def __init__(self, **kwargs):
        # Fixed value, assigned first so the attribute order is unchanged.
        self.out = "ldsc"

        # A supplied keyword wins even when its value is None or False;
        # only a missing keyword falls back to the default.
        for name, default in self._DEFAULTS:
            setattr(self, name, kwargs.get(name, default))
250
255
 
@@ -252,8 +257,16 @@ class ARGS():
252
257
  ####################################################################################################################
253
258
 
254
259
 
255
- def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **args):
260
+ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=None, **kwargs):
256
261
  sumstats = insumstats.copy()
262
+
263
+ if munge:
264
+ if munge_args is None:
265
+ munge_args={}
266
+ log.write("Start to munge sumstats.")
267
+ sumstats = _munge_sumstats(sumstats, log=log, verbose=verbose,**munge_args)
268
+ log.write("Finished munging sumstats.")
269
+
257
270
  ##start function with col checking##########################################################
258
271
  _start_line = "run LD score regression"
259
272
  _end_line = "running LD score regression"
@@ -274,12 +287,14 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **args):
274
287
  log.write(" -Run single variate LD score regression:", verbose=verbose)
275
288
  log.write(" -Adopted from LDSC source code: https://github.com/bulik/ldsc", verbose=verbose)
276
289
  log.write(" -Please cite LDSC: Bulik-Sullivan, et al. LD Score Regression Distinguishes Confounding from Polygenicity in Genome-Wide Association Studies. Nature Genetics, 2015.", verbose=verbose)
277
- log.write(" -Arguments:", verbose=verbose)
278
290
 
279
- for key, value in args.items():
291
+
292
+
293
+
294
+ log.write(" -Arguments:", verbose=verbose)
295
+ for key, value in kwargs.items():
280
296
  log.write(" -{}:{}".format(key, value), verbose=verbose)
281
-
282
- default_args = ARGS(**args)
297
+ default_args = ARGS(**kwargs)
283
298
 
284
299
  if "Z" not in sumstats.columns:
285
300
  sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
@@ -296,7 +311,7 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **args):
296
311
 
297
312
  ####################################################################################################################
298
313
 
299
- def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **args):
314
+ def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **kwargs):
300
315
  sumstats = insumstats.copy()
301
316
  ##start function with col checking##########################################################
302
317
  _start_line = "run LD score regression"
@@ -320,10 +335,10 @@ def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **args):
320
335
  log.write(" -Please cite LDSC: Bulik-Sullivan, et al. LD Score Regression Distinguishes Confounding from Polygenicity in Genome-Wide Association Studies. Nature Genetics, 2015.", verbose=verbose)
321
336
  log.write(" -Arguments:", verbose=verbose)
322
337
 
323
- for key, value in args.items():
338
+ for key, value in kwargs.items():
324
339
  log.write(" -{}:{}".format(key, value), verbose=verbose)
325
340
 
326
- default_args = ARGS(**args)
341
+ default_args = ARGS(**kwargs)
327
342
 
328
343
  if "Z" not in sumstats.columns:
329
344
  sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
@@ -342,7 +357,7 @@ def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **args):
342
357
 
343
358
 
344
359
 
345
- def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **args):
360
+ def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **kwargs):
346
361
  sumstats = insumstats.copy()
347
362
  ##start function with col checking##########################################################
348
363
  _start_line = "run LD score regression for genetic correlation"
@@ -366,10 +381,10 @@ def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **args):
366
381
  log.write(" -Please cite LDSC: Bulik-Sullivan, B., et al. An Atlas of Genetic Correlations across Human Diseases and Traits. Nature Genetics, 2015.", verbose=verbose)
367
382
  log.write(" -Arguments:", verbose=verbose)
368
383
 
369
- for key, value in args.items():
384
+ for key, value in kwargs.items():
370
385
  log.write(" -{}:{}".format(key, value), verbose=verbose)
371
386
 
372
- default_args = ARGS(**args)
387
+ default_args = ARGS(**kwargs)
373
388
 
374
389
  if "Z" not in sumstats.columns:
375
390
  sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
@@ -402,7 +417,7 @@ def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **args):
402
417
  ####################################################################################################################
403
418
 
404
419
 
405
- def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **args):
420
+ def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **kwargs):
406
421
  sumstats = insumstats.copy()
407
422
  ##start function with col checking##########################################################
408
423
  _start_line = "run LD score regression"
@@ -426,10 +441,10 @@ def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **args):
426
441
  log.write(" -Please cite LDSC: Finucane, H. K., Reshef, Y. A., Anttila, V., Slowikowski, K., Gusev, A., Byrnes, A., ... & Price, A. L. (2018). Heritability enrichment of specifically expressed genes identifies disease-relevant tissues and cell types. Nature genetics, 50(4), 621-629.", verbose=verbose)
427
442
  log.write(" -Arguments:", verbose=verbose)
428
443
 
429
- for key, value in args.items():
444
+ for key, value in kwargs.items():
430
445
  log.write(" -{}:{}".format(key, value), verbose=verbose)
431
446
 
432
- default_args = ARGS(**args)
447
+ default_args = ARGS(**kwargs)
433
448
 
434
449
  if "Z" not in sumstats.columns:
435
450
  sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
@@ -441,4 +456,42 @@ def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **args):
441
456
 
442
457
  log.write(" -Results have been stored in .ldsc_partitioned_h2", verbose=verbose)
443
458
  finished(log=log,verbose=verbose,end_line=_end_line)
444
- return summary
459
+ return summary
460
+
461
+
462
+
463
def _munge_sumstats(sumstats, log,
                    info=0.9, maf=0.01,
                    n=None, nopalindromic=True,
                    exclude_hla=True, exclude_sexchr=True,
                    verbose=True, **kwargs):
    """Apply a fixed sequence of quality filters to ``sumstats``.

    Every step runs only when the columns it needs are present; a step
    whose columns are missing is skipped without any message.

    Parameters
    ----------
    sumstats : DataFrame-like
        Summary statistics; must expose ``.columns``.
    log : object
        Logger forwarded to every filtering helper.
    info : float
        Lower bound (inclusive) applied to the ``INFO`` column.
    maf : float
        ``EAF`` is kept within ``[maf, 1 - maf]`` (both inclusive).
    n : number or None
        Lower bound (inclusive) applied to ``N``.  When ``None`` the bound
        is the 90th percentile of ``N`` divided by 1.5.
    nopalindromic : bool
        When true and ``EA``/``NEA`` exist, call
        ``_filter_palindromic(..., mode="out")``.
    exclude_hla : bool
        When true and ``CHR``/``POS`` exist, call ``_exclude_hla``.
    exclude_sexchr : bool
        When true and ``CHR`` exists, call ``_exclude_sexchr``.
    verbose : bool
        Forwarded to every filtering helper.
    **kwargs
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    The filtered ``sumstats`` as returned by the last helper that ran.
    """
    # Region-based exclusions. The helpers are project functions whose
    # exact ranges are defined elsewhere.
    if exclude_hla and "CHR" in sumstats.columns and "POS" in sumstats.columns:
        sumstats = _exclude_hla(sumstats, verbose=verbose, log=log)

    if exclude_sexchr and "CHR" in sumstats.columns:
        sumstats = _exclude_sexchr(sumstats, verbose=verbose, log=log)

    # Imputation quality.
    if "INFO" in sumstats.columns:
        sumstats = filtervalues(sumstats, 'INFO >={}'.format(info), verbose=verbose, log=log)

    # Allele frequency: symmetric bounds so both rare ends are dropped.
    if "EAF" in sumstats.columns:
        sumstats = filtervalues(sumstats, 'EAF>={} and EAF<={}'.format(maf, 1-maf), verbose=verbose, log=log)

    # Sample size.
    if "N" in sumstats.columns:
        if n is None:
            min_n = sumstats.N.quantile(0.9) / 1.5
        else:
            min_n = n
        sumstats = filtervalues(sumstats, 'N>={}'.format(min_n), verbose=verbose, log=log)

    # Remove strand-ambiguous (palindromic) SNPs.
    if nopalindromic and "EA" in sumstats.columns and "NEA" in sumstats.columns:
        sumstats = _filter_palindromic(sumstats, mode="out", verbose=verbose, log=log)

    return sumstats
@@ -16,10 +16,10 @@ def _run_plink_filter(filter_flag, out_prefix):
16
16
  --out {}
17
17
  '''.format(filter_flag, out_prefix)
18
18
 
19
- def _plink2_filter_to_flag(tmpdir="./",**args):
19
+ def _plink2_filter_to_flag(tmpdir="./",**kwargs):
20
20
  combined_flag=""
21
21
  temp_file_list=[]
22
- for flag_with_underbar,value in args.items():
22
+ for flag_with_underbar,value in kwargs.items():
23
23
  if isinstance(value, pd.DataFrame) or isinstance(value, pd.Series):
24
24
  formated_flag, temp_file = _process_df_to_file(flag_with_underbar=flag_with_underbar,
25
25
  df=value,
@@ -15,11 +15,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
15
15
  ##start function with col checking##########################################################
16
16
  _start_line = "perfrom clumping"
17
17
  _end_line = "clumping"
18
- _start_cols =["SNPID","CHR","POS","EA","NEA"]
18
+ _start_cols =["SNPID","CHR","POS"]
19
19
  _start_function = ".clump()"
20
20
  _must_args ={}
21
21
 
22
- is_enough_info = start_to(sumstats=sumstats,
22
+ is_enough_info = start_to(sumstats=insumstats,
23
23
  log=log,
24
24
  verbose=verbose,
25
25
  start_line=_start_line,
@@ -274,7 +274,7 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS", ea="EA", nea="NE
274
274
  finished(log,verbose,_end_line)
275
275
  return sumstats, inferred_build
276
276
 
277
- def sampling(sumstats,n=1, p=None, verbose=True,log=Log(),**args):
277
+ def sampling(sumstats,n=1, p=None, verbose=True,log=Log(),**kwargs):
278
278
 
279
279
  log.write("Start to randomly select variants from the sumstats...", verbose=verbose)
280
280
  if p is None:
@@ -289,17 +289,17 @@ def sampling(sumstats,n=1, p=None, verbose=True,log=Log(),**args):
289
289
  else:
290
290
  raise ValueError("Please input a number in (0,1)")
291
291
 
292
- if "random_state" in args.keys():
293
- log.write(" -Random state (seed): {}".format(args["random_state"]), verbose=verbose)
292
+ if "random_state" in kwargs.keys():
293
+ log.write(" -Random state (seed): {}".format(kwargs["random_state"]), verbose=verbose)
294
294
  else:
295
- args["random_state"] = np.random.randint(0,4294967295)
296
- log.write(" -Random state (seed): {}".format(args["random_state"]), verbose=verbose)
297
- sampled = sumstats.sample(n=n,**args)
295
+ kwargs["random_state"] = np.random.randint(0,4294967295)
296
+ log.write(" -Random state (seed): {}".format(kwargs["random_state"]), verbose=verbose)
297
+ sampled = sumstats.sample(n=n,**kwargs)
298
298
  log.write("Finished sampling...", verbose=verbose)
299
299
  gc.collect()
300
300
  return sampled
301
301
 
302
- def _get_flanking(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**args):
302
+ def _get_flanking(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**kwargs):
303
303
 
304
304
  log.write("Start to extract variants in the flanking regions:",verbose=verbose)
305
305
  log.write(" - Central variant: {}".format(snpid))
@@ -320,7 +320,7 @@ def _get_flanking(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**ar
320
320
 
321
321
  return flanking
322
322
 
323
- def _get_flanking_by_id(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**args):
323
+ def _get_flanking_by_id(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(),**kwargs):
324
324
 
325
325
  log.write("Start to extract variants in the flanking regions using rsID or SNPID...",verbose=verbose)
326
326
  log.write(" - Central variants: {}".format(snpid), verbose=verbose)
@@ -359,7 +359,7 @@ def _get_flanking_by_id(sumstats, snpid, windowsizekb=500, verbose=True,log=Log(
359
359
 
360
360
  return flanking
361
361
 
362
- def _get_flanking_by_chrpos(sumstats, chrpos, windowsizekb=500, verbose=True,log=Log(),**args):
362
+ def _get_flanking_by_chrpos(sumstats, chrpos, windowsizekb=500, verbose=True,log=Log(),**kwargs):
363
363
 
364
364
  log.write("Start to extract variants in the flanking regions using CHR and POS...",verbose=verbose)
365
365
  log.write(" - Central positions: {}".format(chrpos), verbose=verbose)
@@ -447,6 +447,24 @@ def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=25000000 ,upper=3400000
447
447
 
448
448
  return sumstats
449
449
 
450
def _exclude_sexchr(sumstats, chrom="CHR", pos="POS", sexchrs=[23,24,25], log=Log(), verbose=True):
    """Drop the rows whose chromosome is listed in ``sexchrs``.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table containing the ``chrom`` column.
    chrom : str
        Name of the chromosome column.
    pos : str
        Not used by this function; kept so the signature is unchanged.
    sexchrs : list
        Chromosome codes to exclude.  The default list is only read,
        never modified.
    log : Log
        Receives one message with the number of excluded rows.
    verbose : bool
        Forwarded to ``log.write``.

    Returns
    -------
    pandas.DataFrame
        ``sumstats`` without the matching rows.
    """
    raw_len = len(sumstats)

    chr_col = sumstats[chrom]
    sexchrs_string = [str(i) for i in sexchrs]

    if str(chr_col.dtype) == "string":
        # pandas string dtype: compare text with text.
        is_in_sexchr = chr_col.isin(sexchrs_string)
    elif chr_col.dtype == object:
        # An object column may hold Python strings such as "23", which
        # never equal the integer codes.  Match either the raw value or
        # its string form so such rows are not silently kept.
        is_in_sexchr = chr_col.isin(sexchrs) | chr_col.astype(str).isin(sexchrs_string)
    else:
        is_in_sexchr = chr_col.isin(sexchrs)

    sumstats = sumstats.loc[~is_in_sexchr, :]

    after_len = len(sumstats)

    log.write(" -Excluded {} variants on sex chromosomes ({})...".format(raw_len - after_len,sexchrs),verbose=verbose)

    return sumstats
467
+
450
468
  def _extract(sumstats, extract=None, id_use="SNPID", log=Log(), verbose=True ):
451
469
  if extract is not None:
452
470
  log.write(" -Extracting {} variants from sumstats...".format(len(extract)),verbose=verbose)
@@ -650,8 +650,8 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
650
650
  #else:
651
651
  # to_change_color = sumstats["RSQ2"]>ld_threshold
652
652
  # sumstats.loc[to_change_color,"LD2"] = index+1
653
- to_change_color = sumstats["RSQ"]>ld_threshold
654
- sumstats.loc[to_change_color,"LD"] = index+2
653
+ to_change_color = sumstats["RSQ2"]>ld_threshold
654
+ sumstats.loc[to_change_color,"LD2"] = index+2
655
655
 
656
656
  sumstats.loc[lead_id,"LD2"] = len(region_ld_threshold)+2
657
657
  sumstats["LEAD2"]="Other variants"