gwaslab 3.4.41__py3-none-any.whl → 3.4.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

gwaslab/bd_common_data.py CHANGED
@@ -298,9 +298,22 @@ def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
298
298
 
299
299
  return protein_coding_path
300
300
 
301
+ ####################################################################################################################
302
+ # From BioPython: https://github.com/biopython/biopython/blob/c5a6b1374267d769b19c1022b4b45472316e78b4/Bio/Seq.py#L36
303
+ def _maketrans(complement_mapping):
304
+ """Make a python string translation table.
305
+
306
+ Arguments:
307
+ - complement_mapping - a dictionary.
301
308
 
302
-
309
+ Returns a translation table (a bytes object of length 256) for use with
310
+ the python string's translate method.
303
311
 
312
+ Compatible with lower case and upper case sequences.
313
+ """
314
+ keys = "".join(complement_mapping.keys()).encode("ASCII")
315
+ values = "".join(complement_mapping.values()).encode("ASCII")
316
+ return bytes.maketrans(keys + keys.lower(), values + values.lower())
304
317
 
305
318
  ####################################################################################################################
306
319
 
gwaslab/bd_get_hapmap3.py CHANGED
@@ -12,7 +12,7 @@ from gwaslab.qc_fix_sumstats import finished
12
12
  #A P-value
13
13
  #A signed summary statistic (beta, OR, log odds, Z-score, etc)
14
14
 
15
- def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",build="19", verbose=True, match_allele= True, log=Log()):
15
+ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",build="19", verbose=True, match_allele= True, how="inner", log=Log()):
16
16
  ##start function with col checking##########################################################
17
17
  _start_line = "extract HapMap3 SNPs"
18
18
  _end_line = "extracting HapMap3 SNPs"
@@ -47,7 +47,7 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
47
47
  #rsid A1 A2 #CHROM POS
48
48
  #rs3094315 G A 1 752566
49
49
 
50
- if rsid in sumstats.columns:
50
+ if rsid in sumstats.columns and how=="inner":
51
51
  output = sumstats.loc[sumstats[rsid].isin(hapmap3_ref["rsid"].values),:].copy()
52
52
  return output
53
53
 
@@ -56,11 +56,15 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
56
56
  sumstats ["chr:pos"] = sumstats[chrom].astype("string")+":"+sumstats[pos].astype("string")
57
57
  hapmap3_ref["chr:pos"] = hapmap3_ref["#CHROM"]+":"+hapmap3_ref["POS"]
58
58
  hapmap3_ref = hapmap3_ref.rename(columns={"rsid":"rsID"})
59
- output = pd.merge(sumstats,hapmap3_ref.loc[:,["chr:pos","rsID"]+additional_cols],left_on="chr:pos",right_on="chr:pos",how="inner",suffixes=('', '_hapmap3')).copy()
59
+ output = pd.merge(sumstats,hapmap3_ref.loc[:,["chr:pos","rsID"]+additional_cols],left_on="chr:pos",right_on="chr:pos",how=how,suffixes=('', '_hapmap3')).copy()
60
60
  if match_allele:
61
61
  log.write(" -Checking if alleles are same...")
62
62
  is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
63
63
  | ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"]))
64
+ if how=="right":
65
+ is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
66
+ | ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"])) | output[ea].isna()
67
+
64
68
  log.write(" -Variants with macthed alleles: {}".format(sum(is_matched)))
65
69
  output = output.loc[is_matched,:]
66
70
  output = output.drop(columns=["chr:pos"]+additional_cols)
gwaslab/g_Sumstats.py CHANGED
@@ -23,6 +23,7 @@ from gwaslab.qc_fix_sumstats import _process_build
23
23
  from gwaslab.hm_harmonize_sumstats import parallelecheckaf
24
24
  from gwaslab.hm_harmonize_sumstats import paralleleinferaf
25
25
  from gwaslab.hm_harmonize_sumstats import checkref
26
+ from gwaslab.hm_harmonize_sumstats import oldcheckref
26
27
  from gwaslab.hm_harmonize_sumstats import rsidtochrpos
27
28
  from gwaslab.hm_harmonize_sumstats import parallelizeassignrsid
28
29
  from gwaslab.hm_harmonize_sumstats import parallelinferstrand
@@ -35,6 +36,7 @@ from gwaslab.util_in_filter_value import filterregionout
35
36
  from gwaslab.util_in_filter_value import _filter_indel
36
37
  from gwaslab.util_in_filter_value import _filter_palindromic
37
38
  from gwaslab.util_in_filter_value import _filter_snp
39
+ from gwaslab.util_in_filter_value import _exclude_hla
38
40
  from gwaslab.util_in_filter_value import inferbuild
39
41
  from gwaslab.util_in_filter_value import sampling
40
42
  from gwaslab.util_in_filter_value import _get_flanking
@@ -238,15 +240,15 @@ class Sumstats():
238
240
  self.data, self.meta["gwaslab"]["genome_build"] = _set_build(self.data, build=build, log=self.log,verbose=verbose)
239
241
  gc.collect()
240
242
 
241
- def infer_build(self,**args):
242
- self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**args)
243
+ def infer_build(self,verbose=True,**kwargs):
244
+ self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,log=self.log,verbose=verbose,**kwargs)
243
245
 
244
- def liftover(self,to_build, from_build=None,**args):
246
+ def liftover(self,to_build, from_build=None,**kwargs):
245
247
  if from_build is None:
246
248
  if self.meta["gwaslab"]["genome_build"]=="99":
247
- self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**args)
249
+ self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**kwargs)
248
250
  from_build = self.meta["gwaslab"]["genome_build"]
249
- self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**args)
251
+ self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**kwargs)
250
252
  self.meta["is_sorted"] = False
251
253
  self.meta["is_harmonised"] = False
252
254
  self.meta["gwaslab"]["genome_build"]=to_build
@@ -259,7 +261,7 @@ class Sumstats():
259
261
  n_cores=1,
260
262
  fixid_args={},
261
263
  removedup_args={},
262
- fixchr_agrs={},
264
+ fixchr_args={},
263
265
  fixpos_args={},
264
266
  fixallele_args={},
265
267
  sanitycheckstats_args={},
@@ -269,8 +271,8 @@ class Sumstats():
269
271
  verbose=True):
270
272
  ###############################################
271
273
  # try to fix data without dropping any information
272
- self.data = fixID(self.data,verbose=verbose, **fixid_args)
273
- self.data = fixchr(self.data,log=self.log,remove=remove,verbose=verbose,**fixchr_agrs)
274
+ self.data = fixID(self.data,log=self.log,verbose=verbose, **fixid_args)
275
+ self.data = fixchr(self.data,log=self.log,remove=remove,verbose=verbose,**fixchr_args)
274
276
  self.data = fixpos(self.data,log=self.log,remove=remove,verbose=verbose,**fixpos_args)
275
277
  self.data = fixallele(self.data,log=self.log,remove=remove,verbose=verbose,**fixallele_args)
276
278
  self.data = sanitycheckstats(self.data,log=self.log,verbose=verbose,**sanitycheckstats_args)
@@ -294,6 +296,7 @@ class Sumstats():
294
296
  ref_infer=None,
295
297
  ref_alt_freq=None,
296
298
  maf_threshold=0.40,
299
+ ref_seq_mode="v",
297
300
  n_cores=1,
298
301
  remove=False,
299
302
  checkref_args={},
@@ -303,7 +306,7 @@ class Sumstats():
303
306
  flipallelestats_args={},
304
307
  liftover_args={},
305
308
  fixid_args={},
306
- fixchr_agrs={},
309
+ fixchr_args={},
307
310
  fixpos_args={},
308
311
  fixallele_args={},
309
312
  sanitycheckstats_args={},
@@ -321,9 +324,9 @@ class Sumstats():
321
324
  # 1.6 sorting genomic coordinates and column order
322
325
  if basic_check is True:
323
326
 
324
- self.data = fixID(self.data,**fixid_args)
327
+ self.data = fixID(self.data,log=self.log,**fixid_args)
325
328
 
326
- self.data = fixchr(self.data,remove=remove,log=self.log,**fixchr_agrs)
329
+ self.data = fixchr(self.data,remove=remove,log=self.log,**fixchr_args)
327
330
 
328
331
  self.data = fixpos(self.data,remove=remove,log=self.log,**fixpos_args)
329
332
 
@@ -351,8 +354,10 @@ class Sumstats():
351
354
  # 3.2 infer strand for palindromic SNP (target build)
352
355
  #####################################################
353
356
  if ref_seq is not None:
354
-
355
- self.data = checkref(self.data,ref_seq,log=self.log,**checkref_args)
357
+ if ref_seq_mode=="v":
358
+ self.data = checkref(self.data,ref_seq,log=self.log,**checkref_args)
359
+ else:
360
+ self.data = oldcheckref(self.data,ref_seq,log=self.log,**checkref_args)
356
361
 
357
362
  self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
358
363
 
@@ -404,170 +409,183 @@ class Sumstats():
404
409
  return self
405
410
  ############################################################################################################
406
411
  #customizable API to build your own QC pipeline
407
- def fix_id(self,**args):
408
- self.data = fixID(self.data,log=self.log,**args)
409
- def fix_chr(self,**args):
410
- self.data = fixchr(self.data,log=self.log,**args)
411
- def fix_pos(self,**args):
412
- self.data = fixpos(self.data,log=self.log,**args)
413
- def fix_allele(self,**args):
414
- self.data = fixallele(self.data,log=self.log,**args)
415
- def remove_dup(self,**args):
416
- self.data = removedup(self.data,log=self.log,**args)
417
- def check_sanity(self,**args):
418
- self.data = sanitycheckstats(self.data,log=self.log,**args)
419
- def check_data_consistency(self, **args):
420
- _check_data_consistency(self.data,log=self.log,**args)
421
- def check_id(self,**args):
412
+ def fix_id(self,**kwargs):
413
+ self.data = fixID(self.data,log=self.log,**kwargs)
414
+ def fix_chr(self,**kwargs):
415
+ self.data = fixchr(self.data,log=self.log,**kwargs)
416
+ def fix_pos(self,**kwargs):
417
+ self.data = fixpos(self.data,log=self.log,**kwargs)
418
+ def fix_allele(self,**kwargs):
419
+ self.data = fixallele(self.data,log=self.log,**kwargs)
420
+ def remove_dup(self,**kwargs):
421
+ self.data = removedup(self.data,log=self.log,**kwargs)
422
+ def check_sanity(self,**kwargs):
423
+ self.data = sanitycheckstats(self.data,log=self.log,**kwargs)
424
+ def check_data_consistency(self, **kwargs):
425
+ _check_data_consistency(self.data,log=self.log,**kwargs)
426
+ def check_id(self,**kwargs):
422
427
  pass
423
- def check_ref(self,ref_seq,**args):
424
- self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
425
- self.data = checkref(self.data,ref_seq,log=self.log,**args)
426
- def infer_strand(self,ref_infer,**args):
428
+ def check_ref(self,ref_seq,ref_seq_mode="v",**kwargs):
429
+ if ref_seq_mode=="v":
430
+ self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
431
+ self.data = checkref(self.data,ref_seq,log=self.log,**kwargs)
432
+ else:
433
+ self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
434
+ self.data = oldcheckref(self.data,ref_seq,log=self.log,**kwargs)
435
+ def infer_strand(self,ref_infer,**kwargs):
427
436
  self.meta["gwaslab"]["references"]["ref_infer"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer"] , ref_infer)
428
- self.data = parallelinferstrand(self.data,ref_infer=ref_infer,log=self.log,**args)
429
- def flip_allele_stats(self,**args):
430
- self.data = flipallelestats(self.data,log=self.log,**args)
431
- def normalize_allele(self,**args):
432
- self.data = parallelnormalizeallele(self.data,log=self.log,**args)
437
+ self.data = parallelinferstrand(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
438
+ def flip_allele_stats(self,**kwargs):
439
+ self.data = flipallelestats(self.data,log=self.log,**kwargs)
440
+ def normalize_allele(self,**kwargs):
441
+ self.data = parallelnormalizeallele(self.data,log=self.log,**kwargs)
433
442
  def assign_rsid(self,
434
443
  ref_rsid_tsv=None,
435
444
  ref_rsid_vcf=None,
436
- **args):
445
+ **kwargs):
437
446
  if ref_rsid_tsv is not None:
438
- self.data = parallelizeassignrsid(self.data,path=ref_rsid_tsv,ref_mode="tsv",log=self.log,**args)
447
+ self.data = parallelizeassignrsid(self.data,path=ref_rsid_tsv,ref_mode="tsv",log=self.log,**kwargs)
439
448
  self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
440
449
  if ref_rsid_vcf is not None:
441
- self.data = parallelizeassignrsid(self.data,path=ref_rsid_vcf,ref_mode="vcf",log=self.log,**args)
450
+ self.data = parallelizeassignrsid(self.data,path=ref_rsid_vcf,ref_mode="vcf",log=self.log,**kwargs)
442
451
  self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_rsid_vcf"] , ref_rsid_vcf)
443
- def rsid_to_chrpos(self,**args):
444
- self.data = rsidtochrpos(self.data,log=self.log,**args)
445
- def rsid_to_chrpos2(self,**args):
446
- self.data = parallelrsidtochrpos(self.data,log=self.log,**args)
452
+ def rsid_to_chrpos(self,**kwargs):
453
+ self.data = rsidtochrpos(self.data,log=self.log,**kwargs)
454
+ def rsid_to_chrpos2(self,**kwargs):
455
+ self.data = parallelrsidtochrpos(self.data,log=self.log,**kwargs)
447
456
 
448
457
  ############################################################################################################
449
458
 
450
459
  def sort_coordinate(self,**sort_args):
451
460
  self.data = sortcoordinate(self.data,log=self.log,**sort_args)
452
461
  self.meta["is_sorted"] = True
453
- def sort_column(self,**args):
454
- self.data = sortcolumn(self.data,log=self.log,**args)
462
+ def sort_column(self,**kwargs):
463
+ self.data = sortcolumn(self.data,log=self.log,**kwargs)
455
464
 
456
465
  ############################################################################################################
457
- def fill_data(self, verbose=True, **args):
458
- self.data = filldata(self.data, verbose=verbose, **args)
466
+ def fill_data(self, verbose=True, **kwargs):
467
+ self.data = filldata(self.data, verbose=verbose, log=self.log, **kwargs)
459
468
  self.data = sortcolumn(self.data, verbose=verbose, log=self.log)
460
469
 
461
470
  # utilities ############################################################################################################
462
471
  # filter series ######################################################################
463
- def filter_flanking(self, inplace=False,**args):
472
+ def filter_flanking(self, inplace=False,**kwargs):
473
+ if inplace is False:
474
+ new_Sumstats_object = copy.deepcopy(self)
475
+ new_Sumstats_object.data = _get_flanking(new_Sumstats_object.data, **kwargs)
476
+ return new_Sumstats_object
477
+ else:
478
+ self.data = _get_flanking(self.data, **kwargs)
479
+ def filter_flanking_by_chrpos(self, chrpos, inplace=False,**kwargs):
464
480
  if inplace is False:
465
481
  new_Sumstats_object = copy.deepcopy(self)
466
- new_Sumstats_object.data = _get_flanking(new_Sumstats_object.data, **args)
482
+ new_Sumstats_object.data = _get_flanking_by_chrpos(new_Sumstats_object.data, chrpos, **kwargs)
467
483
  return new_Sumstats_object
468
484
  else:
469
- self.data = _get_flanking(self.data, **args)
470
- def filter_flanking_by_chrpos(self, chrpos, inplace=False,**args):
485
+ self.data = _get_flanking_by_chrpos(self.data, chrpos,**kwargs)
486
+ def filter_flanking_by_id(self, snpid, inplace=False,**kwargs):
471
487
  if inplace is False:
472
488
  new_Sumstats_object = copy.deepcopy(self)
473
- new_Sumstats_object.data = _get_flanking_by_chrpos(new_Sumstats_object.data, chrpos, **args)
489
+ new_Sumstats_object.data = _get_flanking_by_id(new_Sumstats_object.data, snpid, **kwargs)
474
490
  return new_Sumstats_object
475
491
  else:
476
- self.data = _get_flanking_by_chrpos(self.data, chrpos,**args)
477
- def filter_flanking_by_id(self, snpid, inplace=False,**args):
492
+ self.data = _get_flanking_by_id(self.data, snpid, **kwargs)
493
+ def filter_value(self, expr, inplace=False, **kwargs):
478
494
  if inplace is False:
479
495
  new_Sumstats_object = copy.deepcopy(self)
480
- new_Sumstats_object.data = _get_flanking_by_id(new_Sumstats_object.data, snpid, **args)
496
+ new_Sumstats_object.data = filtervalues(new_Sumstats_object.data,expr,log=new_Sumstats_object.log, **kwargs)
481
497
  return new_Sumstats_object
482
498
  else:
483
- self.data = _get_flanking_by_id(self.data, snpid, **args)
484
- def filter_value(self, expr, inplace=False, **args):
499
+ self.data = filtervalues(self.data, expr,log=self.log,**kwargs)
500
+ def filter_out(self, inplace=False, **kwargs):
485
501
  if inplace is False:
486
502
  new_Sumstats_object = copy.deepcopy(self)
487
- new_Sumstats_object.data = filtervalues(new_Sumstats_object.data,expr,log=new_Sumstats_object.log, **args)
503
+ new_Sumstats_object.data = filterout(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
488
504
  return new_Sumstats_object
489
505
  else:
490
- self.data = filtervalues(self.data, expr,log=self.log,**args)
491
- def filter_out(self, inplace=False, **args):
506
+ self.data = filterout(self.data,log=self.log,**kwargs)
507
+ def filter_in(self, inplace=False, **kwargs):
492
508
  if inplace is False:
493
509
  new_Sumstats_object = copy.deepcopy(self)
494
- new_Sumstats_object.data = filterout(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
510
+ new_Sumstats_object.data = filterin(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
495
511
  return new_Sumstats_object
496
512
  else:
497
- self.data = filterout(self.data,log=self.log,**args)
498
- def filter_in(self, inplace=False, **args):
513
+ self.data = filterin(self.data,log=self.log,**kwargs)
514
+ def filter_region_in(self, inplace=False, **kwargs):
499
515
  if inplace is False:
500
516
  new_Sumstats_object = copy.deepcopy(self)
501
- new_Sumstats_object.data = filterin(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
517
+ new_Sumstats_object.data = filterregionin(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
502
518
  return new_Sumstats_object
503
519
  else:
504
- self.data = filterin(self.data,log=self.log,**args)
505
- def filter_region_in(self, inplace=False, **args):
520
+ self.data = filterregionin(self.data,log=self.log,**kwargs)
521
+ def filter_region_out(self, inplace=False, **kwargs):
506
522
  if inplace is False:
507
523
  new_Sumstats_object = copy.deepcopy(self)
508
- new_Sumstats_object.data = filterregionin(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
524
+ new_Sumstats_object.data = filterregionout(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
509
525
  return new_Sumstats_object
510
526
  else:
511
- self.data = filterregionin(self.data,log=self.log,**args)
512
- def filter_region_out(self, inplace=False, **args):
527
+ self.data = filterregionout(self.data,log=self.log,**kwargs)
528
+ def filter_palindromic(self, inplace=False, **kwargs):
513
529
  if inplace is False:
514
530
  new_Sumstats_object = copy.deepcopy(self)
515
- new_Sumstats_object.data = filterregionout(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
531
+ new_Sumstats_object.data = _filter_palindromic(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
516
532
  return new_Sumstats_object
517
533
  else:
518
- self.data = filterregionout(self.data,log=self.log,**args)
519
- def filter_palindromic(self, inplace=False, **args):
534
+ self.data = _filter_palindromic(self.data,log=self.log,**kwargs)
535
+ def filter_snp(self, inplace=False, **kwargs):
520
536
  if inplace is False:
521
537
  new_Sumstats_object = copy.deepcopy(self)
522
- new_Sumstats_object.data = _filter_palindromic(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
538
+ new_Sumstats_object.data = _filter_snp(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
523
539
  return new_Sumstats_object
524
540
  else:
525
- self.data = _filter_palindromic(self.data,log=self.log,**args)
526
- def filter_snp(self, inplace=False, **args):
541
+ self.data = _filter_snp(self.data,log=self.log,**kwargs)
542
+ def filter_indel(self, inplace=False, **kwargs):
527
543
  if inplace is False:
528
544
  new_Sumstats_object = copy.deepcopy(self)
529
- new_Sumstats_object.data = _filter_snp(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
545
+ new_Sumstats_object.data = _filter_indel(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
530
546
  return new_Sumstats_object
531
547
  else:
532
- self.data = _filter_snp(self.data,log=self.log,**args)
533
- def filter_indel(self, inplace=False, **args):
548
+ self.data = _filter_indel(self.data,log=self.log,**kwargs)
549
+
550
+ def exclude_hla(self, inplace=False, **kwargs):
534
551
  if inplace is False:
535
552
  new_Sumstats_object = copy.deepcopy(self)
536
- new_Sumstats_object.data = _filter_indel(new_Sumstats_object.data,log=new_Sumstats_object.log,**args)
553
+ new_Sumstats_object.data = _exclude_hla(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
537
554
  return new_Sumstats_object
538
555
  else:
539
- self.data = _filter_indel(self.data,log=self.log,**args)
556
+ self.data = _exclude_hla(self.data,log=self.log,**kwargs)
557
+
540
558
 
541
- def random_variants(self,inplace=False,n=1,p=None,**args):
559
+ def random_variants(self,inplace=False,n=1,p=None,**kwargs):
542
560
  if inplace is True:
543
- self.data = sampling(self.data,n=n,p=p,log=self.log,**args)
561
+ self.data = sampling(self.data,n=n,p=p,log=self.log,**kwargs)
544
562
  else:
545
563
  new_Sumstats_object = copy.deepcopy(self)
546
- new_Sumstats_object.data = sampling(new_Sumstats_object.data,n=n,p=p,log=new_Sumstats_object.log,**args)
564
+ new_Sumstats_object.data = sampling(new_Sumstats_object.data,n=n,p=p,log=new_Sumstats_object.log,**kwargs)
547
565
  return new_Sumstats_object
548
566
 
549
- def filter_hapmap3(self, inplace=False, build=None, **args ):
567
+ def filter_hapmap3(self, inplace=False, build=None, **kwargs ):
550
568
  if build is None:
551
569
  build = self.meta["gwaslab"]["genome_build"]
552
570
  if inplace is True:
553
- self.data = gethapmap3(self.data, build=build,log=self.log, **args)
571
+ self.data = gethapmap3(self.data, build=build,log=self.log, **kwargs)
554
572
  else:
555
573
  new_Sumstats_object = copy.deepcopy(self)
556
- new_Sumstats_object.data = gethapmap3(new_Sumstats_object.data, build=build,log=self.log, **args)
574
+ new_Sumstats_object.data = gethapmap3(new_Sumstats_object.data, build=build,log=self.log, **kwargs)
557
575
  return new_Sumstats_object
558
576
  ######################################################################
559
577
 
560
- def check_af(self,ref_infer,**args):
561
- self.data = parallelecheckaf(self.data,ref_infer=ref_infer,log=self.log,**args)
578
+ def check_af(self,ref_infer,**kwargs):
579
+ self.data = parallelecheckaf(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
562
580
  self.meta["gwaslab"]["references"]["ref_infer_daf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer_daf"] , ref_infer)
563
- def infer_af(self,ref_infer,**args):
564
- self.data = paralleleinferaf(self.data,ref_infer=ref_infer,log=self.log,**args)
581
+ def infer_af(self,ref_infer,**kwargs):
582
+ self.data = paralleleinferaf(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
565
583
  self.meta["gwaslab"]["references"]["ref_infer_af"] = ref_infer
566
584
  self.meta["gwaslab"]["references"]["ref_infer_af"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer_af"] , ref_infer)
567
- def plot_daf(self, **args):
568
- fig,outliers = plotdaf(self.data, **args)
585
+ def plot_daf(self, **kwargs):
586
+ fig,outliers = plotdaf(self.data, **kwargs)
569
587
  return fig, outliers
570
- def plot_mqq(self, build=None, **args):
588
+ def plot_mqq(self, build=None, **kwargs):
571
589
 
572
590
  chrom="CHR"
573
591
  pos="POS"
@@ -594,17 +612,17 @@ class Sumstats():
594
612
  p=p,
595
613
  eaf=eaf,
596
614
  build = build,
597
- **args)
615
+ **kwargs)
598
616
 
599
617
  return plot
600
618
 
601
- def plot_trumpet(self, build=None, **args):
619
+ def plot_trumpet(self, build=None, **kwargs):
602
620
  if build is None:
603
621
  build = self.meta["gwaslab"]["genome_build"]
604
- fig = plottrumpet(self.data,build = build, **args)
622
+ fig = plottrumpet(self.data,build = build, **kwargs)
605
623
  return fig
606
624
 
607
- def get_lead(self, build=None, gls=False, **args):
625
+ def get_lead(self, build=None, gls=False, **kwargs):
608
626
  if "SNPID" in self.data.columns:
609
627
  id_to_use = "SNPID"
610
628
  else:
@@ -621,7 +639,7 @@ class Sumstats():
621
639
  p="P",
622
640
  log=self.log,
623
641
  build=build,
624
- **args)
642
+ **kwargs)
625
643
  # return sumstats object
626
644
  if gls == True:
627
645
  new_Sumstats_object = copy.deepcopy(self)
@@ -630,7 +648,7 @@ class Sumstats():
630
648
  return new_Sumstats_object
631
649
  return output
632
650
 
633
- def get_density(self, sig_list=None, windowsizekb=100,**args):
651
+ def get_density(self, sig_list=None, windowsizekb=100,**kwargs):
634
652
 
635
653
  if "SNPID" in self.data.columns:
636
654
  id_to_use = "SNPID"
@@ -655,7 +673,7 @@ class Sumstats():
655
673
  log=self.log)
656
674
 
657
675
 
658
- def get_novel(self, **args):
676
+ def get_novel(self, **kwargs):
659
677
  if "SNPID" in self.data.columns:
660
678
  id_to_use = "SNPID"
661
679
  else:
@@ -666,11 +684,11 @@ class Sumstats():
666
684
  pos="POS",
667
685
  p="P",
668
686
  log=self.log,
669
- **args)
687
+ **kwargs)
670
688
  # return sumstats object
671
689
  return output
672
690
 
673
- def check_cis(self, **args):
691
+ def check_cis(self, **kwargs):
674
692
  if "SNPID" in self.data.columns:
675
693
  id_to_use = "SNPID"
676
694
  else:
@@ -681,11 +699,11 @@ class Sumstats():
681
699
  pos="POS",
682
700
  p="P",
683
701
  log=self.log,
684
- **args)
702
+ **kwargs)
685
703
  # return sumstats object
686
704
  return output
687
705
 
688
- def check_novel_set(self, **args):
706
+ def check_novel_set(self, **kwargs):
689
707
  if "SNPID" in self.data.columns:
690
708
  id_to_use = "SNPID"
691
709
  else:
@@ -696,11 +714,11 @@ class Sumstats():
696
714
  pos="POS",
697
715
  p="P",
698
716
  log=self.log,
699
- **args)
717
+ **kwargs)
700
718
  # return sumstats object
701
719
  return output
702
720
 
703
- def anno_gene(self, **args):
721
+ def anno_gene(self, **kwargs):
704
722
  if "SNPID" in self.data.columns:
705
723
  id_to_use = "SNPID"
706
724
  else:
@@ -710,73 +728,73 @@ class Sumstats():
710
728
  chrom="CHR",
711
729
  pos="POS",
712
730
  log=self.log,
713
- **args)
731
+ **kwargs)
714
732
  return output
715
733
 
716
- def get_per_snp_r2(self,**args):
717
- self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **args)
734
+ def get_per_snp_r2(self,**kwargs):
735
+ self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **kwargs)
718
736
  #add data inplace
719
737
 
720
- def get_gc(self, mode=None, **args):
738
+ def get_gc(self, mode=None, **kwargs):
721
739
  if mode is None:
722
740
  if "P" in self.data.columns:
723
- output = lambdaGC(self.data[["CHR","P"]],mode="P",**args)
741
+ output = lambdaGC(self.data[["CHR","P"]],mode="P",**kwargs)
724
742
  elif "Z" in self.data.columns:
725
- output = lambdaGC(self.data[["CHR","Z"]],mode="Z",**args)
743
+ output = lambdaGC(self.data[["CHR","Z"]],mode="Z",**kwargs)
726
744
  elif "CHISQ" in self.data.columns:
727
- output = lambdaGC(self.data[["CHR","CHISQ"]],mode="CHISQ",**args)
745
+ output = lambdaGC(self.data[["CHR","CHISQ"]],mode="CHISQ",**kwargs)
728
746
  elif "MLOG10P" in self.data.columns:
729
- output = lambdaGC(self.data[["CHR","MLOG10P"]],mode="MLOG10P",**args)
747
+ output = lambdaGC(self.data[["CHR","MLOG10P"]],mode="MLOG10P",**kwargs)
730
748
 
731
749
  #return scalar
732
750
  self.meta["Genomic inflation factor"] = output
733
751
  return output
734
752
  else:
735
- output = lambdaGC(self.data[["CHR",mode]],mode=mode,**args)
753
+ output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
736
754
  self.meta["Genomic inflation factor"] = output
737
755
  return output
738
756
  ## LDSC ##############################################################################################
739
- def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **args):
757
+ def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
740
758
  if build is None:
741
759
  build = self.meta["gwaslab"]["genome_build"]
742
- insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
743
- self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **args)
760
+ insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
761
+ self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
744
762
 
745
- def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **args):
763
+ def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
746
764
  if build is None:
747
765
  build = self.meta["gwaslab"]["genome_build"]
748
- insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
749
- self.ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **args)
766
+ insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
767
+ self.ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
750
768
 
751
- def estimate_h2_cts_by_ldsc(self, build=None, verbose=True, match_allele=True, **args):
769
+ def estimate_h2_cts_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
752
770
  if build is None:
753
771
  build = self.meta["gwaslab"]["genome_build"]
754
- insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
755
- self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **args)
772
+ insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
773
+ self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
756
774
 
757
- def estimate_partitioned_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **args):
775
+ def estimate_partitioned_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
758
776
  if build is None:
759
777
  build = self.meta["gwaslab"]["genome_build"]
760
- insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
761
- self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **args)
778
+ insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
779
+ self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
762
780
  # external ################################################################################################
763
781
 
764
- def calculate_ld_matrix(self,**args):
765
- self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**args)
782
+ def calculate_ld_matrix(self,**kwargs):
783
+ self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
766
784
 
767
- def run_susie_rss(self,**args):
768
- self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**args)
785
+ def run_susie_rss(self,**kwargs):
786
+ self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**kwargs)
769
787
 
770
- def clump(self,**args):
771
- self.clumps,self.plink_log = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **args)
788
+ def clump(self,**kwargs):
789
+ self.clumps,self.plink_log = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
772
790
 
773
- def calculate_prs(self,**args):
774
- combined_results_summary = _calculate_prs(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **args)
791
+ def calculate_prs(self,**kwargs):
792
+ combined_results_summary = _calculate_prs(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
775
793
  return combined_results_summary
776
794
 
777
795
  # to_format ###############################################################################################
778
796
 
779
- def to_format(self, path, build=None, **args):
797
+ def to_format(self, path, build=None, verbose=True, **kwargs):
780
798
  if build is None:
781
799
  build = self.meta["gwaslab"]["genome_build"]
782
- _to_format(self.data, path, log=self.log, meta=self.meta, build=build, **args)
800
+ _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)