gwaslab 3.5.5__py3-none-any.whl → 3.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

@@ -0,0 +1,663 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import time
4
+ import re
5
+ import copy
6
+ from gwaslab.g_Sumstats_summary import summarize
7
+ from gwaslab.g_Sumstats_summary import lookupstatus
8
+ from gwaslab.io_preformat_input import preformat
9
+ from gwaslab.io_to_formats import _to_format
10
+ from gwaslab.g_Log import Log
11
+ from gwaslab.qc_fix_sumstats import fixID
12
+ from gwaslab.qc_fix_sumstats import flipSNPID
13
+ from gwaslab.qc_fix_sumstats import stripSNPID
14
+ from gwaslab.qc_fix_sumstats import removedup
15
+ from gwaslab.qc_fix_sumstats import fixchr
16
+ from gwaslab.qc_fix_sumstats import fixpos
17
+ from gwaslab.qc_fix_sumstats import fixallele
18
+ from gwaslab.qc_fix_sumstats import parallelnormalizeallele
19
+ from gwaslab.qc_fix_sumstats import sanitycheckstats
20
+ from gwaslab.qc_fix_sumstats import parallelizeliftovervariant
21
+ from gwaslab.qc_fix_sumstats import flipallelestats
22
+ from gwaslab.qc_fix_sumstats import sortcoordinate
23
+ from gwaslab.qc_fix_sumstats import sortcolumn
24
+ from gwaslab.qc_fix_sumstats import _set_build
25
+ from gwaslab.qc_fix_sumstats import _process_build
26
+ from gwaslab.hm_harmonize_sumstats import parallelecheckaf
27
+ from gwaslab.hm_harmonize_sumstats import paralleleinferaf
28
+ from gwaslab.hm_harmonize_sumstats import checkref
29
+ from gwaslab.hm_harmonize_sumstats import oldcheckref
30
+ from gwaslab.hm_harmonize_sumstats import rsidtochrpos
31
+ from gwaslab.hm_harmonize_sumstats import parallelizeassignrsid
32
+ from gwaslab.hm_harmonize_sumstats import parallelinferstrand
33
+ from gwaslab.hm_harmonize_sumstats import parallelrsidtochrpos
34
+ from gwaslab.hm_harmonize_sumstats import _paralleleinferafwithmaf
35
+ from gwaslab.util_in_filter_value import filtervalues
36
+ from gwaslab.util_in_filter_value import filterout
37
+ from gwaslab.util_in_filter_value import filterin
38
+ from gwaslab.util_in_filter_value import filterregionin
39
+ from gwaslab.util_in_filter_value import filterregionout
40
+ from gwaslab.util_in_filter_value import _filter_indel
41
+ from gwaslab.util_in_filter_value import _filter_palindromic
42
+ from gwaslab.util_in_filter_value import _filter_snp
43
+ from gwaslab.util_in_filter_value import _exclude_hla
44
+ from gwaslab.util_in_filter_value import inferbuild
45
+ from gwaslab.util_in_filter_value import sampling
46
+ from gwaslab.util_in_filter_value import _get_flanking
47
+ from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
48
+ from gwaslab.util_in_filter_value import _get_flanking_by_id
49
+ from gwaslab.util_in_calculate_gc import lambdaGC
50
+ from gwaslab.util_in_convert_h2 import _get_per_snp_r2
51
+ from gwaslab.util_in_get_sig import getsig
52
+ from gwaslab.util_in_get_density import getsignaldensity
53
+ from gwaslab.util_in_get_density import assigndensity
54
+ from gwaslab.util_in_get_sig import annogene
55
+ from gwaslab.util_in_get_sig import getnovel
56
+ from gwaslab.util_in_get_sig import _check_cis
57
+ from gwaslab.util_in_get_sig import _check_novel_set
58
+ from gwaslab.util_in_fill_data import filldata
59
+ from gwaslab.bd_get_hapmap3 import gethapmap3
60
+ from gwaslab.bd_common_data import get_chr_list
61
+ from gwaslab.bd_common_data import get_number_to_chr
62
+ from gwaslab.bd_common_data import get_chr_to_number
63
+ from gwaslab.bd_common_data import get_high_ld
64
+ from gwaslab.bd_common_data import get_format_dict
65
+ from gwaslab.bd_common_data import get_formats_list
66
+ from gwaslab.g_version import _show_version
67
+ from gwaslab.g_version import gwaslab_info
68
+ from gwaslab.g_meta import _init_meta
69
+ from gwaslab.g_meta import _append_meta_record
70
+ from gwaslab.util_ex_run_clumping import _clump
71
+ from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
72
+ from gwaslab.util_ex_calculate_prs import _calculate_prs
73
+ from gwaslab.viz_plot_mqqplot import mqqplot
74
+ from gwaslab.viz_plot_trumpetplot import plottrumpet
75
+ from gwaslab.viz_plot_compare_af import plotdaf
76
+ from gwaslab.util_ex_run_susie import _run_susie_rss
77
+ from gwaslab.qc_fix_sumstats import _check_data_consistency
78
+ from gwaslab.util_ex_ldsc import _estimate_h2_by_ldsc
79
+ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
80
+ from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
81
+ from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
82
+ from gwaslab.bd_get_hapmap3 import gethapmap3
83
+ from gwaslab.util_abf_finemapping import abf_finemapping
84
+ from gwaslab.util_abf_finemapping import make_cs
85
+ from gwaslab.io_read_pipcs import _read_pipcs
86
+ from gwaslab.viz_plot_credible_sets import _plot_cs
87
+ import gc
88
+ from gwaslab.viz_plot_phe_heatmap import _gwheatmap
89
+ from gwaslab.viz_plot_effect import _plot_effect
90
+ from gwaslab.util_in_merge import _extract_variant
91
+
92
+ #20250215
93
class SumstatsSet():
    """Set of variants extracted from several summary statistics.

    ``__init__`` builds ``self.data`` by calling ``_extract_variant`` on
    ``variant_set`` and ``sumstats_dic``. The remaining methods are thin
    wrappers that hand ``self.data`` (and usually ``self.log``) to the gwaslab
    QC, harmonization, filtering, plotting and export helpers imported by this
    module.

    Attributes set by ``__init__``:
        data: result of ``_extract_variant``; the methods below read its
            ``.columns``, so it is presumably a pandas DataFrame.
        log: ``Log`` instance passed to the helpers.
        meta: dictionary returned by ``_init_meta()``.
        build: the ``build`` argument exactly as passed in.
    """

    def __init__(self,
                 sumstats_dic,
                 variant_set=None,
                 build="99",
                 species="homo sapiens",
                 build_infer=False,
                 set="set1",
                 verbose=True,
                 **readargs):
        """Extract ``variant_set`` from ``sumstats_dic`` and initialise metadata.

        ``build_infer`` and ``**readargs`` are accepted but not used in this
        constructor.
        """
        # basic attributes
        self.data = pd.DataFrame()
        self.log = Log()

        # meta information
        self.meta = _init_meta()
        self.build = build
        self.meta["gwaslab"]["set_name"] = set
        self.meta["gwaslab"]["species"] = species

        # print gwaslab version information
        _show_version(self.log, verbose=verbose)

        self.data = _extract_variant(variant_set, sumstats_dic, log=self.log, verbose=verbose)

    # private helpers #########################################################################

    def _id_column(self):
        """Return "SNPID" when that column exists in ``self.data``, otherwise "rsID"."""
        return "SNPID" if "SNPID" in self.data.columns else "rsID"

    def _record_reference(self, key, ref):
        """Append ``ref`` to the reference record ``key``, creating the record if absent."""
        references = self.meta["gwaslab"]["references"]
        if key in references:
            references[key] = _append_meta_record(references[key], ref)
        else:
            references[key] = ref

    def _apply_filter(self, func, make_copy, args=(), kwargs=None, pass_log=True):
        """Run ``func`` on the data of this object or of a deep copy.

        When ``make_copy`` is true the filter is applied to a deep copy, which
        is returned; otherwise ``self.data`` is replaced and ``None`` is
        returned. With ``pass_log`` the target object's own log is forwarded
        as ``log=``.
        """
        kwargs = {} if kwargs is None else kwargs
        target = copy.deepcopy(self) if make_copy else self
        if pass_log:
            target.data = func(target.data, *args, log=target.log, **kwargs)
        else:
            target.data = func(target.data, *args, **kwargs)
        return target if make_copy else None

    # plotting ################################################################################

    def plot_effect(self, **args):
        """Call ``_plot_effect`` on ``self.data``; nothing is returned."""
        _plot_effect(self.data, **args)

    #### helper ###############################################################################

    def lookup_status(self, status="STATUS"):
        """Return ``lookupstatus`` applied to the ``status`` column."""
        return lookupstatus(self.data[status])

    def set_build(self, build, verbose=True):
        """Set the genome build through ``_set_build`` and store it in the metadata."""
        self.data, self.meta["gwaslab"]["genome_build"] = _set_build(
            self.data, build=build, log=self.log, verbose=verbose)
        gc.collect()

    def infer_build(self, verbose=True, **kwargs):
        """Infer the genome build through ``inferbuild`` and store it in the metadata."""
        self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(
            self.data, log=self.log, verbose=verbose, **kwargs)

    def liftover(self, to_build, from_build=None, **kwargs):
        """Lift coordinates over to ``to_build``.

        When ``from_build`` is None the build recorded in the metadata is
        used; if that record is "99" the build is inferred first.
        """
        if from_build is None:
            if self.meta["gwaslab"]["genome_build"] == "99":
                # log is forwarded here as in infer_build().
                # NOTE(review): **kwargs is shared with the liftover call below,
                # as in the original; confirm inferbuild accepts every liftover option.
                self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(
                    self.data, log=self.log, **kwargs)
            from_build = self.meta["gwaslab"]["genome_build"]
        self.data = parallelizeliftovervariant(
            self.data, from_build=from_build, to_build=to_build, log=self.log, **kwargs)
        # coordinates changed, so sorting and harmonization no longer hold
        self.meta["is_sorted"] = False
        self.meta["is_harmonised"] = False
        self.meta["gwaslab"]["genome_build"] = to_build

    # QC ######################################################################################
    # clean the sumstats with one line
    def basic_check(self,
                    remove=False,
                    remove_dup=False,
                    n_cores=1,
                    fixid_args={},
                    removedup_args={},
                    fixchr_args={},
                    fixpos_args={},
                    fixallele_args={},
                    sanitycheckstats_args={},
                    consistencycheck_args={},
                    normalize=True,
                    normalizeallele_args={},
                    verbose=True):
        """Run the standard QC steps in a fixed order and sort the result.

        The ``*_args`` dictionaries are only unpacked into the corresponding
        helper call and never modified, so the shared ``{}`` defaults are safe.
        Normalization runs only when ``normalize is True`` and duplicate
        removal only when ``remove_dup is True``.
        """
        # try to fix data without dropping any information
        self.data = fixID(self.data, log=self.log, verbose=verbose, **fixid_args)
        self.data = fixchr(self.data, log=self.log, remove=remove, verbose=verbose, **fixchr_args)
        self.data = fixpos(self.data, log=self.log, remove=remove, verbose=verbose, **fixpos_args)
        self.data = fixallele(self.data, log=self.log, remove=remove, verbose=verbose, **fixallele_args)
        self.data = sanitycheckstats(self.data, log=self.log, verbose=verbose, **sanitycheckstats_args)
        _check_data_consistency(self.data, log=self.log, verbose=verbose, **consistencycheck_args)

        if normalize is True:
            self.data = parallelnormalizeallele(
                self.data, n_cores=n_cores, verbose=verbose, log=self.log, **normalizeallele_args)
        if remove_dup is True:
            self.data = removedup(self.data, log=self.log, verbose=verbose, **removedup_args)
        self.data = sortcoordinate(self.data, verbose=verbose, log=self.log)
        self.data = sortcolumn(self.data, verbose=verbose, log=self.log)
        self.meta["is_sorted"] = True

    def harmonize(self,
                  basic_check=True,
                  ref_seq=None,
                  ref_rsid_tsv=None,
                  ref_rsid_vcf=None,
                  ref_infer=None,
                  ref_alt_freq=None,
                  maf_threshold=0.40,
                  ref_seq_mode="v",
                  n_cores=1,
                  remove=False,
                  checkref_args={},
                  removedup_args={},
                  assignrsid_args={},
                  inferstrand_args={},
                  flipallelestats_args={},
                  liftover_args={},
                  fixid_args={},
                  fixchr_args={},
                  fixpos_args={},
                  fixallele_args={},
                  sanitycheckstats_args={},
                  normalizeallele_args={}
                  ):
        """Run the harmonization pipeline and return ``self``.

        Each stage runs only when its reference argument is given.
        ``liftover_args`` is accepted but not used in this method.

        Raises:
            ValueError: ``ref_seq`` is given and ``ref_seq_mode`` is neither
                "v" nor "s".
        """
        # Standard pipeline
        ####################################################
        # part 1 : basic_check
        #    1.1 fix ID
        #    1.2 remove duplication
        #    1.3 standardization : CHR POS EA NEA
        #    1.4 normalization : EA NEA
        #    1.5 sanity check : BETA SE OR EAF N OR_95L OR_95H
        #    1.6 sorting genomic coordinates and column order
        if basic_check is True:
            self.data = fixID(self.data, log=self.log, **fixid_args)
            self.data = fixchr(self.data, remove=remove, log=self.log, **fixchr_args)
            self.data = fixpos(self.data, remove=remove, log=self.log, **fixpos_args)
            self.data = fixallele(self.data, log=self.log, **fixallele_args)
            self.data = sanitycheckstats(self.data, log=self.log, **sanitycheckstats_args)
            self.data = parallelnormalizeallele(
                self.data, log=self.log, n_cores=n_cores, **normalizeallele_args)
            self.data = sortcolumn(self.data, log=self.log)
            gc.collect()

        #####################################################
        # part 2 : annotating and flipping
        #    2.1 ref check -> flip allele and allele-specific stats
        #    2.2 assign rsid
        #    2.3 infer strand for palindromic SNP
        #
        ########## liftover ###############
        # 3 : liftover by chr and pos to target build -> reset status
        ###################################
        #    3.1 ref check (target build) -> flip allele and allele-specific stats
        #    3.2 assign rsid (target build)
        #    3.2 infer strand for palindromic SNP (target build)
        #####################################################
        if ref_seq is not None:
            if ref_seq_mode == "v":
                self.data = checkref(self.data, ref_seq, log=self.log, **checkref_args)
            elif ref_seq_mode == "s":
                self.data = oldcheckref(self.data, ref_seq, log=self.log, **checkref_args)
            else:
                raise ValueError("ref_seq_mode should be 'v' (vectorized, faster) or 's' (sequential, slower)")

            self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
            self.data = flipallelestats(self.data, log=self.log, **flipallelestats_args)
            gc.collect()

        if ref_infer is not None:
            self.data = parallelinferstrand(
                self.data, ref_infer=ref_infer, ref_alt_freq=ref_alt_freq,
                maf_threshold=maf_threshold, n_cores=n_cores, log=self.log, **inferstrand_args)
            self.meta["gwaslab"]["references"]["ref_infer"] = _append_meta_record(
                self.meta["gwaslab"]["references"]["ref_infer"], ref_infer)
            self.data = flipallelestats(self.data, log=self.log, **flipallelestats_args)
            gc.collect()

        # IDs are rewritten before rsID assignment whenever alleles may have been flipped
        if (ref_seq is not None or ref_infer is not None) and (ref_rsid_tsv is not None or ref_rsid_vcf is not None):
            self.data = fixID(self.data, log=self.log, **{"fixid": True, "fixsep": True, "overwrite": True})
            gc.collect()

        #####################################################
        if ref_rsid_tsv is not None:
            self.data = parallelizeassignrsid(
                self.data, path=ref_rsid_tsv, ref_mode="tsv",
                n_cores=n_cores, log=self.log, **assignrsid_args)
            self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
            gc.collect()

        if ref_rsid_vcf is not None:
            self.data = parallelizeassignrsid(
                self.data, path=ref_rsid_vcf, ref_mode="vcf",
                n_cores=n_cores, log=self.log, **assignrsid_args)
            self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = _append_meta_record(
                self.meta["gwaslab"]["references"]["ref_rsid_vcf"], ref_rsid_vcf)
            gc.collect()

        ######################################################
        if remove is True:
            self.data = removedup(self.data, log=self.log, **removedup_args)

        ################################################
        self.data = sortcoordinate(self.data, log=self.log)
        self.data = sortcolumn(self.data, log=self.log)
        gc.collect()
        self.meta["is_sorted"] = True
        self.meta["is_harmonised"] = True
        return self

    ############################################################################################################
    # customizable API to build your own QC pipeline
    def fix_id(self, **kwargs):
        """Replace ``self.data`` with the result of ``fixID``."""
        self.data = fixID(self.data, log=self.log, **kwargs)

    def flip_snpid(self, **kwargs):
        """Replace ``self.data`` with the result of ``flipSNPID``."""
        self.data = flipSNPID(self.data, log=self.log, **kwargs)

    def strip_snpid(self, **kwargs):
        """Replace ``self.data`` with the result of ``stripSNPID``."""
        self.data = stripSNPID(self.data, log=self.log, **kwargs)

    def fix_chr(self, **kwargs):
        """Replace ``self.data`` with the result of ``fixchr``."""
        self.data = fixchr(self.data, log=self.log, **kwargs)

    def fix_pos(self, **kwargs):
        """Replace ``self.data`` with the result of ``fixpos``."""
        self.data = fixpos(self.data, log=self.log, **kwargs)

    def fix_allele(self, **kwargs):
        """Replace ``self.data`` with the result of ``fixallele``."""
        self.data = fixallele(self.data, log=self.log, **kwargs)

    def remove_dup(self, **kwargs):
        """Replace ``self.data`` with the result of ``removedup``."""
        self.data = removedup(self.data, log=self.log, **kwargs)

    def check_sanity(self, **kwargs):
        """Replace ``self.data`` with the result of ``sanitycheckstats``."""
        self.data = sanitycheckstats(self.data, log=self.log, **kwargs)

    def check_data_consistency(self, **kwargs):
        """Run ``_check_data_consistency``; ``self.data`` is not reassigned."""
        _check_data_consistency(self.data, log=self.log, **kwargs)

    def check_id(self, **kwargs):
        """Placeholder; currently does nothing."""
        pass

    def check_ref(self, ref_seq, ref_seq_mode="v", **kwargs):
        """Check alleles against ``ref_seq`` and record the reference used.

        Raises:
            ValueError: ``ref_seq_mode`` is neither "v" nor "s" (the same
                check ``harmonize`` performs; it used to be skipped silently).
        """
        if ref_seq_mode == "v":
            checker = checkref
        elif ref_seq_mode == "s":
            checker = oldcheckref
        else:
            raise ValueError("ref_seq_mode should be 'v' (vectorized, faster) or 's' (sequential, slower)")
        self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
        self.data = checker(self.data, ref_seq, log=self.log, **kwargs)

    def infer_strand(self, ref_infer, **kwargs):
        """Record ``ref_infer`` and run ``parallelinferstrand``."""
        self.meta["gwaslab"]["references"]["ref_infer"] = _append_meta_record(
            self.meta["gwaslab"]["references"]["ref_infer"], ref_infer)
        self.data = parallelinferstrand(self.data, ref_infer=ref_infer, log=self.log, **kwargs)

    def flip_allele_stats(self, **kwargs):
        """Replace ``self.data`` with the result of ``flipallelestats``."""
        self.data = flipallelestats(self.data, log=self.log, **kwargs)

    def normalize_allele(self, **kwargs):
        """Replace ``self.data`` with the result of ``parallelnormalizeallele``."""
        self.data = parallelnormalizeallele(self.data, log=self.log, **kwargs)

    def assign_rsid(self,
                    ref_rsid_tsv=None,
                    ref_rsid_vcf=None,
                    **kwargs):
        """Assign rsIDs from a TSV and/or a VCF reference and record each one used."""
        if ref_rsid_tsv is not None:
            self.data = parallelizeassignrsid(
                self.data, path=ref_rsid_tsv, ref_mode="tsv", log=self.log, **kwargs)
            self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
        if ref_rsid_vcf is not None:
            self.data = parallelizeassignrsid(
                self.data, path=ref_rsid_vcf, ref_mode="vcf", log=self.log, **kwargs)
            self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = _append_meta_record(
                self.meta["gwaslab"]["references"]["ref_rsid_vcf"], ref_rsid_vcf)

    def rsid_to_chrpos(self, **kwargs):
        """Replace ``self.data`` with the result of ``rsidtochrpos``."""
        self.data = rsidtochrpos(self.data, log=self.log, **kwargs)

    def rsid_to_chrpos2(self, **kwargs):
        """Replace ``self.data`` with the result of ``parallelrsidtochrpos``."""
        self.data = parallelrsidtochrpos(self.data, log=self.log, **kwargs)

    ############################################################################################################

    def sort_coordinate(self, **sort_args):
        """Sort by genomic coordinate and mark the metadata as sorted."""
        self.data = sortcoordinate(self.data, log=self.log, **sort_args)
        self.meta["is_sorted"] = True

    def sort_column(self, **kwargs):
        """Replace ``self.data`` with the result of ``sortcolumn``."""
        self.data = sortcolumn(self.data, log=self.log, **kwargs)

    ############################################################################################################
    def fill_data(self, verbose=True, **kwargs):
        """Run ``filldata`` and then re-order the columns with ``sortcolumn``."""
        self.data = filldata(self.data, verbose=verbose, log=self.log, **kwargs)
        self.data = sortcolumn(self.data, verbose=verbose, log=self.log)

    # utilities ############################################################################################################
    # filter series ######################################################################
    # Each filter returns a filtered deep copy when ``inplace is False`` and
    # otherwise replaces ``self.data`` and returns None.
    def filter_flanking(self, inplace=False, **kwargs):
        """Filter with ``_get_flanking`` (no log is passed)."""
        return self._apply_filter(_get_flanking, inplace is False, kwargs=kwargs, pass_log=False)

    def filter_flanking_by_chrpos(self, chrpos, inplace=False, **kwargs):
        """Filter with ``_get_flanking_by_chrpos`` (no log is passed)."""
        return self._apply_filter(_get_flanking_by_chrpos, inplace is False,
                                  args=(chrpos,), kwargs=kwargs, pass_log=False)

    def filter_flanking_by_id(self, snpid, inplace=False, **kwargs):
        """Filter with ``_get_flanking_by_id`` (no log is passed)."""
        return self._apply_filter(_get_flanking_by_id, inplace is False,
                                  args=(snpid,), kwargs=kwargs, pass_log=False)

    def filter_value(self, expr, inplace=False, **kwargs):
        """Filter with ``filtervalues`` using the expression ``expr``."""
        return self._apply_filter(filtervalues, inplace is False, args=(expr,), kwargs=kwargs)

    def filter_out(self, inplace=False, **kwargs):
        """Filter with ``filterout``."""
        return self._apply_filter(filterout, inplace is False, kwargs=kwargs)

    def filter_in(self, inplace=False, **kwargs):
        """Filter with ``filterin``."""
        return self._apply_filter(filterin, inplace is False, kwargs=kwargs)

    def filter_region_in(self, inplace=False, **kwargs):
        """Filter with ``filterregionin``."""
        return self._apply_filter(filterregionin, inplace is False, kwargs=kwargs)

    def filter_region_out(self, inplace=False, **kwargs):
        """Filter with ``filterregionout``."""
        return self._apply_filter(filterregionout, inplace is False, kwargs=kwargs)

    def filter_palindromic(self, inplace=False, **kwargs):
        """Filter with ``_filter_palindromic``."""
        return self._apply_filter(_filter_palindromic, inplace is False, kwargs=kwargs)

    def filter_snp(self, inplace=False, **kwargs):
        """Filter with ``_filter_snp``."""
        return self._apply_filter(_filter_snp, inplace is False, kwargs=kwargs)

    def filter_indel(self, inplace=False, **kwargs):
        """Filter with ``_filter_indel``."""
        return self._apply_filter(_filter_indel, inplace is False, kwargs=kwargs)

    def exclude_hla(self, inplace=False, **kwargs):
        """Filter with ``_exclude_hla``."""
        return self._apply_filter(_exclude_hla, inplace is False, kwargs=kwargs)

    def random_variants(self, inplace=False, n=1, p=None, **kwargs):
        """Sample variants with ``sampling``.

        Works in place only when ``inplace is True``; otherwise a sampled deep
        copy is returned.
        """
        return self._apply_filter(sampling, inplace is not True, kwargs=dict(n=n, p=p, **kwargs))

    def filter_hapmap3(self, inplace=False, build=None, **kwargs):
        """Filter with ``gethapmap3``; ``build`` defaults to the recorded genome build.

        Works in place only when ``inplace is True``; otherwise a filtered deep
        copy is returned. The copy now logs to its own log, like every other
        filter.
        """
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]
        return self._apply_filter(gethapmap3, inplace is not True, kwargs=dict(build=build, **kwargs))

    ######################################################################

    def check_af(self, ref_infer, **kwargs):
        """Run ``parallelecheckaf`` and append ``ref_infer`` to the ``ref_infer_daf`` record."""
        self.data = parallelecheckaf(self.data, ref_infer=ref_infer, log=self.log, **kwargs)
        self.meta["gwaslab"]["references"]["ref_infer_daf"] = _append_meta_record(
            self.meta["gwaslab"]["references"]["ref_infer_daf"], ref_infer)

    def infer_af(self, ref_infer, **kwargs):
        """Run ``paralleleinferaf`` and append ``ref_infer`` to the ``ref_infer_af`` record."""
        self.data = paralleleinferaf(self.data, ref_infer=ref_infer, log=self.log, **kwargs)
        # append once; the record used to be overwritten and then appended to itself
        self._record_reference("ref_infer_af", ref_infer)

    def maf_to_eaf(self, ref_infer, **kwargs):
        """Run ``_paralleleinferafwithmaf`` and append ``ref_infer`` to the ``ref_infer_maf`` record."""
        self.data = _paralleleinferafwithmaf(self.data, ref_infer=ref_infer, log=self.log, **kwargs)
        # the old value now comes from ref_infer_maf itself, not from ref_infer_af
        self._record_reference("ref_infer_maf", ref_infer)

    def plot_daf(self, **kwargs):
        """Return the ``(fig, outliers)`` pair produced by ``plotdaf``."""
        fig, outliers = plotdaf(self.data, **kwargs)
        return fig, outliers

    def plot_gwheatmap(self, **kwargs):
        """Return the figure produced by ``_gwheatmap``."""
        fig = _gwheatmap(self.data, **kwargs)
        return fig

    def plot_mqq(self, build=None, **kwargs):
        """Call ``mqqplot`` with the standard column names and return its result.

        ``build`` defaults to the genome build recorded in the metadata and
        ``eaf`` is passed as None when there is no "EAF" column.

        Raises:
            ValueError: the data has neither a "SNPID" nor an "rsID" column.
        """
        if "SNPID" in self.data.columns:
            snpid = "SNPID"
        elif "rsID" in self.data.columns:
            snpid = "rsID"
        else:
            # previously surfaced as an UnboundLocalError further down
            raise ValueError("plot_mqq requires a 'SNPID' or 'rsID' column in the data")

        eaf = "EAF" if "EAF" in self.data.columns else None

        # extract build information from meta data
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]

        plot = mqqplot(self.data,
                       snpid=snpid,
                       chrom="CHR",
                       pos="POS",
                       p="P",
                       eaf=eaf,
                       build=build,
                       **kwargs)
        return plot

    def plot_trumpet(self, build=None, **kwargs):
        """Return the figure from ``plottrumpet``; ``build`` defaults to the recorded build."""
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]
        fig = plottrumpet(self.data, build=build, **kwargs)
        return fig

    def get_lead(self, build=None, gls=False, **kwargs):
        """Return the output of ``getsig``.

        When ``gls == True`` the output is returned wrapped in a deep copy of
        this object instead.
        """
        # extract build information from meta data
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]

        output = getsig(self.data,
                        id=self._id_column(),
                        chrom="CHR",
                        pos="POS",
                        p="P",
                        log=self.log,
                        build=build,
                        **kwargs)
        # return sumstats object
        if gls == True:
            new_Sumstats_object = copy.deepcopy(self)
            new_Sumstats_object.data = output
            gc.collect()
            return new_Sumstats_object
        return output

    def get_density(self, sig_list=None, windowsizekb=100, **kwargs):
        """Write a "DENSITY" column into ``self.data``.

        Uses ``getsignaldensity`` when ``sig_list`` is None and
        ``assigndensity`` when it is a DataFrame; any other ``sig_list`` leaves
        the data untouched. ``**kwargs`` is accepted but not forwarded.
        """
        id_to_use = self._id_column()

        if sig_list is None:
            self.data["DENSITY"] = getsignaldensity(self.data,
                                                    id=id_to_use,
                                                    chrom="CHR",
                                                    pos="POS",
                                                    bwindowsizekb=windowsizekb,
                                                    log=self.log)
        elif isinstance(sig_list, pd.DataFrame):
            self.data["DENSITY"] = assigndensity(self.data,
                                                 sig_list,
                                                 id=id_to_use,
                                                 chrom="CHR",
                                                 pos="POS",
                                                 bwindowsizekb=windowsizekb,
                                                 log=self.log)

    def get_novel(self, **kwargs):
        """Return the output of ``getnovel``."""
        output = getnovel(self.data,
                          id=self._id_column(),
                          chrom="CHR",
                          pos="POS",
                          p="P",
                          log=self.log,
                          **kwargs)
        return output

    def check_cis(self, gls=False, **kwargs):
        """Return the output of ``_check_cis``.

        When ``gls == True`` the output is returned wrapped in a deep copy of
        this object instead.
        """
        output = _check_cis(self.data,
                            id=self._id_column(),
                            chrom="CHR",
                            pos="POS",
                            p="P",
                            log=self.log,
                            **kwargs)

        # return sumstats object
        if gls == True:
            new_Sumstats_object = copy.deepcopy(self)
            new_Sumstats_object.data = output
            gc.collect()
            return new_Sumstats_object
        return output

    def check_novel_set(self, **kwargs):
        """Return the output of ``_check_novel_set``."""
        output = _check_novel_set(self.data,
                                  id=self._id_column(),
                                  chrom="CHR",
                                  pos="POS",
                                  p="P",
                                  log=self.log,
                                  **kwargs)
        return output

    def anno_gene(self, **kwargs):
        """Return the output of ``annogene``."""
        output = annogene(self.data,
                          id=self._id_column(),
                          chrom="CHR",
                          pos="POS",
                          log=self.log,
                          **kwargs)
        return output

    def get_per_snp_r2(self, **kwargs):
        """Replace ``self.data`` with the result of ``_get_per_snp_r2`` (BETA, EAF, N columns)."""
        # add data inplace
        self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **kwargs)

    # to_format ###############################################################################################

    def to_format(self, path, build=None, verbose=True, **kwargs):
        """Export through ``_to_format``; ``build`` defaults to the recorded genome build."""
        if build is None:
            build = self.meta["gwaslab"]["genome_build"]
        _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
662
+
663
+
gwaslab/g_version.py CHANGED
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.5.5",
19
- "release_date":"20250102"
18
+ "version":"3.5.7",
19
+ "release_date":"20250307"
20
20
  }
21
21
  return dic
22
22